1 files changed, 13 insertions, 13 deletions
diff --git a/examples/common.h b/examples/common.h
index 14e6b1b..fce1d42 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -17,7 +17,7 @@
 struct gpt_params {
     int32_t seed          = -1;   // RNG seed
     int32_t n_threads     = std::min(4, (int32_t) std::thread::hardware_concurrency());
-    int32_t n_predict     = 128;  // new tokens to predict
+    int32_t n_predict     = -1;   // new tokens to predict (presumably -1 = no fixed limit; confirm against the generation loop)
     int32_t n_parts       = -1;   // amount of model parts (-1 = determine from model dimensions)
     int32_t n_ctx         = 512;  // context size
     int32_t n_batch       = 512;  // batch size for prompt processing (must be >=32 to use BLAS)
@@ -25,18 +25,18 @@ struct gpt_params {
 
     // sampling parameters
     std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
-    int32_t top_k = 0;              // <= 0 to use vocab size
-    float   top_p = 1.0f;           // 1.0 = disabled
-    float   tfs_z = 1.0f;           // 1.0 = disabled
-    float   typical_p = 1.0f;       // 1.0 = disabled
-    float   temp = 1.0f;            // 1.0 = disabled
-    float   repeat_penalty  = 1.0f; // 1.0 = disabled
-    int32_t repeat_last_n = -1;     // last n tokens to penalize (0 = disable penalty, -1 = context size)
-    float   frequency_penalty = 0.0f; // 0.0 = disabled
-    float   presence_penalty = 0.0f;  // 0.0 = disabled
-    int     mirostat = 0;           // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
-    float   mirostat_tau = 5.0f;    // target entropy
-    float   mirostat_eta = 0.1f;    // learning rate
+    int32_t top_k             = 40;    // <= 0 to use vocab size
+    float   top_p             = 0.95f; // 1.0 = disabled
+    float   tfs_z             = 1.00f; // 1.0 = disabled
+    float   typical_p         = 1.00f; // 1.0 = disabled
+    float   temp              = 0.80f; // 1.0 = disabled
+    float   repeat_penalty    = 1.10f; // 1.0 = disabled
+    int32_t repeat_last_n     = 64;    // last n tokens to penalize (0 = disable penalty, -1 = context size)
+    float   frequency_penalty = 0.00f; // 0.0 = disabled
+    float   presence_penalty  = 0.00f; // 0.0 = disabled
+    int     mirostat          = 0;     // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
+    float   mirostat_tau      = 5.00f; // target entropy
+    float   mirostat_eta      = 0.10f; // learning rate
 
     std::string model  = "models/lamma-7B/ggml-model.bin"; // model path
     std::string prompt = "";