Add repetition penalty (#20)

* Adding repeat penalization

* Update utils.h

* Update utils.cpp

* Numeric fix

  Should probably still scale by temp even if penalized

* Update comments, more proper application

  I see that numbers can go negative so a fix from a referenced commit

* Minor formatting

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
author: beiller <beiller@gmail.com> 2023-03-12 05:27:42 -0400
committer: GitHub <noreply@github.com> 2023-03-12 11:27:42 +0200
commit: 129c7d1ea886e52ac1b87ff6184310bab3158806 (patch)
tree: fe55fbb47ffb6bd7676890bf342c5b8c7f8c6c52 /utils.h
parent: 702fddf5c5c3c1377e169ba9ecdfed4cb16c268b (diff)
1 files changed, 4 insertions, 0 deletions
diff --git a/utils.h b/utils.h
index bbe8fe8..e331904 100644
--- a/utils.h
+++ b/utils.h
@@ -16,11 +16,13 @@ struct gpt_params {
     int32_t seed      = -1; // RNG seed
     int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
     int32_t n_predict = 128; // new tokens to predict
+    int32_t repeat_last_n = 64;  // last n tokens to penalize
 
     // sampling parameters
     int32_t top_k = 40; // unused
     float   top_p = 0.95f;
     float   temp  = 0.80f;
+    float   repeat_penalty  = 1.30f;
 
     int32_t n_batch = 8; // batch size for prompt processing
 
@@ -89,6 +91,8 @@ gpt_vocab::id gpt_sample_top_k_top_p(
 gpt_vocab::id llama_sample_top_p(
         const gpt_vocab & vocab,
         const float * logits,
+        std::vector<gpt_vocab::id> & last_n_tokens,
+        double repeat_penalty,
         double top_p,
         double temp,
         std::mt19937 & rng);
author	beiller <beiller@gmail.com>	2023-03-12 05:27:42 -0400
committer	GitHub <noreply@github.com>	2023-03-12 11:27:42 +0200
commit	129c7d1ea886e52ac1b87ff6184310bab3158806 (patch)
tree	fe55fbb47ffb6bd7676890bf342c5b8c7f8c6c52 /utils.h
parent	702fddf5c5c3c1377e169ba9ecdfed4cb16c268b (diff)