diff options
author | beiller <beiller@gmail.com> | 2023-03-12 05:27:42 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-12 11:27:42 +0200 |
commit | 129c7d1ea886e52ac1b87ff6184310bab3158806 (patch) | |
tree | fe55fbb47ffb6bd7676890bf342c5b8c7f8c6c52 /utils.h | |
parent | 702fddf5c5c3c1377e169ba9ecdfed4cb16c268b (diff) |
Add repetition penalty (#20)
* Adding repeat penalization
* Update utils.h
* Update utils.cpp
* Numeric fix
The logits should probably still be scaled by temp even if penalized
* Update comments, more proper application
I see that the numbers can go negative, so this applies a fix from a referenced commit
* Minor formatting
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'utils.h')
-rw-r--r-- | utils.h | 4 |
1 file changed, 4 insertions, 0 deletions
@@ -16,11 +16,13 @@ struct gpt_params { int32_t seed = -1; // RNG seed int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); int32_t n_predict = 128; // new tokens to predict + int32_t repeat_last_n = 64; // last n tokens to penalize // sampling parameters int32_t top_k = 40; // unused float top_p = 0.95f; float temp = 0.80f; + float repeat_penalty = 1.30f; int32_t n_batch = 8; // batch size for prompt processing @@ -89,6 +91,8 @@ gpt_vocab::id gpt_sample_top_k_top_p( gpt_vocab::id llama_sample_top_p( const gpt_vocab & vocab, const float * logits, + std::vector<gpt_vocab::id> & last_n_tokens, + double repeat_penalty, double top_p, double temp, std::mt19937 & rng); |