diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-07-18 14:24:43 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-18 14:24:43 +0300 |
commit | d01bccde9f759b24449fdaa16306b406a50eb367 (patch) | |
tree | a1c351c6732f399f540a1e91f957eb61db535bbc /llama.cpp | |
parent | 6cbf9dfb32f0e23ed3afd02d30ab066ed53e2c4d (diff) |
ci : integrate with ggml-org/ci (#2250)
* ci : run ctest
ggml-ci
* ci : add open llama 3B-v2 tests
ggml-ci
* ci : disable wget progress output
ggml-ci
* ci : add open llama 3B-v2 tg tests for q4 and q5 quantizations
ggml-ci
* tests : try to fix tail free sampling test
ggml-ci
* ci : add K-quants
ggml-ci
* ci : add short perplexity tests
ggml-ci
* ci : add README.md
* ppl : add --chunks argument to limit max number of chunks
ggml-ci
* ci : update README
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 15 |
1 file changed, 12 insertions, 3 deletions
@@ -2024,9 +2024,18 @@ void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * } // Normalize the second derivatives - float second_derivatives_sum = std::accumulate(second_derivatives.begin(), second_derivatives.end(), 0.0f); - for (float & value : second_derivatives) { - value /= second_derivatives_sum; + { + const float second_derivatives_sum = std::accumulate(second_derivatives.begin(), second_derivatives.end(), 0.0f); + + if (second_derivatives_sum > 1e-6f) { + for (float & value : second_derivatives) { + value /= second_derivatives_sum; + } + } else { + for (float & value : second_derivatives) { + value = 1.0f / second_derivatives.size(); + } + } } float cum_sum = 0.0f; |