aboutsummaryrefslogtreecommitdiff
path: root/llama.cpp
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com> 2023-07-18 14:24:43 +0300
committer: GitHub <noreply@github.com> 2023-07-18 14:24:43 +0300
commit: d01bccde9f759b24449fdaa16306b406a50eb367 (patch)
tree: a1c351c6732f399f540a1e91f957eb61db535bbc /llama.cpp
parent: 6cbf9dfb32f0e23ed3afd02d30ab066ed53e2c4d (diff)
ci : integrate with ggml-org/ci (#2250)
* ci : run ctest ggml-ci
* ci : add open llama 3B-v2 tests ggml-ci
* ci : disable wget progress output ggml-ci
* ci : add open llama 3B-v2 tg tests for q4 and q5 quantizations ggml-ci
* tests : try to fix tail free sampling test ggml-ci
* ci : add K-quants ggml-ci
* ci : add short perplexity tests ggml-ci
* ci : add README.md
* ppl : add --chunks argument to limit max number of chunks ggml-ci
* ci : update README
Diffstat (limited to 'llama.cpp')
-rw-r--r-- llama.cpp 15
1 files changed, 12 insertions, 3 deletions
diff --git a/llama.cpp b/llama.cpp
index 0f9d534..fa3b7c0 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2024,9 +2024,18 @@ void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array *
}
// Normalize the second derivatives
- float second_derivatives_sum = std::accumulate(second_derivatives.begin(), second_derivatives.end(), 0.0f);
- for (float & value : second_derivatives) {
- value /= second_derivatives_sum;
+ {
+ const float second_derivatives_sum = std::accumulate(second_derivatives.begin(), second_derivatives.end(), 0.0f);
+
+ if (second_derivatives_sum > 1e-6f) {
+ for (float & value : second_derivatives) {
+ value /= second_derivatives_sum;
+ }
+ } else {
+ for (float & value : second_derivatives) {
+ value = 1.0f / second_derivatives.size();
+ }
+ }
}
float cum_sum = 0.0f;