From f9a6364912fd0463fddfdbc9ef9f79fdc281570d Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Mon, 8 May 2023 17:41:54 +0300
Subject: llama : require first token to be BOS (#1303)

* llama : require first token to be BOS

* scripts : add ppl-run-all.sh

* perplexity : add BOS for each chunk

* readme : update perplexity values after BOS fix

* perplexity : add clarifying comments
---
 examples/main/main.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'examples/main')

diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 5ac151e..045093c 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -313,7 +313,8 @@ int main(int argc, char ** argv) {
             if (n_past + (int) embd.size() > n_ctx) {
                 const int n_left = n_past - params.n_keep;
 
-                n_past = params.n_keep;
+                // always keep at least the first token (BOS), even when params.n_keep is 0
+                n_past = std::max(1, params.n_keep);
 
                 // insert n_left/2 tokens at the start of embd from last_n_tokens
                 embd.insert(embd.begin(), last_n_tokens.begin() + n_ctx - n_left/2 - embd.size(), last_n_tokens.end() - embd.size());
@@ -331,7 +332,6 @@ int main(int argc, char ** argv) {
             }
 
             // try to reuse a matching prefix from the loaded session instead of re-eval (via n_past)
-            // REVIEW
             if (n_session_consumed < (int) session_tokens.size()) {
                 size_t i = 0;
                 for ( ; i < embd.size(); i++) {
-- 
cgit v1.2.3