aboutsummaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorEvan Jones <evan.q.jones@gmail.com>2023-06-03 07:28:45 -0400
committerGitHub <noreply@github.com>2023-06-03 07:28:45 -0400
commit136476e898fb96c302b0829ee3e79267ae12660f (patch)
treeaeeb26dcfef0ecb513664e06cedc81f237f8e42e /examples
parentffb06a345e3a9e30d39aaa5b46a23201a74be6de (diff)
Fix prompt cache saving and chat-persistent rollover (#1678)
* Fix prompt cache saving and chat-persistent rollover (fixes #1670) * clang-tidy Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Diffstat (limited to 'examples')
-rw-r--r--examples/main/main.cpp13
1 files changed, 7 insertions, 6 deletions
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 6131f5b..57cc1e4 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -202,6 +202,13 @@ int main(int argc, char ** argv) {
}
}
+ // if we will use the cache for the full prompt without reaching the end of the cache, force
+ // reevaluation of the last token to recalculate the cached logits
+ if (!embd_inp.empty() && n_matching_session_tokens == embd_inp.size() &&
+ session_tokens.size() > embd_inp.size()) {
+ session_tokens.resize(embd_inp.size() - 1);
+ }
+
// number of tokens to keep when resetting context
if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size() || params.instruct) {
params.n_keep = (int)embd_inp.size();
@@ -360,12 +367,6 @@ int main(int argc, char ** argv) {
}
}
if (i > 0) {
- // check if we've used up all the prompt but not all cached tokens
- if (embd.size() == i && n_session_consumed < (int) session_tokens.size()) {
- // force revaluation of the last token to recalculate logits
- i--;
- n_past--;
- }
embd.erase(embd.begin(), embd.begin() + i);
}
}