aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorErik Scholz <Green-Sky@users.noreply.github.com>2023-03-22 17:09:38 +0100
committerGitHub <noreply@github.com>2023-03-22 18:09:38 +0200
commit56e659a0b271436e24813a801640d015e7b05328 (patch)
tree599b4c53f4f81b5de59f477d331b28bd260fc78d
parent40ea807a972ec7b5a426f034ebfa593b5e7a06ed (diff)
fix perplexity after c-api refactor (#390)
* preallocate a buffer of fitting size for tokenization (utils.cpp)
* don't create a new std::string (especially here, where it's usually large)
-rw-r--r--main.cpp2
-rw-r--r--utils.cpp4
2 files changed, 4 insertions, 2 deletions
diff --git a/main.cpp b/main.cpp
index c164c10..fbb43a8 100644
--- a/main.cpp
+++ b/main.cpp
@@ -85,7 +85,7 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
// Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
// Run `./main --perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
// Output: `perplexity: 13.5106 [114/114]`
- auto tokens = ::llama_tokenize(ctx, params.prompt.c_str(), true);
+ auto tokens = ::llama_tokenize(ctx, params.prompt, true);
int count = 0;
double nll = 0.0;
diff --git a/utils.cpp b/utils.cpp
index 1679ae1..3909c97 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -146,8 +146,10 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
// TODO: not great allocating this every time
std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
- std::vector<llama_token> res(8096);
+ // initialize to prompt number of chars, since n_tokens <= n_prompt_chars
+ std::vector<llama_token> res(text.size() + (int)add_bos);
int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
+ assert(n >= 0);
res.resize(n);
return res;