1 files changed, 12 insertions, 8 deletions
diff --git a/main.cpp b/main.cpp
index 662a2a7..6bae80c 100644
--- a/main.cpp
+++ b/main.cpp
@@ -165,12 +165,20 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
     // load vocab
     {
         std::string word;
+        std::vector<char> tmp(64);
+
         for (int i = 0; i < model.hparams.n_vocab; i++) {
             uint32_t len;
             fin.read((char *) &len, sizeof(len));
 
             word.resize(len);
-            fin.read((char *) word.data(), len);
+            if (len > 0) {
+                tmp.resize(len);
+                fin.read(tmp.data(), len);
+                word.assign(tmp.data(), len);
+            } else {
+                word.clear();
+            }
 
             float score;
             fin.read((char *) &score, sizeof(score));
@@ -178,10 +186,6 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
             vocab.token_to_id[word] = i;
             vocab.id_to_token[i] = word;
             vocab.score[i] = score;
-
-            //if (i < 30000) {
-            //    fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str());
-            //}
         }
     }
 
@@ -974,7 +978,7 @@ int main(int argc, char ** argv) {
         n_past += embd.size();
         embd.clear();
 
-        if (embd_inp.size() <= input_consumed) {
+        if ((int) embd_inp.size() <= input_consumed) {
             // out of user input, sample next token
             const float top_k = params.top_k;
             const float top_p = params.top_p;
@@ -1011,7 +1015,7 @@ int main(int argc, char ** argv) {
             --remaining_tokens;
         } else {
             // some user input remains from prompt or interaction, forward it to processing
-            while (embd_inp.size() > input_consumed) {
+            while ((int) embd_inp.size() > input_consumed) {
                 embd.push_back(embd_inp[input_consumed]);
                 last_n_tokens.erase(last_n_tokens.begin());
                 last_n_tokens.push_back(embd_inp[input_consumed]);
@@ -1036,7 +1040,7 @@ int main(int argc, char ** argv) {
 
         // in interactive mode, and not currently processing queued inputs;
         // check if we should prompt the user for more
-        if (params.interactive && embd_inp.size() <= input_consumed) {
+        if (params.interactive && (int) embd_inp.size() <= input_consumed) {
             // check for reverse prompt
             for (auto antiprompt_inp : antipromptv_inp) {
                 if (antiprompt_inp.size() && std::equal(antiprompt_inp.rbegin(), antiprompt_inp.rend(), last_n_tokens.rbegin())) {