We could use std::unordered_map over std::map (#305)

* Improve performance by changing std::map to std::unordered_map, and std::map<id, token> id_to_token; to std::vector<token> id_to_token;
* Fix last commit on gpt_vocab_init: add vocab.id_to_token.resize(vocab.token_to_id.size());
* Removed include <map>
* Nest struct token score inside gpt_vocab
* Renamed token to tok
author: Fabio R. Sluzala <Fabio3rs@users.noreply.github.com> 2023-03-21 14:21:50 -0300
committer: GitHub <noreply@github.com> 2023-03-21 19:21:50 +0200
commit: 353ec251a42491f5192c48561da4b444ef67f23c (patch)
tree: 95783d81ab8be8a6875ec708e2e2a7882222250a /main.cpp
parent: 89d5d90f3b6d25f134da7a8e252c3432bffcf674 (diff)
1 files changed, 10 insertions, 8 deletions
diff --git a/main.cpp b/main.cpp
index 43b82b1..fe9e583 100644
--- a/main.cpp
+++ b/main.cpp
@@ -9,7 +9,6 @@
 #include <cstring>
 #include <fstream>
 #include <iostream>
-#include <map>
 #include <string>
 #include <vector>
 
@@ -69,7 +68,7 @@ void set_console_state(console_state new_st)
 static const int EOS_TOKEN_ID = 2;
 
 // determine number of model parts based on the dimension
-static const std::map<int, int> LLAMA_N_PARTS = {
+static const std::unordered_map<int, int> LLAMA_N_PARTS = {
     { 4096, 1 },
     { 5120, 2 },
     { 6656, 4 },
@@ -123,7 +122,7 @@ struct llama_model {
 
     //
     struct ggml_context * ctx;
-    std::map<std::string, struct ggml_tensor *> tensors;
+    std::unordered_map<std::string, struct ggml_tensor *> tensors;
 };
 
 // load the model's weights from a file
@@ -208,6 +207,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
     // load vocab
     {
         std::string word;
+        vocab.id_to_token.resize(model.hparams.n_vocab);
         std::vector<char> tmp(64);
 
         for (int i = 0; i < model.hparams.n_vocab; i++) {
@@ -227,8 +227,10 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
             fin.read((char *) &score, sizeof(score));
 
             vocab.token_to_id[word] = i;
-            vocab.id_to_token[i] = word;
-            vocab.score[i] = score;
+
+            auto &tok_score = vocab.id_to_token[i];
+            tok_score.tok = word;
+            tok_score.score = score;
         }
     }
 
@@ -1028,7 +1030,7 @@ int main(int argc, char ** argv) {
     fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
     for (int i = 0; i < (int) embd_inp.size(); i++) {
-        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).c_str());
+        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], vocab.id_to_token.at(embd_inp[i]).tok.c_str());
     }
     fprintf(stderr, "\n");
     if (params.interactive) {
@@ -1154,7 +1156,7 @@ int main(int argc, char ** argv) {
         // display text
         if (!input_noecho) {
             for (auto id : embd) {
-                printf("%s", vocab.id_to_token[id].c_str());
+                printf("%s", vocab.id_to_token[id].tok.c_str());
             }
             fflush(stdout);
         }
@@ -1169,7 +1171,7 @@ int main(int argc, char ** argv) {
             // check for reverse prompt
             std::string last_output;
             for (auto id : last_n_tokens) {
-                last_output += vocab.id_to_token[id];
+                last_output += vocab.id_to_token[id].tok;
             }
 
             // Check if each of the reverse prompts appears at the end of the output.
author	Fabio R. Sluzala <Fabio3rs@users.noreply.github.com>	2023-03-21 14:21:50 -0300
committer	GitHub <noreply@github.com>	2023-03-21 19:21:50 +0200
commit	353ec251a42491f5192c48561da4b444ef67f23c (patch)
tree	95783d81ab8be8a6875ec708e2e2a7882222250a /main.cpp
parent	89d5d90f3b6d25f134da7a8e252c3432bffcf674 (diff)