We could use std::unordered_map over std::map (#305)

* Improve performance by changing std::map to std::unordered_map, and by changing std::map<id, token> id_to_token; to std::vector<token> id_to_token;
* Fix the previous commit: in gpt_vocab_init, add vocab.id_to_token.resize(vocab.token_to_id.size());
* Remove include <map>
* Nest struct token_score inside gpt_vocab
* Rename token to tok
author: Fabio R. Sluzala <Fabio3rs@users.noreply.github.com> 2023-03-21 14:21:50 -0300
committer: GitHub <noreply@github.com> 2023-03-21 19:21:50 +0200
commit: 353ec251a42491f5192c48561da4b444ef67f23c (patch)
tree: 95783d81ab8be8a6875ec708e2e2a7882222250a /utils.h
parent: 89d5d90f3b6d25f134da7a8e252c3432bffcf674 (diff)
1 files changed, 9 insertions, 5 deletions
diff --git a/utils.h b/utils.h
index 6693775..3129038 100644
--- a/utils.h
+++ b/utils.h
@@ -3,7 +3,7 @@
 #pragma once
 
 #include <string>
-#include <map>
+#include <unordered_map>
 #include <vector>
 #include <random>
 #include <thread>
@@ -65,15 +65,19 @@ struct llama_vocab {
     using id    = int32_t;
     using token = std::string;
 
-    std::map<token, id> token_to_id;
-    std::map<id, token> id_to_token;
-    std::map<id, float> score;
+    struct token_score {
+        token tok;
+        float score;
+    };
+
+    std::unordered_map<token, id> token_to_id;
+    std::vector<token_score> id_to_token;
 };
 
 void replace(std::string & str, const std::string & needle, const std::string & replacement);
 
 // poor-man's JSON parsing
-std::map<std::string, int32_t> json_parse(const std::string & fname);
+std::unordered_map<std::string, int32_t> json_parse(const std::string & fname);
 
 // TODO: temporary until #77 is merged, need this now for some tokenizer tests
 bool llama_vocab_load(const std::string & fname, llama_vocab & vocab);
author	Fabio R. Sluzala <Fabio3rs@users.noreply.github.com>	2023-03-21 14:21:50 -0300
committer	GitHub <noreply@github.com>	2023-03-21 19:21:50 +0200
commit	353ec251a42491f5192c48561da4b444ef67f23c (patch)
tree	95783d81ab8be8a6875ec708e2e2a7882222250a /utils.h
parent	89d5d90f3b6d25f134da7a8e252c3432bffcf674 (diff)