author    Georgi Gerganov <ggerganov@gmail.com>    2023-03-10 21:50:46 +0200
committer Georgi Gerganov <ggerganov@gmail.com>    2023-03-10 21:50:46 +0200
commit    319cdb3e1ffe263cf5b08249c9559e011396c1de (patch)
tree      90c02a60d3e381ebd882c5c52d9dca114714ce43
parent    775328064e69db1ebd7e19ccb59d2a7fa6142470 (diff)
Final touches
-rw-r--r--  README.md         |  3
-rw-r--r--  main.cpp          |  1
-rw-r--r--  models/.gitignore |  0
-rw-r--r--  utils.cpp         | 54
-rw-r--r--  utils.h           |  6
5 files changed, 32 insertions(+), 32 deletions(-)
diff --git a/README.md b/README.md
index 87808fd..d2b9a70 100644
--- a/README.md
+++ b/README.md
@@ -114,6 +114,5 @@ python3 convert-pth-to-ggml.py models/7B/ 1
   In general, it seems to work, but I think it fails for unicode character support. Hopefully, someone can help with that
 - I don't know yet how much the quantization affects the quality of the generated text
 - Probably the token sampling can be improved
-- No Windows support
 - x86 quantization support [not yet ready](https://github.com/ggerganov/ggml/pull/27). Basically, you want to run this on Apple Silicon
-
+
diff --git a/main.cpp b/main.cpp
index fb9eb17..982adf1 100644
--- a/main.cpp
+++ b/main.cpp
@@ -728,6 +728,7 @@ int main(int argc, char ** argv) {
 
         // end of text token
         if (embd.back() == 2) {
+            printf(" [end of text]\n");
             break;
         }
     }
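
The hunk above makes main.cpp announce when the model emits the end-of-text token; note that the token id 2 is hard-coded (the TODO comments elsewhere in this commit suggest such magic ids should eventually come from the vocab). Below is a minimal, self-contained sketch of the same loop shape; generate_next_token() is a made-up stand-in for the real model evaluation and sampling, not code from this repository:

#include <cstdio>
#include <cstdlib>
#include <vector>

// Made-up stand-in for the real model evaluation + sampling in main.cpp;
// returns the end-of-text id (2) now and then so the loop terminates.
static int generate_next_token() {
    return (std::rand() % 20 == 0) ? 2 : 100 + std::rand() % 1000;
}

int main() {
    std::vector<int> embd; // generated token ids, as in main.cpp

    for (int i = 0; i < 128; ++i) { // cap comparable to n_predict
        embd.push_back(generate_next_token());

        // end of text token
        if (embd.back() == 2) {
            std::printf(" [end of text]\n");
            break;
        }
    }

    return 0;
}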
diff --git a/models/.gitignore b/models/.gitignore
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/models/.gitignore
diff --git a/utils.cpp b/utils.cpp
index 70a2ac2..cd9c001 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -231,39 +231,39 @@ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::stri
 }
 
 std::vector<gpt_vocab::id> llama_tokenize(const gpt_vocab & vocab, const std::string & text, bool bos) {
-    auto res = gpt_tokenize(vocab, text);
+    //auto res = gpt_tokenize(vocab, text);
+
+    //if (bos) {
+    //    res.insert(res.begin(), 1); // TODO: replace with vocab.bos
+    //}
+
+    std::vector<gpt_vocab::id> res;
 
     if (bos) {
-        res.insert(res.begin(), 1); // TODO: replace with vocab.bos
+        res.push_back(1); // TODO: replace with vocab.bos
     }
 
-    //std::vector<gpt_vocab::id> res;
+    //find the longest token that matches the text
+    int pos = 0;
+    while (true) {
+        int l = 0;
+        int t = 0;
+        for (const auto & kv : vocab.id_to_token) {
+            if (kv.second.size() < l) continue;
+            if (kv.second.size() > text.size() - pos) continue;
+            if (text.substr(pos, kv.second.size()) == kv.second) {
+                l = kv.second.size();
+                t = kv.first;
+            }
+        }
 
-    //if (bos) {
-    //    res.push_back(1); // TODO: replace with vocab.bos
-    //}
+        if (l == 0 && t != 13) {
+            break;
+        }
 
-    // find the longest token that matches the text
-    //int pos = 0;
-    //while (true) {
-    //    int l = 0;
-    //    int t = 0;
-    //    for (const auto & kv : vocab.id_to_token) {
-    //        if (kv.second.size() < l) continue;
-    //        if (kv.second.size() > text.size() - pos) continue;
-    //        if (text.substr(pos, kv.second.size()) == kv.second) {
-    //            l = kv.second.size();
-    //            t = kv.first;
-    //        }
-    //    }
-
-    //    if (l == 0 && t != 13) {
-    //        break;
-    //    }
-
-    //    res.push_back(t);
-    //    pos += l;
-    //}
+        res.push_back(t);
+        pos += l;
+    }
 
     return res;
 }
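
The rewritten llama_tokenize drops the GPT-2 style tokenizer in favor of a greedy longest-match scan: at each position it takes the longest vocabulary entry that matches the remaining text (the t != 13 guard looks like a special case for token id 13, presumably the newline token). Below is a self-contained sketch of the same greedy idea over a toy vocabulary; the map contents are illustrative, not the real LLaMA vocab:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Greedy longest-match tokenization, mirroring the loop added above.
// The vocabulary here is a toy stand-in for vocab.id_to_token.
static std::vector<int> greedy_tokenize(const std::map<int, std::string> & id_to_token,
                                        const std::string & text) {
    std::vector<int> res;
    size_t pos = 0;
    while (pos < text.size()) {
        size_t best_len = 0;
        int best_id = 0;
        for (const auto & kv : id_to_token) {
            const std::string & tok = kv.second;
            if (tok.size() <= best_len) continue;          // keep only the longest match
            if (tok.size() > text.size() - pos) continue;  // would run past the end
            if (text.compare(pos, tok.size(), tok) == 0) {
                best_len = tok.size();
                best_id = kv.first;
            }
        }
        if (best_len == 0) break; // nothing in the vocab matches: stop
        res.push_back(best_id);
        pos += best_len;
    }
    return res;
}

int main() {
    const std::map<int, std::string> vocab = {
        {1, "he"}, {2, "hell"}, {3, "hello"}, {4, " wor"}, {5, " world"},
    };
    for (const int id : greedy_tokenize(vocab, "hello world")) {
        std::printf("%d ", id); // prints "3 5": the longest matches win
    }
    std::printf("\n");
    return 0;
}

Note that the loop in the commit rescans the entire vocabulary at every position, which is O(|vocab| * |text|); a trie keyed on token prefixes would avoid the rescan, at the cost of extra setup.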
diff --git a/utils.h b/utils.h
index d291964..20c42ba 100644
--- a/utils.h
+++ b/utils.h
@@ -15,12 +15,12 @@
 struct gpt_params {
     int32_t seed      = -1; // RNG seed
     int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
-    int32_t n_predict = 200; // new tokens to predict
+    int32_t n_predict = 128; // new tokens to predict
 
     // sampling parameters
-    int32_t top_k = 100;
+    int32_t top_k = 40;
     float   top_p = 0.95f;
-    float   temp  = 0.8f;
+    float   temp  = 0.80f;
 
     int32_t n_batch = 8; // batch size for prompt processing
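
The new defaults tighten sampling: fewer new tokens (128) and a smaller top-k cutoff (40), with the temperature unchanged but written as 0.80f. For reference, here is a sketch of how top-k sampling with temperature is typically applied to logits; the function and names are illustrative, not code from this repository, and the top_p = 0.95f nucleus step is omitted:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <random>
#include <utility>
#include <vector>

// Illustrative top-k + temperature sampling: keep the k highest logits,
// softmax them at temperature temp, then draw one token id.
static int sample_top_k(std::vector<std::pair<float, int>> logits,
                        int top_k, float temp, std::mt19937 & rng) {
    top_k = std::min<int>(top_k, (int) logits.size());
    std::partial_sort(logits.begin(), logits.begin() + top_k, logits.end(),
                      [](const auto & a, const auto & b) { return a.first > b.first; });
    logits.resize(top_k);

    // Softmax over the surviving logits (max-subtracted for numerical stability).
    std::vector<double> probs(top_k);
    const float max_logit = logits[0].first;
    for (int i = 0; i < top_k; ++i) {
        probs[i] = std::exp((logits[i].first - max_logit) / temp);
    }

    std::discrete_distribution<int> dist(probs.begin(), probs.end());
    return logits[dist(rng)].second;
}

int main() {
    std::mt19937 rng(1234);
    std::vector<std::pair<float, int>> logits;
    for (int id = 0; id < 32000; ++id) {
        logits.push_back({ -0.001f * id, id }); // fabricated, monotonically decreasing
    }
    std::printf("sampled id: %d\n", sample_top_k(logits, 40, 0.80f, rng));
    return 0;
}

std::discrete_distribution normalizes its weights itself, so the softmax sum never needs to be divided out explicitly.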