aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Georgi Gerganov <ggerganov@gmail.com> 2023-03-21 17:59:16 +0200
committer Georgi Gerganov <ggerganov@gmail.com> 2023-03-21 17:59:16 +0200
commit 3bfa3b43b7319b71853bfc7d3cf4e9767c24bbc8 (patch)
tree 6c5bff70b2bc2a5f38e105e5a506511584a887f5
parent 715d292ee0e34d27f27af43d7feaad1f1344981d (diff)
Fix convert script, warnings, alpaca instructions, default params
-rw-r--r-- README.md 10
-rwxr-xr-x alpaca.sh 2
-rw-r--r-- convert-pth-to-ggml.py 8
-rw-r--r-- main.cpp 20
4 files changed, 23 insertions, 17 deletions
diff --git a/README.md b/README.md
index 349e82c..f0b0c2a 100644
--- a/README.md
+++ b/README.md
@@ -193,15 +193,15 @@ First, download the `ggml` Alpaca model into the `./models` folder:
```
# use one of these
# TODO: add a script to simplify the download
-curl -o ggml2-alpaca-7b-q4.bin -C - https://gateway.estuary.tech/gw/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
-curl -o ggml2-alpaca-7b-q4.bin -C - https://ipfs.io/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
-curl -o ggml2-alpaca-7b-q4.bin -C - https://cloudflare-ipfs.com/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
+curl -o ./models/ggml-alpaca-7b-q4.bin -C - https://gateway.estuary.tech/gw/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
+curl -o ./models/ggml-alpaca-7b-q4.bin -C - https://ipfs.io/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
+curl -o ./models/ggml-alpaca-7b-q4.bin -C - https://cloudflare-ipfs.com/ipfs/QmUp1UGeQFDqJKvtjbSYPBiZZKRjLp8shVP9hT8ZB9Ynv1
```
Now run the `main` tool like this:
```
-./main -m ./models/ggml2-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins
+./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins
```
Sample run:
@@ -218,7 +218,7 @@ Sample run:
There 26 letters in the English Alphabet
> What is the most common way of transportation in Amsterdam?
The majority (54%) are using public transit. This includes buses, trams and metros with over 100 lines throughout the city which make it very accessible for tourists to navigate around town as well as locals who commute by tram or metro on a daily basis
-> List 5 words that start with "ca".
+> List 5 words that start with "ca".
cadaver, cauliflower, cabbage (vegetable), catalpa (tree) and Cailleach.
>
```
diff --git a/alpaca.sh b/alpaca.sh
index 284989b..2f36d6f 100755
--- a/alpaca.sh
+++ b/alpaca.sh
@@ -3,4 +3,4 @@
# Temporary script - will be removed in the future
#
-./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.96 --repeat_penalty 1 -t 7
+./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7
diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py
index 46f7eba..db5b00f 100644
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -27,9 +27,9 @@ from sentencepiece import SentencePieceProcessor
def parse_args():
parser = argparse.ArgumentParser(description='Convert a LLaMA model checkpoint to a ggml compatible file')
- parser.add_argument('dir_model', help='directory containing the model checkpoint')
- parser.add_argument('ftype', type=int, choices=[0, 1], default=1, help='file type (0: float32, 1: float16)')
- parser.add_argument('vocab_only', type=bool, default=False, help='only write vocab to file')
+ parser.add_argument('dir_model', help='directory containing the model checkpoint')
+ parser.add_argument('ftype', help='file type (0: float32, 1: float16)', type=int, choices=[0, 1], default=1)
+ parser.add_argument('vocab_only', help='only write vocab to file', type=int, default=0, nargs='?')
return parser.parse_args()
def get_n_parts(dim):
@@ -135,6 +135,8 @@ def main():
hparams, tokenizer = load_hparams_and_tokenizer(dir_model)
+ print(args)
+
# if only writing vocab to file
if args.vocab_only:
diff --git a/main.cpp b/main.cpp
index 662a2a7..6bae80c 100644
--- a/main.cpp
+++ b/main.cpp
@@ -165,12 +165,20 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
// load vocab
{
std::string word;
+ std::vector<char> tmp(64);
+
for (int i = 0; i < model.hparams.n_vocab; i++) {
uint32_t len;
fin.read((char *) &len, sizeof(len));
word.resize(len);
- fin.read((char *) word.data(), len);
+ if (len > 0) {
+ tmp.resize(len);
+ fin.read(tmp.data(), len);
+ word.assign(tmp.data(), len);
+ } else {
+ word.clear();
+ }
float score;
fin.read((char *) &score, sizeof(score));
@@ -178,10 +186,6 @@ bool llama_model_load(const std::string & fname, llama_model & model, llama_voca
vocab.token_to_id[word] = i;
vocab.id_to_token[i] = word;
vocab.score[i] = score;
-
- //if (i < 30000) {
- // fprintf(stderr, "%s: vocab[%d] = '%s'\n", __func__, i, word.c_str());
- //}
}
}
@@ -974,7 +978,7 @@ int main(int argc, char ** argv) {
n_past += embd.size();
embd.clear();
- if (embd_inp.size() <= input_consumed) {
+ if ((int) embd_inp.size() <= input_consumed) {
// out of user input, sample next token
const float top_k = params.top_k;
const float top_p = params.top_p;
@@ -1011,7 +1015,7 @@ int main(int argc, char ** argv) {
--remaining_tokens;
} else {
// some user input remains from prompt or interaction, forward it to processing
- while (embd_inp.size() > input_consumed) {
+ while ((int) embd_inp.size() > input_consumed) {
embd.push_back(embd_inp[input_consumed]);
last_n_tokens.erase(last_n_tokens.begin());
last_n_tokens.push_back(embd_inp[input_consumed]);
@@ -1036,7 +1040,7 @@ int main(int argc, char ** argv) {
// in interactive mode, and not currently processing queued inputs;
// check if we should prompt the user for more
- if (params.interactive && embd_inp.size() <= input_consumed) {
+ if (params.interactive && (int) embd_inp.size() <= input_consumed) {
// check for reverse prompt
for (auto antiprompt_inp : antipromptv_inp) {
if (antiprompt_inp.size() && std::equal(antiprompt_inp.rbegin(), antiprompt_inp.rend(), last_n_tokens.rbegin())) {