about | summary | refs | log | tree | commit | diff
path: root/README.md
diff options
context:
space:
mode:
author Pavol Rusnak <pavol@rusnak.io> 2023-03-31 00:52:06 +0200
committer Pavol Rusnak <pavol@rusnak.io> 2023-03-31 01:07:32 +0200
commit 9733104be5389ebb1ff05095eca2a70280cd875a (patch)
tree 284b9e2b26e39271889e0137e3fcafde2b0da414 /README.md
parent 3df890aef432ce68143cfafcd7caf828bc4c3e55 (diff)
drop quantize.py (now that models are using a single file)
Diffstat (limited to 'README.md')
-rw-r--r-- README.md 4
1 files changed, 2 insertions, 2 deletions
diff --git a/README.md b/README.md
index cefcfb7..07066cd 100644
--- a/README.md
+++ b/README.md
@@ -155,8 +155,8 @@ python3 -m pip install torch numpy sentencepiece
# convert the 7B model to ggml FP16 format
python3 convert-pth-to-ggml.py models/7B/ 1
-# quantize the model to 4-bits
-python3 quantize.py 7B
+# quantize the model to 4-bits (using method 2 = q4_0)
+./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin 2
# run the inference
./main -m ./models/7B/ggml-model-q4_0.bin -n 128