drop quantize.py (now that models are using a single file)

author: Pavol Rusnak <pavol@rusnak.io> 2023-03-31 00:52:06 +0200
committer: Pavol Rusnak <pavol@rusnak.io> 2023-03-31 01:07:32 +0200
commit: 9733104be5389ebb1ff05095eca2a70280cd875a (patch)
tree: 284b9e2b26e39271889e0137e3fcafde2b0da414 /README.md
parent: 3df890aef432ce68143cfafcd7caf828bc4c3e55 (diff)
1 file changed, 2 insertions, 2 deletions
diff --git a/README.md b/README.md
index cefcfb7..07066cd 100644
--- a/README.md
+++ b/README.md
@@ -155,8 +155,8 @@ python3 -m pip install torch numpy sentencepiece
 # convert the 7B model to ggml FP16 format
 python3 convert-pth-to-ggml.py models/7B/ 1
 
-# quantize the model to 4-bits
-python3 quantize.py 7B
+# quantize the model to 4-bits (using method 2 = q4_0)
+./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin 2
 
 # run the inference
 ./main -m ./models/7B/ggml-model-q4_0.bin -n 128
author	Pavol Rusnak <pavol@rusnak.io>	2023-03-31 00:52:06 +0200
committer	Pavol Rusnak <pavol@rusnak.io>	2023-03-31 01:07:32 +0200
commit	9733104be5389ebb1ff05095eca2a70280cd875a (patch)
tree	284b9e2b26e39271889e0137e3fcafde2b0da414 /README.md
parent	3df890aef432ce68143cfafcd7caf828bc4c3e55 (diff)