about | summary | refs | log | tree | commit | diff
path: root/llama.h
diff options
context:
space:
mode:
author Georgi Gerganov <ggerganov@gmail.com> 2023-04-20 20:35:53 +0300
committer GitHub <noreply@github.com> 2023-04-20 20:35:53 +0300
commit e0305ead3a072db9c08b35c9600c49273b38a4b5 (patch)
tree 6352a4bb010b4e2aa7136bdb925d7a7a82e83fd6 /llama.h
parent 6a9661ea5ad72166b700ae5e87976e4452499dda (diff)
ggml : add Q4_3 quantization (#1082)
Diffstat (limited to 'llama.h')
-rw-r--r-- llama.h 1
1 files changed, 1 insertions, 0 deletions
diff --git a/llama.h b/llama.h
index 208b03d..011e34c 100644
--- a/llama.h
+++ b/llama.h
@@ -73,6 +73,7 @@ extern "C" {
LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
LLAMA_FTYPE_MOSTLY_Q4_2 = 5, // except 1d tensors
+ LLAMA_FTYPE_MOSTLY_Q4_3 = 6, // except 1d tensors
};
LLAMA_API struct llama_context_params llama_context_default_params();