ggml : add Q4_3 quantization (#1082)

author: Georgi Gerganov <ggerganov@gmail.com> 2023-04-20 20:35:53 +0300
committer: GitHub <noreply@github.com> 2023-04-20 20:35:53 +0300
commit: e0305ead3a072db9c08b35c9600c49273b38a4b5 (patch)
tree: 6352a4bb010b4e2aa7136bdb925d7a7a82e83fd6 /ggml.h
parent: 6a9661ea5ad72166b700ae5e87976e4452499dda (diff)
1 file changed, 5 insertions, 1 deletion
diff --git a/ggml.h b/ggml.h
index 570147f..6e81d81 100644
--- a/ggml.h
+++ b/ggml.h
@@ -205,7 +205,8 @@ enum ggml_type {
     GGML_TYPE_Q4_0 = 2,
     GGML_TYPE_Q4_1 = 3,
     GGML_TYPE_Q4_2 = 4,
-    GGML_TYPE_Q8_0 = 5,
+    GGML_TYPE_Q4_3 = 5,
+    GGML_TYPE_Q8_0 = 6,
     GGML_TYPE_I8,
     GGML_TYPE_I16,
     GGML_TYPE_I32,
@@ -360,6 +361,8 @@ const char * ggml_type_name(enum ggml_type type);
 
 size_t ggml_element_size(const struct ggml_tensor * tensor);
 
+bool ggml_is_quantized(enum ggml_type type);
+
 struct ggml_context * ggml_init(struct ggml_init_params params);
 void ggml_free(struct ggml_context * ctx);
 
@@ -808,6 +811,7 @@ enum ggml_opt_result ggml_opt(
 size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist);
 size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist);
 size_t ggml_quantize_q4_2(const float * src, void * dst, int n, int k, int64_t * hist);
+size_t ggml_quantize_q4_3(const float * src, void * dst, int n, int k, int64_t * hist);
 
 //
 // system info
author	Georgi Gerganov <ggerganov@gmail.com>	2023-04-20 20:35:53 +0300
committer	GitHub <noreply@github.com>	2023-04-20 20:35:53 +0300
commit	e0305ead3a072db9c08b35c9600c49273b38a4b5 (patch)
tree	6352a4bb010b4e2aa7136bdb925d7a7a82e83fd6 /ggml.h
parent	6a9661ea5ad72166b700ae5e87976e4452499dda (diff)