From e0305ead3a072db9c08b35c9600c49273b38a4b5 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 20 Apr 2023 20:35:53 +0300 Subject: ggml : add Q4_3 quantization (#1082) --- ggml.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'ggml.h') diff --git a/ggml.h b/ggml.h index 570147f..6e81d81 100644 --- a/ggml.h +++ b/ggml.h @@ -205,7 +205,8 @@ enum ggml_type { GGML_TYPE_Q4_0 = 2, GGML_TYPE_Q4_1 = 3, GGML_TYPE_Q4_2 = 4, - GGML_TYPE_Q8_0 = 5, + GGML_TYPE_Q4_3 = 5, + GGML_TYPE_Q8_0 = 6, GGML_TYPE_I8, GGML_TYPE_I16, GGML_TYPE_I32, @@ -360,6 +361,8 @@ const char * ggml_type_name(enum ggml_type type); size_t ggml_element_size(const struct ggml_tensor * tensor); +bool ggml_is_quantized(enum ggml_type type); + struct ggml_context * ggml_init(struct ggml_init_params params); void ggml_free(struct ggml_context * ctx); @@ -808,6 +811,7 @@ enum ggml_opt_result ggml_opt( size_t ggml_quantize_q4_0(const float * src, void * dst, int n, int k, int64_t * hist); size_t ggml_quantize_q4_1(const float * src, void * dst, int n, int k, int64_t * hist); size_t ggml_quantize_q4_2(const float * src, void * dst, int n, int k, int64_t * hist); +size_t ggml_quantize_q4_3(const float * src, void * dst, int n, int k, int64_t * hist); // // system info -- cgit v1.2.3