From 2d5db48371052087a83974abda3767d1aedec598 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Fri, 19 May 2023 22:17:18 +0300
Subject: ggml : use F16 instead of F32 in Q4_0, Q4_1, Q8_0 (#1508)

* ggml : use F16 instead of F32 in Q4_0, Q4_1 and Q8_0
* llama : bump LLAMA_FILE_VERSION to 3
* cuda : update Q4 and Q8 dequantize kernels
* ggml : fix AVX dot products
* readme : update performance table + hot topics
---
 ggml.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'ggml.h')

diff --git a/ggml.h b/ggml.h
index 255541d..dce5ca1 100644
--- a/ggml.h
+++ b/ggml.h
@@ -190,7 +190,7 @@
 #define GGML_FILE_MAGIC 0x67676d6c // "ggml"
 #define GGML_FILE_VERSION 1

-#define GGML_QNT_VERSION 1 // bump this on quantization format changes
+#define GGML_QNT_VERSION 2 // bump this on quantization format changes
 #define GGML_QNT_VERSION_FACTOR 1000 // do not change this

 #define GGML_MAX_DIMS 4
--
cgit v1.2.3