From 2d5db48371052087a83974abda3767d1aedec598 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Fri, 19 May 2023 22:17:18 +0300
Subject: ggml : use F16 instead of F32 in Q4_0, Q4_1, Q8_0 (#1508)

* ggml : use F16 instead of F32 in Q4_0, Q4_1 and Q8_0

* llama : bump LLAMA_FILE_VERSION to 3

* cuda : update Q4 and Q8 dequantize kernels

* ggml : fix AVX dot products

* readme : update performance table + hot topics
---
 ggml.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'ggml.h')

diff --git a/ggml.h b/ggml.h
index 255541d..dce5ca1 100644
--- a/ggml.h
+++ b/ggml.h
@@ -190,7 +190,7 @@
 #define GGML_FILE_MAGIC   0x67676d6c // "ggml"
 #define GGML_FILE_VERSION 1
 
-#define GGML_QNT_VERSION        1    // bump this on quantization format changes
+#define GGML_QNT_VERSION        2    // bump this on quantization format changes
 #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
 
 #define GGML_MAX_DIMS          4
-- 
cgit v1.2.3