about | summary | refs | log | tree | commit | diff
path: root/llama.h
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com> 2023-05-19 22:17:18 +0300
committer: GitHub <noreply@github.com> 2023-05-19 22:17:18 +0300
commit: 2d5db48371052087a83974abda3767d1aedec598 (patch)
tree: ca7e6ad4b2be21d96272aece6489b2f39c444ecb /llama.h
parent: 6986c7835adc13ba3f9d933b95671bb1f3984dc6 (diff)
ggml : use F16 instead of F32 in Q4_0, Q4_1, Q8_0 (#1508)
* ggml : use F16 instead of F32 in Q4_0, Q4_1 and Q8_0
* llama : bump LLAMA_FILE_VERSION to 3
* cuda : update Q4 and Q8 dequantize kernels
* ggml : fix AVX dot products
* readme : update performance table + hot topics
Diffstat (limited to 'llama.h')
-rw-r--r-- llama.h | 2
1 file changed, 1 insertion, 1 deletion
diff --git a/llama.h b/llama.h
index f955fa2..fd3f21e 100644
--- a/llama.h
+++ b/llama.h
@@ -19,7 +19,7 @@
# define LLAMA_API
#endif
-#define LLAMA_FILE_VERSION 2
+#define LLAMA_FILE_VERSION 3
#define LLAMA_FILE_MAGIC 'ggjt'
#define LLAMA_FILE_MAGIC_UNVERSIONED 'ggml'
#define LLAMA_SESSION_MAGIC 'ggsn'