Add enum llama_ftype, sync ggml_type to model files (#709)

author: Stephan Walter <stephan@walter.name> 2023-04-11 15:03:51 +0000
committer: GitHub <noreply@github.com> 2023-04-11 15:03:51 +0000
commit: 3e6e70d8e8917b5bd14c7c9f9b89a585f1ff0b31 (patch)
tree: 35dc380c6585d36941e155b9aae949e757af2b02 /llama.h
parent: 2663d2c6784ad7b77998c6874df25648d597f74b (diff)
1 file changed, 9 insertions, 1 deletion
diff --git a/llama.h b/llama.h
index 42c364c..8a0d50f 100644
--- a/llama.h
+++ b/llama.h
@@ -65,6 +65,14 @@ extern "C" {
         void * progress_callback_user_data;
     };
 
+    // model file types; this is the type of the ftype argument of llama_model_quantize
+    enum llama_ftype {
+        LLAMA_FTYPE_ALL_F32     = 0,
+        LLAMA_FTYPE_MOSTLY_F16  = 1,  // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_0 = 2,  // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_1 = 3,  // except 1d tensors
+    };
+
     LLAMA_API struct llama_context_params llama_context_default_params();
 
     LLAMA_API bool llama_mmap_supported();
@@ -85,7 +93,7 @@ extern "C" {
     LLAMA_API int llama_model_quantize(
             const char * fname_inp,
             const char * fname_out,
-                   int   itype);
+      enum llama_ftype   ftype);
 
     // Returns the KV cache that will contain the context for the
     // ongoing prediction with the model.
author	Stephan Walter <stephan@walter.name>	2023-04-11 15:03:51 +0000
committer	GitHub <noreply@github.com>	2023-04-11 15:03:51 +0000
commit	3e6e70d8e8917b5bd14c7c9f9b89a585f1ff0b31 (patch)
tree	35dc380c6585d36941e155b9aae949e757af2b02 /llama.h
parent	2663d2c6784ad7b77998c6874df25648d597f74b (diff)