about summary refs log tree commit diff
path: root/llama.h
diff options
context:
space:
mode:
author Stephan Walter <stephan@walter.name> 2023-04-11 15:03:51 +0000
committer GitHub <noreply@github.com> 2023-04-11 15:03:51 +0000
commit 3e6e70d8e8917b5bd14c7c9f9b89a585f1ff0b31 (patch)
tree 35dc380c6585d36941e155b9aae949e757af2b02 /llama.h
parent 2663d2c6784ad7b77998c6874df25648d597f74b (diff)
Add enum llama_ftype, sync ggml_type to model files (#709)
Diffstat (limited to 'llama.h')
-rw-r--r-- llama.h 10
1 files changed, 9 insertions, 1 deletions
diff --git a/llama.h b/llama.h
index 42c364c..8a0d50f 100644
--- a/llama.h
+++ b/llama.h
@@ -65,6 +65,14 @@ extern "C" {
void * progress_callback_user_data;
};
+ // model file types
+ enum llama_ftype {
+ LLAMA_FTYPE_ALL_F32 = 0,
+ LLAMA_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
+ LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
+ LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
+ };
+
LLAMA_API struct llama_context_params llama_context_default_params();
LLAMA_API bool llama_mmap_supported();
@@ -85,7 +93,7 @@ extern "C" {
LLAMA_API int llama_model_quantize(
const char * fname_inp,
const char * fname_out,
- int itype);
+ enum llama_ftype ftype);
// Returns the KV cache that will contain the context for the
// ongoing prediction with the model.