diff options
author | Stephan Walter <stephan@walter.name> | 2023-04-11 15:03:51 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-11 15:03:51 +0000 |
commit | 3e6e70d8e8917b5bd14c7c9f9b89a585f1ff0b31 (patch) | |
tree | 35dc380c6585d36941e155b9aae949e757af2b02 /llama.h | |
parent | 2663d2c6784ad7b77998c6874df25648d597f74b (diff) |
Add enum llama_ftype, sync ggml_type to model files (#709)
Diffstat (limited to 'llama.h')
-rw-r--r-- | llama.h | 10 |
1 file changed, 9 insertions, 1 deletion
@@ -65,6 +65,14 @@ extern "C" {
         void * progress_callback_user_data;
     };

+    // model file types
+    enum llama_ftype {
+        LLAMA_FTYPE_ALL_F32     = 0,
+        LLAMA_FTYPE_MOSTLY_F16  = 1, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
+    };
+
     LLAMA_API struct llama_context_params llama_context_default_params();

     LLAMA_API bool llama_mmap_supported();
@@ -85,7 +93,7 @@ extern "C" {
     LLAMA_API int llama_model_quantize(
             const char * fname_inp,
             const char * fname_out,
-            int itype);
+            enum llama_ftype ftype);

     // Returns the KV cache that will contain the context for the
     // ongoing prediction with the model.