Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 4 ++++
1 file changed, 4 insertions(+), 0 deletions(-)
@@ -484,6 +484,7 @@ struct llama_file_loader {
             case GGML_TYPE_Q4_1:
             case GGML_TYPE_Q4_2:
             case GGML_TYPE_Q4_3:
+            case GGML_TYPE_Q8_0:
                 break;
             default: {
                 throw format("unrecognized tensor type %u\n", shard.type);
@@ -558,6 +559,7 @@ struct llama_file_saver {
             case GGML_TYPE_Q4_1:
             case GGML_TYPE_Q4_2:
             case GGML_TYPE_Q4_3:
+            case GGML_TYPE_Q8_0:
                 break;
             default: LLAMA_ASSERT(false);
         }
@@ -848,6 +850,7 @@ static const char *llama_ftype_name(enum llama_ftype ftype) {
                                       return "mostly Q4_1, some F16";
         case LLAMA_FTYPE_MOSTLY_Q4_2: return "mostly Q4_2";
         case LLAMA_FTYPE_MOSTLY_Q4_3: return "mostly Q4_3";
+        case LLAMA_FTYPE_MOSTLY_Q8_0: return "mostly Q8_0";
         default: return "unknown, may not work";
     }
 }
@@ -1585,6 +1588,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         case LLAMA_FTYPE_MOSTLY_Q4_1: quantized_type = GGML_TYPE_Q4_1; break;
         case LLAMA_FTYPE_MOSTLY_Q4_2: quantized_type = GGML_TYPE_Q4_2; break;
         case LLAMA_FTYPE_MOSTLY_Q4_3: quantized_type = GGML_TYPE_Q4_3; break;
+        case LLAMA_FTYPE_MOSTLY_Q8_0: quantized_type = GGML_TYPE_Q8_0; break;
         default: throw format("invalid output file type %d\n", ftype);
     };
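
Taken together, the four hunks wire the new Q8_0 tensor type through the file loader, the file saver, the ftype pretty-printer, and the quantizer's type dispatch. Below is a minimal sketch of how the new format could be exercised through the public API once this change is in place; it is not part of this commit, the model paths are placeholders, and the three-argument llama_model_quantize() signature is an assumption based on the llama.h of this period.

// Sketch (not part of this commit): quantize an F16 model to the new Q8_0
// format via the public API. The three-argument llama_model_quantize()
// signature is assumed from the llama.h of this era; paths are hypothetical.
#include <stdio.h>
#include "llama.h"

int main(void) {
    const char * fname_inp = "models/7B/ggml-model-f16.bin";   // hypothetical input path
    const char * fname_out = "models/7B/ggml-model-q8_0.bin";  // hypothetical output path

    // LLAMA_FTYPE_MOSTLY_Q8_0 is the ftype this diff teaches the loader,
    // saver and quantizer to accept.
    if (llama_model_quantize(fname_inp, fname_out, LLAMA_FTYPE_MOSTLY_Q8_0) != 0) {
        fprintf(stderr, "failed to quantize model to Q8_0\n");
        return 1;
    }
    return 0;
}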