aboutsummaryrefslogtreecommitdiff
path: root/ggml.c
diff options
context:
space:
mode:
author unbounded <haakon@likedan.net> 2023-04-08 00:09:18 +0200
committer GitHub <noreply@github.com> 2023-04-08 00:09:18 +0200
commit 62cfc54f77e519057110265b52b0d614fa363e2a (patch)
tree 899b22495ef99e5e1661891c7ec0fd6de45aa43e /ggml.c
parent 698f7b5d6316a1f8453b3b32fd0d637d24952ffd (diff)
Add quantize-stats command for testing quantization (#728)
Adds a command that calculates statistics over the errors introduced by quantization, such as mean squared error, max error, and some percentile errors for layer weights. Should be useful for testing quantization improvements. Exposes some internal state from ggml and llama for testing.
Diffstat (limited to 'ggml.c')
-rw-r--r--ggml.c30
1 files changed, 14 insertions, 16 deletions
diff --git a/ggml.c b/ggml.c
index 8a60bc3..dc084e6 100644
--- a/ggml.c
+++ b/ggml.c
@@ -6564,29 +6564,27 @@ static void ggml_compute_forward_mul_mat_f16_f32(
//}
}
-typedef void (*dequantize_row_q_t)(const void * restrict x, float * restrict y, int k);
-typedef void (*quantize_row_q_t)(const float * restrict x, void * restrict y, int k);
-typedef void (*vec_dot_q_t)(const int n, float * restrict s, const void * restrict x, const void * restrict y);
-
-typedef struct {
- dequantize_row_q_t dequantize_row_q;
- quantize_row_q_t quantize_row_q;
- vec_dot_q_t vec_dot_q;
-} quantize_fns_t;
-
static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
[GGML_TYPE_Q4_0] = {
- .dequantize_row_q = dequantize_row_q4_0,
- .quantize_row_q = quantize_row_q4_0,
- .vec_dot_q = ggml_vec_dot_q4_0,
+ .dequantize_row_q = dequantize_row_q4_0,
+ .quantize_row_q = quantize_row_q4_0,
+ .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_0_reference,
+ .vec_dot_q = ggml_vec_dot_q4_0,
},
[GGML_TYPE_Q4_1] = {
- .dequantize_row_q = dequantize_row_q4_1,
- .quantize_row_q = quantize_row_q4_1,
- .vec_dot_q = ggml_vec_dot_q4_1,
+ .dequantize_row_q = dequantize_row_q4_1,
+ .quantize_row_q = quantize_row_q4_1,
+ .quantize_row_q_reference = (quantize_row_q_t) quantize_row_q4_1_reference,
+ .vec_dot_q = ggml_vec_dot_q4_1,
},
};
+// For internal test use
+quantize_fns_t ggml_internal_get_quantize_fn(size_t i) {
+ GGML_ASSERT(i < GGML_TYPE_COUNT);
+ return quantize_fns[i];
+}
+
static void ggml_compute_forward_mul_mat_q_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,