ggml : generalize `quantize_fns` for simpler FP16 handling (#1237)

* Generalize quantize_fns for simpler FP16 handling
* Remove call to ggml_cuda_mul_mat_get_wsize
* ci : disable FMA for mac os actions

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
author: Stephan Walter <stephan@walter.name> 2023-07-05 16:13:06 +0000
committer: GitHub <noreply@github.com> 2023-07-05 19:13:06 +0300
commit: 1b107b8550dced48dc5f41184640061354226b96 (patch)
tree: a09a4c33c865828cd753c19af71c580f98735be5 /ggml.h
parent: 8567c76b5326e862be0755a8dc1dd988223fcae3 (diff)
1 files changed, 13 insertions, 20 deletions
diff --git a/ggml.h b/ggml.h
index 0af96c7..24ca8ae 100644
--- a/ggml.h
+++ b/ggml.h
@@ -250,8 +250,8 @@ extern "C" {
     GGML_API float       ggml_fp16_to_fp32(ggml_fp16_t x);
     GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
 
-    GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n);
-    GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n);
+    GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n);
+    GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n);
 
     struct ggml_object;
     struct ggml_context;
@@ -1514,26 +1514,19 @@ extern "C" {
     // Internal types and functions exposed for tests and benchmarks
     //
 
-#ifdef  __cplusplus
-    // restrict not standard in C++
-#define GGML_RESTRICT
-#else
-#define GGML_RESTRICT restrict
-#endif
-    typedef void (*dequantize_row_q_t)(const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
-    typedef void (*quantize_row_q_t)  (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
-    typedef void (*vec_dot_q_t)       (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
+    typedef void (*ggml_to_float_t)(const void * x, float * y, int k);
+    typedef void (*ggml_from_float_t)(const float * x, void * y, int k);
+    typedef void (*ggml_vec_dot_t)(const int n, float * s, const void * x, const void * y);
 
     typedef struct {
-        dequantize_row_q_t dequantize_row_q;
-        quantize_row_q_t   quantize_row_q;
-        quantize_row_q_t   quantize_row_q_reference;
-        quantize_row_q_t   quantize_row_q_dot;
-        vec_dot_q_t        vec_dot_q;
-        enum ggml_type     vec_dot_type;
-    } quantize_fns_t;
-
-    quantize_fns_t ggml_internal_get_quantize_fn(size_t i);
+        ggml_to_float_t   to_float;
+        ggml_from_float_t from_float;
+        ggml_from_float_t from_float_reference;
+        ggml_vec_dot_t    vec_dot;
+        enum ggml_type    vec_dot_type;
+    } ggml_type_traits_t;
+
+    ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type i);
 
 #ifdef  __cplusplus
 }
author	Stephan Walter <stephan@walter.name>	2023-07-05 16:13:06 +0000
committer	GitHub <noreply@github.com>	2023-07-05 19:13:06 +0300
commit	1b107b8550dced48dc5f41184640061354226b96 (patch)
tree	a09a4c33c865828cd753c19af71c580f98735be5 /ggml.h
parent	8567c76b5326e862be0755a8dc1dd988223fcae3 (diff)