aboutsummaryrefslogtreecommitdiff
path: root/pocs/vdot/q8dot.cpp
diff options
context:
space:
mode:
author Stephan Walter <stephan@walter.name> 2023-07-05 16:13:06 +0000
committer GitHub <noreply@github.com> 2023-07-05 19:13:06 +0300
commit 1b107b8550dced48dc5f41184640061354226b96 (patch)
tree a09a4c33c865828cd753c19af71c580f98735be5 /pocs/vdot/q8dot.cpp
parent 8567c76b5326e862be0755a8dc1dd988223fcae3 (diff)
ggml : generalize `quantize_fns` for simpler FP16 handling (#1237)
* Generalize quantize_fns for simpler FP16 handling
* Remove call to ggml_cuda_mul_mat_get_wsize
* ci : disable FMA for mac os actions

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'pocs/vdot/q8dot.cpp')
-rw-r--r-- pocs/vdot/q8dot.cpp 6
1 files changed, 3 insertions, 3 deletions
diff --git a/pocs/vdot/q8dot.cpp b/pocs/vdot/q8dot.cpp
index 5748c8a..4e0e023 100644
--- a/pocs/vdot/q8dot.cpp
+++ b/pocs/vdot/q8dot.cpp
@@ -136,7 +136,7 @@ int main(int argc, char** argv) {
auto ggml_type = type == 0 ? GGML_TYPE_Q4_0 : GGML_TYPE_Q4_1;
- auto funcs = ggml_internal_get_quantize_fn(ggml_type);
+ auto funcs = ggml_internal_get_type_traits(ggml_type);
Stat simple, ggml;
@@ -156,8 +156,8 @@ int main(int argc, char** argv) {
t1 = std::chrono::high_resolution_clock::now();
float fs;
- if (type == 0) funcs.vec_dot_q(kVecSize * QK4_1, &fs, x40.data(), y.data());
- else funcs.vec_dot_q(kVecSize * QK4_1, &fs, x41.data(), y.data());
+ if (type == 0) funcs.vec_dot(kVecSize * QK4_1, &fs, x40.data(), y.data());
+ else funcs.vec_dot(kVecSize * QK4_1, &fs, x41.data(), y.data());
t2 = std::chrono::high_resolution_clock::now();
t = 1e-3*std::chrono::duration_cast<std::chrono::nanoseconds>(t2-t1).count();
if (iloop > 3) ggml.addResult(fs, t);