about | summary | refs | log | tree | commit | diff
path: root/ggml-metal.m
diff options
context:
space:
mode:
author Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2023-07-20 15:18:43 +0300
committer GitHub <noreply@github.com> 2023-07-20 15:18:43 +0300
commit 785829dfe8baf0213f2ff66963d28c62f92d7930 (patch)
tree ae8080083db6275b0ed01aeeba3763633d7d0c33 /ggml-metal.m
parent fff0e0eafe817eef429ecb64f892ab7bdae31846 (diff)
Faster Q4_K on Metal (#2290)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml-metal.m')
-rw-r--r-- ggml-metal.m 7
1 files changed, 4 insertions, 3 deletions
diff --git a/ggml-metal.m b/ggml-metal.m
index d80a380..5e2a211 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -694,8 +694,8 @@ void ggml_metal_graph_compute(
GGML_ASSERT(ne02 == 1);
GGML_ASSERT(ne12 == 1);
- nth0 = 4;
- nth1 = 16;
+ nth0 = 2;
+ nth1 = 32;
[encoder setComputePipelineState:ctx->pipeline_mul_mat_q4_K_f32];
} break;
case GGML_TYPE_Q5_K:
@@ -739,7 +739,8 @@ void ggml_metal_graph_compute(
[encoder setBytes:&ne0 length:sizeof(ne0) atIndex:13];
[encoder setBytes:&ne1 length:sizeof(ne1) atIndex:14];
- if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1) {
+ if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 ||
+ src0t == GGML_TYPE_Q4_K) {
[encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7) / 8, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
}
else if (src0t == GGML_TYPE_Q2_K ||