Faster Q4_K on Metal (#2290)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
author: Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2023-07-20 15:18:43 +0300
committer: GitHub <noreply@github.com> 2023-07-20 15:18:43 +0300
commit: 785829dfe8baf0213f2ff66963d28c62f92d7930 (patch)
tree: ae8080083db6275b0ed01aeeba3763633d7d0c33 /ggml-metal.m
parent: fff0e0eafe817eef429ecb64f892ab7bdae31846 (diff)
1 file changed, 4 insertions, 3 deletions
diff --git a/ggml-metal.m b/ggml-metal.m
index d80a380..5e2a211 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -694,8 +694,8 @@ void ggml_metal_graph_compute(
                                             GGML_ASSERT(ne02 == 1);
                                             GGML_ASSERT(ne12 == 1);
 
-                                            nth0 = 4;
-                                            nth1 = 16;
+                                            nth0 = 2;
+                                            nth1 = 32;
                                             [encoder setComputePipelineState:ctx->pipeline_mul_mat_q4_K_f32];
                                         } break;
                                     case GGML_TYPE_Q5_K:
@@ -739,7 +739,8 @@ void ggml_metal_graph_compute(
                                 [encoder setBytes:&ne0  length:sizeof(ne0)  atIndex:13];
                                 [encoder setBytes:&ne1  length:sizeof(ne1)  atIndex:14];
 
-                                if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1) {
+                                if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 ||
+                                    src0t == GGML_TYPE_Q4_K) {
                                     [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7) / 8, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
                                 }
                                 else if (src0t == GGML_TYPE_Q2_K ||
author	Kawrakow <48489457+ikawrakow@users.noreply.github.com>	2023-07-20 15:18:43 +0300
committer	GitHub <noreply@github.com>	2023-07-20 15:18:43 +0300
commit	785829dfe8baf0213f2ff66963d28c62f92d7930 (patch)
tree	ae8080083db6275b0ed01aeeba3763633d7d0c33 /ggml-metal.m
parent	fff0e0eafe817eef429ecb64f892ab7bdae31846 (diff)