diff options
author | Kawrakow <48489457+ikawrakow@users.noreply.github.com> | 2023-07-20 15:18:43 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-20 15:18:43 +0300 |
commit | 785829dfe8baf0213f2ff66963d28c62f92d7930 (patch) | |
tree | ae8080083db6275b0ed01aeeba3763633d7d0c33 /ggml-metal.m | |
parent | fff0e0eafe817eef429ecb64f892ab7bdae31846 (diff) |
Faster Q4_K on Metal (#2290)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml-metal.m')
-rw-r--r-- | ggml-metal.m | 7 |
1 files changed, 4 insertions, 3 deletions
```diff
diff --git a/ggml-metal.m b/ggml-metal.m
index d80a380..5e2a211 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -694,8 +694,8 @@ void ggml_metal_graph_compute(
         GGML_ASSERT(ne02 == 1);
         GGML_ASSERT(ne12 == 1);
-        nth0 = 4;
-        nth1 = 16;
+        nth0 = 2;
+        nth1 = 32;
         [encoder setComputePipelineState:ctx->pipeline_mul_mat_q4_K_f32];
     } break;
 case GGML_TYPE_Q5_K:
@@ -739,7 +739,8 @@ void ggml_metal_graph_compute(
 [encoder setBytes:&ne0 length:sizeof(ne0) atIndex:13];
 [encoder setBytes:&ne1 length:sizeof(ne1) atIndex:14];
-if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1) {
+if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 ||
+    src0t == GGML_TYPE_Q4_K) {
     [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 7) / 8, ne11, 1) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
 } else if (src0t == GGML_TYPE_Q2_K ||
```