From 4c72ab13b20c1589c3af7fc18d14299c983ad556 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 28 Sep 2023 16:02:20 +0300
Subject: [PATCH] metal : use mm kernels for batch size > 2

---
 ggml-metal.m | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml-metal.m b/ggml-metal.m
index 71d69d38d..b3c463f03 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -958,7 +958,7 @@ void ggml_metal_graph_compute(
  src1t == GGML_TYPE_F32 &&
  [ctx->device supportsFamily:MTLGPUFamilyApple7] &&
  ne00%32 == 0 &&
- ne11 > 1) {
+ ne11 > 2) {
  switch (src0->type) {
  case GGML_TYPE_F32: [encoder setComputePipelineState:ctx->pipeline_mul_mm_f32_f32]; break;
  case GGML_TYPE_F16: [encoder setComputePipelineState:ctx->pipeline_mul_mm_f16_f32]; break;