From 783379670a38cb5f748073d58e820d9352a5a0f3 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Thu, 7 Sep 2023 15:20:07 +0300
Subject: [PATCH] metal : restore original F16 mat-vec multiplication

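The original dispatch, with a grid height of (ne11 + 3)/4, works again after the norm fixes, so the fallback dispatch and its TODO are removed.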
---
 ggml-metal.m | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/ggml-metal.m b/ggml-metal.m
index 5135e1cbb..521ca180f 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -995,12 +995,8 @@ void ggml_metal_graph_compute(
                                 else if (src0t == GGML_TYPE_Q6_K) {
                                     [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 1)/2, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
                                 } else {
-                                    [encoder setThreadgroupMemoryLength:nth0*sizeof(float) atIndex:0];
-                                    [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
-
-                                    // TODO: this breaks for Q4_0 - understand why and fix it
-                                    //int64_t ny = (ne11 + 3)/4;
-                                    //[encoder dispatchThreadgroups:MTLSizeMake(ne01, ny, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
+                                    int64_t ny = (ne11 + 3)/4;
+                                    [encoder dispatchThreadgroups:MTLSizeMake(ne01, ny, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
                                 }
                             }
                         } break;