metal : another fix for the fa kernel

2024-12-25 13:58:46 +01:00 · 2024-08-26 14:55:28 +03:00 · 2024-08-26 14:55:28 +03:00 · a95225cdfd
commit a95225cdfd
parent 7a3df798fc
1 changed file with 13 additions and 11 deletions
--- a/ggml/src/ggml-metal.metal
+++ b/ggml/src/ggml-metal.metal
@@ -2144,6 +2144,7 @@ kernel void kernel_flash_attn_ext_f16(
                    const short tx = tiisg%4;
                    const short ty = tiisg/4;

+                    if (iq1 + ty < ne01) {
                        // mqk = mqk*scale
                        ss[8*cc + ty*TF + 2*tx + 0] *= scale;
                        ss[8*cc + ty*TF + 2*tx + 1] *= scale;
@@ -2160,6 +2161,7 @@ kernel void kernel_flash_attn_ext_f16(
                        }
                    }
                }
+            }

            // used to detect blocks full of -INF
            float smax = -INFINITY;