cuda : unroll Q*K^T loop

2024-12-29 07:34:18 +01:00 · 2024-02-03 16:12:20 +02:00 · 2024-02-03 16:12:20 +02:00 · 5b263dd83a
commit 5b263dd83a
parent 3b1c4e7673
1 changed file with 1 addition and 0 deletions
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -6571,6 +6571,7 @@ static __global__ void flash_attn_ext_f16(
            // Q*K^T
            {
 #pragma unroll
                for (int cc = 0; cc < C/16; ++cc) {
                    half16x16_acc mqk[Q16];
                    for (int j = 0; j < Q16; ++j) {