From e1241d9b461816b679eaf6951631287687a18f66 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 13 Dec 2023 13:56:45 +0200
Subject: [PATCH] metal : switch to execution barriers + fix one of the barriers

---
 ggml-metal.metal | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/ggml-metal.metal b/ggml-metal.metal
index 8b76f969c..773fac124 100644
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@@ -385,8 +385,11 @@ kernel void kernel_soft_max(
         pdst[i00] = exp_psrc0;
     }
 
+    // This barrier fixes a failing test
+    // ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335
+    threadgroup_barrier(mem_flags::mem_none);
+
     float sum = simd_sum(lsum);
-    threadgroup_barrier(mem_flags::mem_threadgroup);
 
     if (ntg > N_SIMDWIDTH) {
         if (sgitg == 0) {
@@ -470,9 +473,13 @@ kernel void kernel_soft_max_4(
     }
 
     const float lsum = lsum4[0] + lsum4[1] + lsum4[2] + lsum4[3];
-    threadgroup_barrier(mem_flags::mem_threadgroup);
+
+    // This barrier fixes a failing test
+    // ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335
+    threadgroup_barrier(mem_flags::mem_none);
 
     float sum = simd_sum(lsum);
+
     if (ntg > N_SIMDWIDTH) {
         if (sgitg == 0) {
             buf[tiisg] = 0.0f;