metal : switch to execution barriers + fix one of the barriers

This commit is contained in:
Georgi Gerganov 2023-12-13 13:56:45 +02:00
parent 109e7aa8ac
commit e1241d9b46
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@@ -385,8 +385,11 @@ kernel void kernel_soft_max(
pdst[i00] = exp_psrc0;
}
// This barrier fixes a failing test
// ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335
threadgroup_barrier(mem_flags::mem_none);
float sum = simd_sum(lsum);
threadgroup_barrier(mem_flags::mem_threadgroup);
if (ntg > N_SIMDWIDTH) {
if (sgitg == 0) {
@@ -470,9 +473,13 @@ kernel void kernel_soft_max_4(
}
const float lsum = lsum4[0] + lsum4[1] + lsum4[2] + lsum4[3];
threadgroup_barrier(mem_flags::mem_threadgroup);
// This barrier fixes a failing test
// ref: https://github.com/ggerganov/ggml/pull/621#discussion_r1425156335
threadgroup_barrier(mem_flags::mem_none);
float sum = simd_sum(lsum);
if (ntg > N_SIMDWIDTH) {
if (sgitg == 0) {
buf[tiisg] = 0.0f;