mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-15 14:50:51 +01:00
iq1_s: Metal works, but quite slow
As usual, Apple Silicon does not like the code I write.
This commit is contained in:
parent
020b548ec3
commit
425c6bbb6c
@@ -4399,7 +4399,7 @@ void kernel_mul_mv_iq1_s_f32_impl(
|
|||||||
for (int row = 0; row < N_DST; ++row) {
|
for (int row = 0; row < N_DST; ++row) {
|
||||||
all_sum = simd_sum(sumf[row]);
|
all_sum = simd_sum(sumf[row]);
|
||||||
if (tiisg == 0) {
|
if (tiisg == 0) {
|
||||||
dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum * 0.5f;
|
dst[r1*ne0 + im*ne0*ne1 + first_row + row] = all_sum;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user