Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-13 22:08:55 +01:00)
Add AVX2 implementation of ggml_vec_dot_q8_0_q8_0, 2x faster than scalar (#1211)
This commit is contained in:
Parent: 0b2da20538
Commit: 04aaae1d79
18
ggml.c
18
ggml.c
@ -3626,6 +3626,24 @@ static void ggml_vec_dot_q8_0_q8_0(const int n, float * restrict s, const void *
|
|||||||
}
|
}
|
||||||
|
|
||||||
*s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1);
|
*s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1);
|
||||||
|
#elif defined(__AVX2__)
|
||||||
|
// Initialize accumulator with zeros
|
||||||
|
__m256 acc = _mm256_setzero_ps();
|
||||||
|
|
||||||
|
// Main loop
|
||||||
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
// Compute combined scale for the block
|
||||||
|
const __m256 d = _mm256_mul_ps( _mm256_broadcast_ss( &x[i].d ), _mm256_broadcast_ss( &y[i].d ) );
|
||||||
|
__m256i bx = _mm256_loadu_si256((const __m256i *)x[i].qs);
|
||||||
|
__m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
|
||||||
|
|
||||||
|
const __m256 q = mul_sum_i8_pairs_float(bx, by);
|
||||||
|
|
||||||
|
// Multiply q with scale and accumulate
|
||||||
|
acc = _mm256_fmadd_ps( d, q, acc );
|
||||||
|
}
|
||||||
|
|
||||||
|
*s = hsum_float_8(acc);
|
||||||
#else
|
#else
|
||||||
// scalar
|
// scalar
|
||||||
float sumf = 0.0;
|
float sumf = 0.0;
|
||||||
|
Loading…
Reference in New Issue
Block a user