ggml : support AVX512VNNI (#6280)
This change causes some quants (e.g. Q4_0, Q8_0) to go faster on some architectures (e.g. AMD Zen 4).
parent a32b77c4b2
commit 7733f0c760
@@ -132,7 +132,7 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
 }
 
 static inline __m256 mul_sum_us8_pairs_float(const __m256i ax, const __m256i sy) {
-#if __AVXVNNI__
+#if defined(__AVXVNNI__) || defined(__AVX512VNNI__)
     const __m256i zero = _mm256_setzero_si256();
     const __m256i summed_pairs = _mm256_dpbusd_epi32(zero, ax, sy);
     return _mm256_cvtepi32_ps(summed_pairs);
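Why widening the guard helps: _mm256_dpbusd_epi32 maps to the VNNI instruction VPDPBUSD, which fuses the unsigned-by-signed byte multiply and the horizontal add into a single instruction. Compilers targeting AVX-512 VNNI (e.g. GCC with -march=znver4 for Zen 4) define __AVX512VNNI__ but may not define __AVXVNNI__, so the old guard skipped this fast path on those targets. As a reference, here is a minimal scalar sketch of what VPDPBUSD computes for each of the eight 32-bit lanes of a __m256i (the helper name is illustrative, not from the commit):

#include <stdint.h>

// Scalar model of one 32-bit lane of VPDPBUSD (illustrative only):
// acc += a0*b0 + a1*b1 + a2*b2 + a3*b3, where a holds unsigned 8-bit
// values, b holds signed 8-bit values, and each product is widened to
// 32 bits before the add (the non-"S" form does not saturate).
static int32_t dpbusd_lane(int32_t acc, const uint8_t a[4], const int8_t b[4]) {
    for (int j = 0; j < 4; j++) {
        acc += (int32_t)a[j] * (int32_t)b[j];
    }
    return acc;
}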
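For context, when neither macro is defined the function takes a two-instruction fallback. A sketch of that path, assuming the surrounding file matches upstream llama.cpp of the same era (reconstructed, not part of this hunk):

#include <immintrin.h>

// Hedged reconstruction of the non-VNNI branch: _mm256_maddubs_epi16
// multiplies the unsigned bytes of ax by the signed bytes of sy and adds
// adjacent pairs into saturated 16-bit lanes; sum_i16_pairs_float (the
// function visible in the hunk header above) then widens those to 32-bit
// sums and converts them to float.
static inline __m256 mul_sum_us8_pairs_float_fallback(const __m256i ax, const __m256i sy) {
    const __m256i dot = _mm256_maddubs_epi16(ax, sy);
    return sum_i16_pairs_float(dot);
}

On VNNI-capable parts, the single VPDPBUSD replaces this maddubs-plus-reduce pair, which is where the Q4_0/Q8_0 speedup on architectures like Zen 4 comes from.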