From f0cbb6ddf6b314693cdc536994152bb85731f281 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow
Date: Wed, 28 Feb 2024 08:28:10 +0200
Subject: [PATCH] iq1_s: turn off SIMD implementation for QK_K = 64 (it does not work)

---
 ggml-quants.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ggml-quants.c b/ggml-quants.c
index b7d46303a..2407a518c 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -10291,7 +10291,8 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const
 
     const int nb = n / QK_K;
 
-#if defined __ARM_NEON && QK_K != 64
+    // TODO: implement for QK_K = 64
+#if defined __ARM_NEON && QK_K == 256
 
     const uint8x16_t m8 = vdupq_n_u8(0x08);
     const uint8x16_t m7 = vdupq_n_u8(0x07);
@@ -10348,7 +10349,8 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const
 
     *s = sumf;
 
-#elif defined __AVX2__
+    // TODO: implement for QK_K = 64
+#elif defined __AVX2__ && QK_K == 256
 
     const __m128i m8 = _mm_set1_epi8(0x08);
     const __m128i m7 = _mm_set1_epi8(0x07);