mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 13:27:21 +01:00
Fix scalar version of Q5_K when QK_K = 64 (#2362)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
parent
84e09a7d8b
commit
42f70cb2f6
@ -3297,8 +3297,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
|
||||
|
||||
#else
|
||||
|
||||
|
||||
uint8_t aux8[QK_K];
|
||||
int8_t aux8[QK_K];
|
||||
int16_t aux16[16];
|
||||
float sums [8];
|
||||
memset(sums, 0, 8*sizeof(float));
|
||||
@ -3308,7 +3307,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
|
||||
const uint8_t * restrict q4 = x[i].qs;
|
||||
const uint8_t * restrict hm = x[i].qh;
|
||||
const int8_t * restrict q8 = y[i].qs;
|
||||
uint8_t * restrict a = aux8;
|
||||
int8_t * restrict a = aux8;
|
||||
for (int l = 0; l < 32; ++l) {
|
||||
a[l+ 0] = q4[l] & 0xF;
|
||||
a[l+32] = q4[l] >> 4;
|
||||
|
Loading…
x
Reference in New Issue
Block a user