Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-12 21:37:19 +01:00)
Fix scalar version of Q5_K when QK_K = 64 (#2362)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
parent 84e09a7d8b
commit 42f70cb2f6
@@ -3297,8 +3297,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
 
 #else
 
-
-uint8_t aux8[QK_K];
+int8_t aux8[QK_K];
 int16_t aux16[16];
 float sums [8];
 memset(sums, 0, 8*sizeof(float));
@@ -3308,7 +3307,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
 const uint8_t * restrict q4 = x[i].qs;
 const uint8_t * restrict hm = x[i].qh;
 const int8_t * restrict q8 = y[i].qs;
-uint8_t * restrict a = aux8;
+int8_t * restrict a = aux8;
 for (int l = 0; l < 32; ++l) {
 a[l+ 0] = q4[l] & 0xF;
 a[l+32] = q4[l] >> 4;
|
Loading…
x
Reference in New Issue
Block a user