mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-14 14:28:58 +01:00
iq3_s_mult: back to blocks of 32
This commit is contained in:
parent
f4cb4eac45
commit
e5e72562c5
@ -201,7 +201,7 @@ typedef struct {
|
|||||||
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
|
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
|
||||||
|
|
||||||
// 3.4375 bpw
|
// 3.4375 bpw
|
||||||
#define IQ3S_BLOCK_SIZE 16
|
#define IQ3S_BLOCK_SIZE 32
|
||||||
#if QK_K == 64
|
#if QK_K == 64
|
||||||
#define IQ3S_N_SCALE 2
|
#define IQ3S_N_SCALE 2
|
||||||
#else
|
#else
|
||||||
|
Loading…
Reference in New Issue
Block a user