mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-10-30 06:30:15 +01:00
ggml : fix q2_k bpw in comments (ggml/680)
This commit is contained in:
parent
1bf681f90e
commit
d061bf9405
@ -70,7 +70,7 @@ static_assert(sizeof(block_q8_1) == 2*sizeof(float) + QK8_1, "wrong q8_1 block s
|
|||||||
// 2-bit quantization
|
// 2-bit quantization
|
||||||
// weight is represented as x = a * q + b
|
// weight is represented as x = a * q + b
|
||||||
// 16 blocks of 16 elements each
|
// 16 blocks of 16 elements each
|
||||||
// Effectively 2.5625 bits per weight
|
// Effectively 2.625 bits per weight
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
||||||
uint8_t qs[QK_K/4]; // quants
|
uint8_t qs[QK_K/4]; // quants
|
||||||
|
Loading…
Reference in New Issue
Block a user