Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-11 21:10:24 +01:00)
ggml : fix Q5_0 quantization
This commit is contained in:
parent 60f62bbc85
commit 8fbf7777ce
2
ggml.c
2
ggml.c
@@ -845,7 +845,7 @@ static void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * r
 const float d = max / -16;
 const float id = d ? 1.0f/d : 0.0f;
 
-y[i].d = d;
+y[i].d = GGML_FP32_TO_FP16(d);
 
 uint32_t qh = 0;
 
Loading…
x
Reference in New Issue
Block a user