mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 05:48:47 +01:00
ggml : fix unnecessary f32 -> f16 -> f32 casts (mmla) (#5951)
This commit is contained in:
parent
58308a0ecc
commit
8380ecfb21
@ -4059,10 +4059,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
|||||||
const int8x16_t y1_h = vld1q_s8(b_y1->qs + 16);
|
const int8x16_t y1_h = vld1q_s8(b_y1->qs + 16);
|
||||||
|
|
||||||
// mmla into int32x4_t
|
// mmla into int32x4_t
|
||||||
float32x4_t scale = {GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y0->d),
|
float32x4_t scale = {GGML_FP16_TO_FP32(b_x0->d)*b_y0->d,
|
||||||
GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y1->d),
|
GGML_FP16_TO_FP32(b_x0->d)*b_y1->d,
|
||||||
GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y0->d),
|
GGML_FP16_TO_FP32(b_x1->d)*b_y0->d,
|
||||||
GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y1->d)};
|
GGML_FP16_TO_FP32(b_x1->d)*b_y1->d};
|
||||||
|
|
||||||
int8x16_t l0 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l)));
|
int8x16_t l0 = vreinterpretq_s8_s64(vzip1q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l)));
|
||||||
int8x16_t l1 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l)));
|
int8x16_t l1 = vreinterpretq_s8_s64(vzip2q_s64(vreinterpretq_s64_s8(x0_l), vreinterpretq_s64_s8(x1_l)));
|
||||||
|
Loading…
Reference in New Issue
Block a user