mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-28 15:18:26 +01:00
ggml : do not use BLAS with ggml_mul_mat_id
This commit is contained in:
parent
ea4402bb0e
commit
90c12e6b3c
5
ggml.c
5
ggml.c
@ -9508,8 +9508,11 @@ static bool ggml_compute_forward_mul_mat_use_blas(
|
||||
const int64_t ne0 = dst->ne[0];
|
||||
const int64_t ne1 = dst->ne[1];
|
||||
|
||||
// NOTE: with GGML_OP_MUL_MAT_ID we don't want to go through the BLAS branch because it will dequantize (to_float)
|
||||
// all the experts for each batch element, which makes the processing incredibly slow
|
||||
// TODO: find the optimal values for these
|
||||
if (ggml_is_contiguous(src0) &&
|
||||
if (dst->op != GGML_OP_MUL_MAT_ID &&
|
||||
ggml_is_contiguous(src0) &&
|
||||
ggml_is_contiguous(src1) &&
|
||||
//src0->type == GGML_TYPE_F32 &&
|
||||
src1->type == GGML_TYPE_F32 &&
|
||||
|
Loading…
Reference in New Issue
Block a user