mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-29 07:34:18 +01:00
ggml : do not use BLAS with ggml_mul_mat_id
This commit is contained in:
parent
ea4402bb0e
commit
90c12e6b3c
5
ggml.c
5
ggml.c
@@ -9508,8 +9508,11 @@ static bool ggml_compute_forward_mul_mat_use_blas(
|
|||||||
const int64_t ne0 = dst->ne[0];
|
const int64_t ne0 = dst->ne[0];
|
||||||
const int64_t ne1 = dst->ne[1];
|
const int64_t ne1 = dst->ne[1];
|
||||||
|
|
||||||
|
// NOTE: with GGML_OP_MUL_MAT_ID we don't want to go through the BLAS branch because it will dequantize (to_float)
|
||||||
|
// all the experts for each batch element and the processing would become incredibly slow
|
||||||
// TODO: find the optimal values for these
|
// TODO: find the optimal values for these
|
||||||
if (ggml_is_contiguous(src0) &&
|
if (dst->op != GGML_OP_MUL_MAT_ID &&
|
||||||
|
ggml_is_contiguous(src0) &&
|
||||||
ggml_is_contiguous(src1) &&
|
ggml_is_contiguous(src1) &&
|
||||||
//src0->type == GGML_TYPE_F32 &&
|
//src0->type == GGML_TYPE_F32 &&
|
||||||
src1->type == GGML_TYPE_F32 &&
|
src1->type == GGML_TYPE_F32 &&
|
||||||
|
Loading…
Reference in New Issue
Block a user