ggml : do not use BLAS with ggml_mul_mat_id

This commit is contained in:
Georgi Gerganov 2023-12-12 20:05:58 +02:00
parent ea4402bb0e
commit 90c12e6b3c
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

5
ggml.c
View File

@@ -9508,8 +9508,11 @@ static bool ggml_compute_forward_mul_mat_use_blas(
     const int64_t ne0 = dst->ne[0];
     const int64_t ne1 = dst->ne[1];
 
+    // NOTE: with GGML_OP_MUL_MAT_ID we don't want to go through the BLAS branch because it will dequantize (to_float)
+    //       all the experts for each batch element and the processing would become incredibly slow
     // TODO: find the optimal values for these
-    if (ggml_is_contiguous(src0) &&
+    if (dst->op != GGML_OP_MUL_MAT_ID &&
+        ggml_is_contiguous(src0) &&
         ggml_is_contiguous(src1) &&
       //src0->type == GGML_TYPE_F32 &&
         src1->type == GGML_TYPE_F32 &&