ggml : do not use BLAS with ggml_mul_mat_id

2025-02-06 16:40:34 +01:00 · 2023-12-12 20:05:58 +02:00 · 2023-12-12 20:05:58 +02:00 · 90c12e6b3c
commit 90c12e6b3c
parent ea4402bb0e
1 changed file with 4 additions and 1 deletion
--- a/ggml.c
+++ b/ggml.c
@@ -9508,8 +9508,11 @@ static bool ggml_compute_forward_mul_mat_use_blas(
    const int64_t ne0 = dst->ne[0];
    const int64_t ne1 = dst->ne[1];

+    // NOTE: with GGML_OP_MUL_MAT_ID we don't want to go through the BLAS branch because it will dequantize (to_float)
+    //       all the experts for each batch element and the processing would become incredibly slow
    // TODO: find the optimal values for these
-    if (ggml_is_contiguous(src0) &&
+    if (dst->op != GGML_OP_MUL_MAT_ID &&
+        ggml_is_contiguous(src0) &&
        ggml_is_contiguous(src1) &&
      //src0->type == GGML_TYPE_F32 &&
        src1->type == GGML_TYPE_F32 &&