ggml : use less ggml_mul tasks when src0 rows are few

2025-01-01 00:39:00 +01:00 · 2023-08-30 19:37:26 +03:00 · 2023-08-30 19:37:26 +03:00 · df54d2f1d4
commit df54d2f1d4
parent 253eab8ae1
1 changed file with 9 additions and 2 deletions
--- a/ggml.c
+++ b/ggml.c
@@ -9329,11 +9329,12 @@ static void ggml_compute_forward_mul_f32(
        struct ggml_tensor * dst) {
    GGML_ASSERT(ggml_can_repeat_rows(src1, src0) && ggml_are_same_shape(src0, dst));
    const int ith = params->ith;
    const int nth = params->nth;
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }
    const int ith = params->ith;
    const int nth = params->nth;
 #ifdef GGML_USE_CLBLAST
    if (src1->backend == GGML_BACKEND_GPU) {
@@ -17229,7 +17230,13 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                    }
                } break;
            case GGML_OP_SILU_BACK:
                {
                    n_tasks = n_threads;
                } break;
            case GGML_OP_MUL:
                {
                    n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
                } break;
            case GGML_OP_NORM:
            case GGML_OP_RMS_NORM:
            case GGML_OP_RMS_NORM_BACK: