From 54ba2634108787f71016e8329b93778957f801f7 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 10 Dec 2023 15:27:41 +0200 Subject: [PATCH] test-backend-ops : make experts more evenly probable (test_moe) --- tests/test-backend-ops.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 2f7ea4edf..539f7f71b 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -1172,7 +1172,7 @@ struct test_moe : public test_case { ggml_tensor * cur = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_tokens); ggml_tensor * logits = ggml_mul_mat(ctx, ffn_gate_inp, cur); // [n_tokens, num_experts] - ggml_tensor * probs = ggml_soft_max(ctx, logits); // [n_tokens, num_experts] + ggml_tensor * probs = ggml_soft_max_ext(ctx, logits, nullptr, 1.0f/sqrtf(n_embd)); // [n_tokens, num_experts] // select experts ggml_tensor * selected_experts = ggml_top_k(ctx, probs, n_experts_per_tok); // [n_tokens, num_experts_per_tok]