mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-29 07:34:18 +01:00
test-backend-ops : make experts more evenly probable (test_moe)
This commit is contained in:
parent
b0b83dd9e2
commit
54ba263410
@@ -1172,7 +1172,7 @@ struct test_moe : public test_case {
|
||||
ggml_tensor * cur = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_tokens);
|
||||
|
||||
ggml_tensor * logits = ggml_mul_mat(ctx, ffn_gate_inp, cur); // [n_tokens, num_experts]
|
||||
- ggml_tensor * probs = ggml_soft_max(ctx, logits); // [n_tokens, num_experts]
|
||||
+ ggml_tensor * probs = ggml_soft_max_ext(ctx, logits, nullptr, 1.0f/sqrtf(n_embd)); // [n_tokens, num_experts]
|
||||
|
||||
// select experts
|
||||
ggml_tensor * selected_experts = ggml_top_k(ctx, probs, n_experts_per_tok); // [n_tokens, num_experts_per_tok]
|
||||
|
Loading…
Reference in New Issue
Block a user