mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-01 00:39:00 +01:00
test-backend-ops : make experts more evenly probable (test_moe)
This commit is contained in:
parent
b0b83dd9e2
commit
54ba263410
@ -1172,7 +1172,7 @@ struct test_moe : public test_case {
|
|||||||
ggml_tensor * cur = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_tokens);
|
ggml_tensor * cur = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_tokens);
|
||||||
|
|
||||||
ggml_tensor * logits = ggml_mul_mat(ctx, ffn_gate_inp, cur); // [n_tokens, num_experts]
|
ggml_tensor * logits = ggml_mul_mat(ctx, ffn_gate_inp, cur); // [n_tokens, num_experts]
|
||||||
ggml_tensor * probs = ggml_soft_max(ctx, logits); // [n_tokens, num_experts]
|
ggml_tensor * probs = ggml_soft_max_ext(ctx, logits, nullptr, 1.0f/sqrtf(n_embd)); // [n_tokens, num_experts]
|
||||||
|
|
||||||
// select experts
|
// select experts
|
||||||
ggml_tensor * selected_experts = ggml_top_k(ctx, probs, n_experts_per_tok); // [n_tokens, num_experts_per_tok]
|
ggml_tensor * selected_experts = ggml_top_k(ctx, probs, n_experts_per_tok); // [n_tokens, num_experts_per_tok]
|
||||||
|
Loading…
Reference in New Issue
Block a user