From 3dfda05956befb350745c5c2f7134d06adfe8724 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 15 Jul 2024 14:10:39 +0300 Subject: [PATCH] llama : de-duplicate deepseek2 norm --- src/llama.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/llama.cpp b/src/llama.cpp index 400a4232b..ddf0262d4 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -12873,12 +12873,12 @@ struct llm_build_context { struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); cb(ffn_inp, "ffn_inp", il); - if ((uint32_t) il < hparams.n_layer_dense_lead) { - cur = llm_build_norm(ctx0, ffn_inp, hparams, - model.layers[il].ffn_norm, NULL, - LLM_NORM_RMS, cb, il); - cb(cur, "ffn_norm", il); + cur = llm_build_norm(ctx0, ffn_inp, hparams, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, cb, il); + cb(cur, "ffn_norm", il); + if ((uint32_t) il < hparams.n_layer_dense_lead) { cur = llm_build_ffn(ctx0, cur, model.layers[il].ffn_up, NULL, NULL, model.layers[il].ffn_gate, NULL, NULL, @@ -12888,11 +12888,6 @@ struct llm_build_context { cb(cur, "ffn_out", il); } else { // MoE branch - cur = llm_build_norm(ctx0, ffn_inp, hparams, - model.layers[il].ffn_norm, NULL, - LLM_NORM_RMS, cb, il); - cb(cur, "ffn_norm", il); - ggml_tensor * moe_out = llm_build_moe_ffn(ctx0, cur, model.layers[il].ffn_gate_inp,