mirror of https://github.com/ggerganov/llama.cpp.git
convert : use 1e6 rope_freq_base for mixtral
parent 296c945de5
commit 7dc75e3923
@@ -259,6 +259,7 @@ class Params:
 
         n_experts      = None
         n_experts_used = None
+        f_rope_freq_base = None
 
         # hack to determine LLaMA v1 vs v2 vs CodeLlama
         if config.get("moe"):
@@ -281,6 +282,8 @@ class Params:
             n_ff = model["layers.0.feed_forward.experts.0.w1.weight"].shape[0]
             n_experts      = config["moe"]["num_experts"]
             n_experts_used = config["moe"]["num_experts_per_tok"]
+            f_rope_freq_base = 1e6
+
 
         return Params(
             n_vocab          = model["tok_embeddings.weight"].shape[0],
@@ -293,7 +296,7 @@ class Params:
             n_experts        = n_experts,
             n_experts_used   = n_experts_used,
             f_norm_eps       = config["norm_eps"],
-            f_rope_freq_base = config.get("rope_theta"),
+            f_rope_freq_base = config.get("rope_theta", f_rope_freq_base),
         )
 
     @staticmethod