mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 06:10:29 +01:00
Rename Olmo1124 to Olmo2 (#10500)
This commit is contained in:
parent
10bce0450f
commit
80acb7b430
@ -3040,9 +3040,9 @@ class OlmoModel(Model):
|
|||||||
return [(self.map_tensor_name(name), data_torch)]
|
return [(self.map_tensor_name(name), data_torch)]
|
||||||
|
|
||||||
|
|
||||||
@Model.register("Olmo1124ForCausalLM")
|
@Model.register("Olmo2ForCausalLM")
|
||||||
class Olmo1124Model(Model):
|
class Olmo2Model(Model):
|
||||||
model_arch = gguf.MODEL_ARCH.OLMO_1124
|
model_arch = gguf.MODEL_ARCH.OLMO2
|
||||||
|
|
||||||
|
|
||||||
@Model.register("OlmoeForCausalLM")
|
@Model.register("OlmoeForCausalLM")
|
||||||
|
@ -243,7 +243,7 @@ class MODEL_ARCH(IntEnum):
|
|||||||
COMMAND_R = auto()
|
COMMAND_R = auto()
|
||||||
DBRX = auto()
|
DBRX = auto()
|
||||||
OLMO = auto()
|
OLMO = auto()
|
||||||
OLMO_1124 = auto()
|
OLMO2 = auto()
|
||||||
OLMOE = auto()
|
OLMOE = auto()
|
||||||
OPENELM = auto()
|
OPENELM = auto()
|
||||||
ARCTIC = auto()
|
ARCTIC = auto()
|
||||||
@ -405,7 +405,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
|||||||
MODEL_ARCH.COMMAND_R: "command-r",
|
MODEL_ARCH.COMMAND_R: "command-r",
|
||||||
MODEL_ARCH.DBRX: "dbrx",
|
MODEL_ARCH.DBRX: "dbrx",
|
||||||
MODEL_ARCH.OLMO: "olmo",
|
MODEL_ARCH.OLMO: "olmo",
|
||||||
MODEL_ARCH.OLMO_1124: "olmo_1124",
|
MODEL_ARCH.OLMO2: "olmo2",
|
||||||
MODEL_ARCH.OLMOE: "olmoe",
|
MODEL_ARCH.OLMOE: "olmoe",
|
||||||
MODEL_ARCH.OPENELM: "openelm",
|
MODEL_ARCH.OPENELM: "openelm",
|
||||||
MODEL_ARCH.ARCTIC: "arctic",
|
MODEL_ARCH.ARCTIC: "arctic",
|
||||||
@ -1071,7 +1071,7 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|||||||
MODEL_TENSOR.FFN_DOWN,
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
MODEL_TENSOR.FFN_UP,
|
MODEL_TENSOR.FFN_UP,
|
||||||
],
|
],
|
||||||
MODEL_ARCH.OLMO_1124: [
|
MODEL_ARCH.OLMO2: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
MODEL_TENSOR.OUTPUT_NORM,
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
MODEL_TENSOR.OUTPUT,
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
@ -13,7 +13,7 @@ class TensorNameMap:
|
|||||||
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
|
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
|
||||||
"transformer.word_embeddings", # falcon
|
"transformer.word_embeddings", # falcon
|
||||||
"word_embeddings", # bloom
|
"word_embeddings", # bloom
|
||||||
"model.embed_tokens", # llama-hf nemotron olmoe olmo_1124
|
"model.embed_tokens", # llama-hf nemotron olmoe olmo2
|
||||||
"tok_embeddings", # llama-pth
|
"tok_embeddings", # llama-pth
|
||||||
"embeddings.word_embeddings", # bert nomic-bert
|
"embeddings.word_embeddings", # bert nomic-bert
|
||||||
"language_model.embedding.word_embeddings", # persimmon
|
"language_model.embedding.word_embeddings", # persimmon
|
||||||
@ -54,7 +54,7 @@ class TensorNameMap:
|
|||||||
# Output
|
# Output
|
||||||
MODEL_TENSOR.OUTPUT: (
|
MODEL_TENSOR.OUTPUT: (
|
||||||
"embed_out", # gptneox
|
"embed_out", # gptneox
|
||||||
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo_1124
|
"lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2
|
||||||
"output", # llama-pth bloom internlm2
|
"output", # llama-pth bloom internlm2
|
||||||
"word_embeddings_for_head", # persimmon
|
"word_embeddings_for_head", # persimmon
|
||||||
"lm_head.linear", # phi2
|
"lm_head.linear", # phi2
|
||||||
@ -66,7 +66,7 @@ class TensorNameMap:
|
|||||||
MODEL_TENSOR.OUTPUT_NORM: (
|
MODEL_TENSOR.OUTPUT_NORM: (
|
||||||
"gpt_neox.final_layer_norm", # gptneox
|
"gpt_neox.final_layer_norm", # gptneox
|
||||||
"transformer.ln_f", # gpt2 gpt-j falcon jais exaone
|
"transformer.ln_f", # gpt2 gpt-j falcon jais exaone
|
||||||
"model.norm", # llama-hf baichuan internlm2 olmoe olmo_1124
|
"model.norm", # llama-hf baichuan internlm2 olmoe olmo2
|
||||||
"norm", # llama-pth
|
"norm", # llama-pth
|
||||||
"transformer.norm_f", # mpt dbrx
|
"transformer.norm_f", # mpt dbrx
|
||||||
"ln_f", # refact bloom qwen gpt2
|
"ln_f", # refact bloom qwen gpt2
|
||||||
@ -145,7 +145,7 @@ class TensorNameMap:
|
|||||||
|
|
||||||
# Attention query
|
# Attention query
|
||||||
MODEL_TENSOR.ATTN_Q: (
|
MODEL_TENSOR.ATTN_Q: (
|
||||||
"model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo_1124
|
"model.layers.{bid}.self_attn.q_proj", # llama-hf nemotron olmoe olmo2
|
||||||
"layers.{bid}.attention.wq", # llama-pth
|
"layers.{bid}.attention.wq", # llama-pth
|
||||||
"encoder.layer.{bid}.attention.self.query", # bert
|
"encoder.layer.{bid}.attention.self.query", # bert
|
||||||
"transformer.h.{bid}.attn.q_proj", # gpt-j
|
"transformer.h.{bid}.attn.q_proj", # gpt-j
|
||||||
@ -157,7 +157,7 @@ class TensorNameMap:
|
|||||||
|
|
||||||
# Attention key
|
# Attention key
|
||||||
MODEL_TENSOR.ATTN_K: (
|
MODEL_TENSOR.ATTN_K: (
|
||||||
"model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo_1124
|
"model.layers.{bid}.self_attn.k_proj", # llama-hf nemotron olmoe olmo2
|
||||||
"layers.{bid}.attention.wk", # llama-pth
|
"layers.{bid}.attention.wk", # llama-pth
|
||||||
"encoder.layer.{bid}.attention.self.key", # bert
|
"encoder.layer.{bid}.attention.self.key", # bert
|
||||||
"transformer.h.{bid}.attn.k_proj", # gpt-j
|
"transformer.h.{bid}.attn.k_proj", # gpt-j
|
||||||
@ -170,7 +170,7 @@ class TensorNameMap:
|
|||||||
|
|
||||||
# Attention value
|
# Attention value
|
||||||
MODEL_TENSOR.ATTN_V: (
|
MODEL_TENSOR.ATTN_V: (
|
||||||
"model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo_1124
|
"model.layers.{bid}.self_attn.v_proj", # llama-hf nemotron olmoe olmo2
|
||||||
"layers.{bid}.attention.wv", # llama-pth
|
"layers.{bid}.attention.wv", # llama-pth
|
||||||
"encoder.layer.{bid}.attention.self.value", # bert
|
"encoder.layer.{bid}.attention.self.value", # bert
|
||||||
"transformer.h.{bid}.attn.v_proj", # gpt-j
|
"transformer.h.{bid}.attn.v_proj", # gpt-j
|
||||||
@ -188,7 +188,7 @@ class TensorNameMap:
|
|||||||
"transformer.blocks.{bid}.attn.out_proj", # mpt
|
"transformer.blocks.{bid}.attn.out_proj", # mpt
|
||||||
"transformer.h.{bid}.self_attention.dense", # falcon
|
"transformer.h.{bid}.self_attention.dense", # falcon
|
||||||
"h.{bid}.self_attention.dense", # bloom
|
"h.{bid}.self_attention.dense", # bloom
|
||||||
"model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo_1124
|
"model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2
|
||||||
"layers.{bid}.attention.wo", # llama-pth
|
"layers.{bid}.attention.wo", # llama-pth
|
||||||
"encoder.layer.{bid}.attention.output.dense", # bert
|
"encoder.layer.{bid}.attention.output.dense", # bert
|
||||||
"transformer.h.{bid}.attn.out_proj", # gpt-j
|
"transformer.h.{bid}.attn.out_proj", # gpt-j
|
||||||
@ -215,7 +215,7 @@ class TensorNameMap:
|
|||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.ATTN_POST_NORM: (
|
MODEL_TENSOR.ATTN_POST_NORM: (
|
||||||
"model.layers.{bid}.post_attention_layernorm", # gemma2 olmo_1124
|
"model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2
|
||||||
),
|
),
|
||||||
|
|
||||||
# Rotary embeddings
|
# Rotary embeddings
|
||||||
@ -250,7 +250,7 @@ class TensorNameMap:
|
|||||||
|
|
||||||
# Post feed-forward norm
|
# Post feed-forward norm
|
||||||
MODEL_TENSOR.FFN_POST_NORM: (
|
MODEL_TENSOR.FFN_POST_NORM: (
|
||||||
"model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo_1124
|
"model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
|
||||||
),
|
),
|
||||||
|
|
||||||
MODEL_TENSOR.FFN_GATE_INP: (
|
MODEL_TENSOR.FFN_GATE_INP: (
|
||||||
@ -273,7 +273,7 @@ class TensorNameMap:
|
|||||||
"transformer.blocks.{bid}.ffn.up_proj", # mpt
|
"transformer.blocks.{bid}.ffn.up_proj", # mpt
|
||||||
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
|
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
|
||||||
"h.{bid}.mlp.dense_h_to_4h", # bloom
|
"h.{bid}.mlp.dense_h_to_4h", # bloom
|
||||||
"model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo_1124
|
"model.layers.{bid}.mlp.up_proj", # llama-hf refact nemotron olmo2
|
||||||
"layers.{bid}.feed_forward.w3", # llama-pth
|
"layers.{bid}.feed_forward.w3", # llama-pth
|
||||||
"encoder.layer.{bid}.intermediate.dense", # bert
|
"encoder.layer.{bid}.intermediate.dense", # bert
|
||||||
"transformer.h.{bid}.mlp.fc_in", # gpt-j
|
"transformer.h.{bid}.mlp.fc_in", # gpt-j
|
||||||
@ -314,7 +314,7 @@ class TensorNameMap:
|
|||||||
|
|
||||||
# Feed-forward gate
|
# Feed-forward gate
|
||||||
MODEL_TENSOR.FFN_GATE: (
|
MODEL_TENSOR.FFN_GATE: (
|
||||||
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo_1124
|
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
|
||||||
"layers.{bid}.feed_forward.w1", # llama-pth
|
"layers.{bid}.feed_forward.w1", # llama-pth
|
||||||
"transformer.h.{bid}.mlp.w2", # qwen
|
"transformer.h.{bid}.mlp.w2", # qwen
|
||||||
"transformer.h.{bid}.mlp.c_fc2", # jais
|
"transformer.h.{bid}.mlp.c_fc2", # jais
|
||||||
@ -346,7 +346,7 @@ class TensorNameMap:
|
|||||||
"transformer.blocks.{bid}.ffn.down_proj", # mpt
|
"transformer.blocks.{bid}.ffn.down_proj", # mpt
|
||||||
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
|
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
|
||||||
"h.{bid}.mlp.dense_4h_to_h", # bloom
|
"h.{bid}.mlp.dense_4h_to_h", # bloom
|
||||||
"model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo_1124
|
"model.layers.{bid}.mlp.down_proj", # llama-hf nemotron olmo2
|
||||||
"layers.{bid}.feed_forward.w2", # llama-pth
|
"layers.{bid}.feed_forward.w2", # llama-pth
|
||||||
"encoder.layer.{bid}.output.dense", # bert
|
"encoder.layer.{bid}.output.dense", # bert
|
||||||
"transformer.h.{bid}.mlp.fc_out", # gpt-j
|
"transformer.h.{bid}.mlp.fc_out", # gpt-j
|
||||||
@ -383,7 +383,7 @@ class TensorNameMap:
|
|||||||
MODEL_TENSOR.ATTN_Q_NORM: (
|
MODEL_TENSOR.ATTN_Q_NORM: (
|
||||||
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
"language_model.encoder.layers.{bid}.self_attention.q_layernorm",
|
||||||
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
|
||||||
"model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo_1124
|
"model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2
|
||||||
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
|
||||||
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
"encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
|
||||||
"transformer.layers.{bid}.attn.q_norm", # openelm
|
"transformer.layers.{bid}.attn.q_norm", # openelm
|
||||||
@ -392,7 +392,7 @@ class TensorNameMap:
|
|||||||
MODEL_TENSOR.ATTN_K_NORM: (
|
MODEL_TENSOR.ATTN_K_NORM: (
|
||||||
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
"language_model.encoder.layers.{bid}.self_attention.k_layernorm",
|
||||||
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
|
||||||
"model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo_1124
|
"model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2
|
||||||
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
|
||||||
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
|
"encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
|
||||||
"transformer.layers.{bid}.attn.k_norm", # openelm
|
"transformer.layers.{bid}.attn.k_norm", # openelm
|
||||||
|
@ -179,7 +179,7 @@ enum llm_arch {
|
|||||||
LLM_ARCH_COMMAND_R,
|
LLM_ARCH_COMMAND_R,
|
||||||
LLM_ARCH_DBRX,
|
LLM_ARCH_DBRX,
|
||||||
LLM_ARCH_OLMO,
|
LLM_ARCH_OLMO,
|
||||||
LLM_ARCH_OLMO_1124,
|
LLM_ARCH_OLMO2,
|
||||||
LLM_ARCH_OLMOE,
|
LLM_ARCH_OLMOE,
|
||||||
LLM_ARCH_OPENELM,
|
LLM_ARCH_OPENELM,
|
||||||
LLM_ARCH_ARCTIC,
|
LLM_ARCH_ARCTIC,
|
||||||
@ -233,7 +233,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
|
|||||||
{ LLM_ARCH_COMMAND_R, "command-r" },
|
{ LLM_ARCH_COMMAND_R, "command-r" },
|
||||||
{ LLM_ARCH_DBRX, "dbrx" },
|
{ LLM_ARCH_DBRX, "dbrx" },
|
||||||
{ LLM_ARCH_OLMO, "olmo" },
|
{ LLM_ARCH_OLMO, "olmo" },
|
||||||
{ LLM_ARCH_OLMO_1124, "olmo_1124" },
|
{ LLM_ARCH_OLMO2, "olmo2" },
|
||||||
{ LLM_ARCH_OLMOE, "olmoe" },
|
{ LLM_ARCH_OLMOE, "olmoe" },
|
||||||
{ LLM_ARCH_OPENELM, "openelm" },
|
{ LLM_ARCH_OPENELM, "openelm" },
|
||||||
{ LLM_ARCH_ARCTIC, "arctic" },
|
{ LLM_ARCH_ARCTIC, "arctic" },
|
||||||
@ -1210,7 +1210,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
LLM_ARCH_OLMO_1124,
|
LLM_ARCH_OLMO2,
|
||||||
{
|
{
|
||||||
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
||||||
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
|
||||||
@ -5900,7 +5900,7 @@ static void llm_load_hparams(
|
|||||||
default: model.type = e_model::MODEL_UNKNOWN;
|
default: model.type = e_model::MODEL_UNKNOWN;
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_OLMO_1124:
|
case LLM_ARCH_OLMO2:
|
||||||
{
|
{
|
||||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||||
|
|
||||||
@ -8593,7 +8593,7 @@ static bool llm_load_tensors(
|
|||||||
layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
|
layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_OLMO_1124:
|
case LLM_ARCH_OLMO2:
|
||||||
{
|
{
|
||||||
model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
|
model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
|
||||||
|
|
||||||
@ -14483,7 +14483,7 @@ struct llm_build_context {
|
|||||||
return gf;
|
return gf;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_cgraph * build_olmo_1124() {
|
struct ggml_cgraph * build_olmo2() {
|
||||||
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
|
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, llama_model_max_nodes(model), false);
|
||||||
|
|
||||||
// mutable variable, needed during the last layer of the computation to skip unused tokens
|
// mutable variable, needed during the last layer of the computation to skip unused tokens
|
||||||
@ -16799,9 +16799,9 @@ static struct ggml_cgraph * llama_build_graph(
|
|||||||
{
|
{
|
||||||
result = llm.build_olmo();
|
result = llm.build_olmo();
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_OLMO_1124:
|
case LLM_ARCH_OLMO2:
|
||||||
{
|
{
|
||||||
result = llm.build_olmo_1124();
|
result = llm.build_olmo2();
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_OLMOE:
|
case LLM_ARCH_OLMOE:
|
||||||
{
|
{
|
||||||
@ -20084,7 +20084,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
|
|||||||
case LLM_ARCH_QWEN:
|
case LLM_ARCH_QWEN:
|
||||||
case LLM_ARCH_QWEN2:
|
case LLM_ARCH_QWEN2:
|
||||||
case LLM_ARCH_QWEN2MOE:
|
case LLM_ARCH_QWEN2MOE:
|
||||||
case LLM_ARCH_OLMO_1124:
|
case LLM_ARCH_OLMO2:
|
||||||
case LLM_ARCH_OLMOE:
|
case LLM_ARCH_OLMOE:
|
||||||
case LLM_ARCH_PHI2:
|
case LLM_ARCH_PHI2:
|
||||||
case LLM_ARCH_PHI3:
|
case LLM_ARCH_PHI3:
|
||||||
|
Loading…
Reference in New Issue
Block a user