Mirror of https://github.com/ggerganov/llama.cpp.git
(last synced 2025-01-22 09:39:08 +01:00)
gptneox-main.cpp : tensor name map changes
This commit is contained in:
Parent commit: 806a15749d
This commit: d753dfbcc8
@ -381,6 +381,8 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2
|
||||
if (keyidx != -1) { fprintf(stdout, "%s: model architecture = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); }
|
||||
keyidx = gguf_find_key(ggufctx, "general.file_type");
|
||||
if (keyidx != -1) { fprintf(stdout, "%s: model file type = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); }
|
||||
keyidx = gguf_find_key(ggufctx, "general.source.hugginface.repository");
|
||||
if (keyidx != -1) { fprintf(stdout, "%s: model source HF repo = %s\n", __func__, gguf_get_val_str(ggufctx, keyidx)); }
|
||||
}
|
||||
|
||||
// check required metadata
|
||||
@ -551,21 +553,21 @@ bool gpt_neox_model_load(const std::string & fname, gpt_neox_model & model, gpt2
|
||||
|
||||
model.blocks.resize(n_block);
|
||||
|
||||
model.wte = ggml_get_tensor(ctx, "transformer.token_embd.weight");
|
||||
model.ln_f_g = ggml_get_tensor(ctx, "transformer.output_norm.weight");
|
||||
model.ln_f_b = ggml_get_tensor(ctx, "transformer.output_norm.bias");
|
||||
model.lmh_g = ggml_get_tensor(ctx, "transformer.output.weight");
|
||||
model.wte = ggml_get_tensor(ctx, "token_embd.weight");
|
||||
model.ln_f_g = ggml_get_tensor(ctx, "output_norm.weight");
|
||||
model.ln_f_b = ggml_get_tensor(ctx, "output_norm.bias");
|
||||
model.lmh_g = ggml_get_tensor(ctx, "output.weight");
|
||||
|
||||
// map by name
|
||||
model.tensors["transformer.token_embd.weight"] = model.wte;
|
||||
model.tensors["transformer.output_norm.weight"] = model.ln_f_g;
|
||||
model.tensors["transformer.output_norm.bias"] = model.ln_f_b;
|
||||
model.tensors["transformer.output.weight"] = model.lmh_g;
|
||||
model.tensors["token_embd.weight"] = model.wte;
|
||||
model.tensors["output_norm.weight"] = model.ln_f_g;
|
||||
model.tensors["output_norm.bias"] = model.ln_f_b;
|
||||
model.tensors["output.weight"] = model.lmh_g;
|
||||
|
||||
for (int i = 0; i < n_block; ++i) {
|
||||
auto & block = model.blocks[i];
|
||||
|
||||
std::string blocknamestart = "transformer.blocks." + std::to_string(i) + ".";
|
||||
std::string blocknamestart = "blk." + std::to_string(i) + ".";
|
||||
|
||||
block.ln_1_g = get_tensor_ex(ctx, blocknamestart + "attn_norm.weight" );
|
||||
block.ln_1_b = get_tensor_ex(ctx, blocknamestart + "attn_norm.bias" );
|
||||
|
Loading…
Reference in New Issue
Block a user