mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-07 11:23:56 +01:00
gguf : inference with 7B model working (WIP)
This commit is contained in:
parent
42cc04d11d
commit
cfb8e35b73
@ -493,6 +493,8 @@ struct ggml_context * ctx_data = NULL;
|
|||||||
|
|
||||||
gguf_ctx = gguf_init_from_file(fname, params);
|
gguf_ctx = gguf_init_from_file(fname, params);
|
||||||
|
|
||||||
|
read_hparams();
|
||||||
|
read_vocab();
|
||||||
read_tensor_metadata(tensors_map);
|
read_tensor_metadata(tensors_map);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -523,7 +525,7 @@ struct ggml_context * ctx_data = NULL;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
throw std::runtime_error(format("failed to find n_mult for n_ff = %d and n_emb = %d\n", n_ff, n_embd));
|
throw std::runtime_error(format("failed to find n_mult for n_ff = %d and n_embd = %d\n", n_ff, n_embd));
|
||||||
}
|
}
|
||||||
|
|
||||||
void read_hparams() {
|
void read_hparams() {
|
||||||
@ -534,14 +536,14 @@ struct ggml_context * ctx_data = NULL;
|
|||||||
hparams.n_ctx = read_u32("llama.context_length");
|
hparams.n_ctx = read_u32("llama.context_length");
|
||||||
hparams.n_embd = read_u32("llama.embedding_length");
|
hparams.n_embd = read_u32("llama.embedding_length");
|
||||||
uint32_t n_ff = read_u32("llama.feed_forward_length");
|
uint32_t n_ff = read_u32("llama.feed_forward_length");
|
||||||
hparams.n_mult = find_n_mult(n_ff, hparams.n_embd);
|
//hparams.n_mult = find_n_mult(n_ff, hparams.n_embd);
|
||||||
hparams.n_head = read_u32("llama.attention.head_count");
|
hparams.n_head = read_u32("llama.attention.head_count");
|
||||||
hparams.n_layer = read_u32("llama.layer_count");
|
hparams.n_layer = read_u32("llama.layer_count");
|
||||||
hparams.n_rot = hparams.n_embd / hparams.n_head;
|
hparams.n_rot = hparams.n_embd / hparams.n_head;
|
||||||
//hparams.ftype = (enum llama_ftype) file.read_u32();
|
//hparams.ftype = (enum llama_ftype) file.read_u32();
|
||||||
|
|
||||||
// LLaMAv2
|
// LLaMAv2
|
||||||
hparams.n_head_kv = read_u32("llama.attention.head_count_kv");
|
// hparams.n_head_kv = read_u32("llama.attention.head_count_kv");
|
||||||
}
|
}
|
||||||
|
|
||||||
void read_vocab() {
|
void read_vocab() {
|
||||||
|
Loading…
Reference in New Issue
Block a user