mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 14:20:31 +01:00
Print model version.
Also improve model type printing, and fix indentation of an unrelated switch statement.
This commit is contained in:
parent
f963b63afa
commit
180b693a47
27
llama.cpp
27
llama.cpp
@ -806,6 +806,25 @@ bool llama_mlock_supported() {
|
|||||||
// model loading
|
// model loading
|
||||||
//
|
//
|
||||||
|
|
||||||
|
static const char *llama_file_version_name(llama_file_version version) {
|
||||||
|
switch (version) {
|
||||||
|
case LLAMA_FILE_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)";
|
||||||
|
case LLAMA_FILE_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)";
|
||||||
|
case LLAMA_FILE_VERSION_GGJT_V1: return "ggjt v1 (latest)";
|
||||||
|
default: LLAMA_ASSERT(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char *llama_model_type_name(e_model type) {
|
||||||
|
switch (type) {
|
||||||
|
case MODEL_7B: return "7B";
|
||||||
|
case MODEL_13B: return "13B";
|
||||||
|
case MODEL_30B: return "30B";
|
||||||
|
case MODEL_65B: return "65B";
|
||||||
|
default: LLAMA_ASSERT(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void llama_model_load_internal(
|
static void llama_model_load_internal(
|
||||||
const std::string & fname,
|
const std::string & fname,
|
||||||
llama_context & lctx,
|
llama_context & lctx,
|
||||||
@ -823,8 +842,9 @@ static void llama_model_load_internal(
|
|||||||
|
|
||||||
lctx.vocab = std::move(ml->file_loaders.at(0)->vocab);
|
lctx.vocab = std::move(ml->file_loaders.at(0)->vocab);
|
||||||
auto & model = lctx.model;
|
auto & model = lctx.model;
|
||||||
|
model.hparams = ml->file_loaders.at(0)->hparams;
|
||||||
|
llama_file_version file_version = ml->file_loaders.at(0)->file_version;
|
||||||
auto & hparams = model.hparams;
|
auto & hparams = model.hparams;
|
||||||
hparams = ml->file_loaders.at(0)->hparams;
|
|
||||||
uint32_t n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult;
|
uint32_t n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult;
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -836,7 +856,10 @@ static void llama_model_load_internal(
|
|||||||
}
|
}
|
||||||
|
|
||||||
hparams.n_ctx = n_ctx;
|
hparams.n_ctx = n_ctx;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
fprintf(stderr, "%s: format = %s\n", __func__, llama_file_version_name(file_version));
|
||||||
fprintf(stderr, "%s: n_vocab = %u\n", __func__, hparams.n_vocab);
|
fprintf(stderr, "%s: n_vocab = %u\n", __func__, hparams.n_vocab);
|
||||||
fprintf(stderr, "%s: n_ctx = %u\n", __func__, hparams.n_ctx);
|
fprintf(stderr, "%s: n_ctx = %u\n", __func__, hparams.n_ctx);
|
||||||
fprintf(stderr, "%s: n_embd = %u\n", __func__, hparams.n_embd);
|
fprintf(stderr, "%s: n_embd = %u\n", __func__, hparams.n_embd);
|
||||||
@ -847,7 +870,7 @@ static void llama_model_load_internal(
|
|||||||
fprintf(stderr, "%s: f16 = %u\n", __func__, hparams.f16);
|
fprintf(stderr, "%s: f16 = %u\n", __func__, hparams.f16);
|
||||||
fprintf(stderr, "%s: n_ff = %u\n", __func__, n_ff);
|
fprintf(stderr, "%s: n_ff = %u\n", __func__, n_ff);
|
||||||
fprintf(stderr, "%s: n_parts = %zu\n", __func__, ml->file_loaders.size());
|
fprintf(stderr, "%s: n_parts = %zu\n", __func__, ml->file_loaders.size());
|
||||||
fprintf(stderr, "%s: type = %u\n", __func__, model.type);
|
fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vocab_only) {
|
if (vocab_only) {
|
||||||
|
Loading…
Reference in New Issue
Block a user