mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-07 11:23:56 +01:00
gguf : rm references to old file formats
This commit is contained in:
parent
d09fd10713
commit
61919c1a8f
@ -467,13 +467,11 @@ struct llama_load_tensors_map {
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum gguf_file_version {
|
enum gguf_file_version {
|
||||||
gguf_file_VERSION_GGML,
|
GGUF_FILE_VERSION_V1
|
||||||
gguf_file_VERSION_GGMF_V1, // added version field and scores in vocab
|
|
||||||
gguf_file_VERSION_GGJT_V1, // added padding
|
|
||||||
gguf_file_VERSION_GGJT_V2, // changed quantization format
|
|
||||||
gguf_file_VERSION_GGJT_V3, // changed Q4 and Q8 quantization format
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct gguf_file_loader {
|
struct gguf_file_loader {
|
||||||
gguf_file file;
|
gguf_file file;
|
||||||
gguf_context * gguf_ctx;
|
gguf_context * gguf_ctx;
|
||||||
@ -1069,12 +1067,8 @@ int64_t llama_time_us() {
|
|||||||
|
|
||||||
static const char *gguf_file_version_name(gguf_file_version version) {
|
static const char *gguf_file_version_name(gguf_file_version version) {
|
||||||
switch (version) {
|
switch (version) {
|
||||||
case gguf_file_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)";
|
case GGUF_FILE_VERSION_V1: return "GGUF V1 (latest)";
|
||||||
case gguf_file_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)";
|
}
|
||||||
case gguf_file_VERSION_GGJT_V1: return "ggjt v1 (pre #1405)";
|
|
||||||
case gguf_file_VERSION_GGJT_V2: return "ggjt v2 (pre #1508)";
|
|
||||||
case gguf_file_VERSION_GGJT_V3: return "ggjt v3 (latest)";
|
|
||||||
}
|
|
||||||
|
|
||||||
return "unknown";
|
return "unknown";
|
||||||
}
|
}
|
||||||
@ -1205,22 +1199,12 @@ static void llama_model_load_internal(
|
|||||||
fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type));
|
fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file_version < gguf_file_VERSION_GGJT_V2) {
|
if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
|
||||||
if (hparams.ftype != LLAMA_FTYPE_ALL_F32 &&
|
hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
|
||||||
hparams.ftype != LLAMA_FTYPE_MOSTLY_F16 &&
|
hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) {
|
||||||
hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) {
|
throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)"));
|
||||||
throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)"));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file_version < gguf_file_VERSION_GGJT_V3) {
|
|
||||||
if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
|
|
||||||
hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
|
|
||||||
hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) {
|
|
||||||
throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (vocab_only) {
|
if (vocab_only) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user