gguf : rm references to old file formats

This commit is contained in:
M. Yusuf Sarıgöz 2023-08-11 20:36:11 +03:00
parent d09fd10713
commit 61919c1a8f

View File

@@ -467,13 +467,11 @@ struct llama_load_tensors_map {
}; };
enum gguf_file_version { enum gguf_file_version {
gguf_file_VERSION_GGML, GGUF_FILE_VERSION_V1
gguf_file_VERSION_GGMF_V1, // added version field and scores in vocab
gguf_file_VERSION_GGJT_V1, // added padding
gguf_file_VERSION_GGJT_V2, // changed quantization format
gguf_file_VERSION_GGJT_V3, // changed Q4 and Q8 quantization format
}; };
struct gguf_file_loader { struct gguf_file_loader {
gguf_file file; gguf_file file;
gguf_context * gguf_ctx; gguf_context * gguf_ctx;
@@ -1069,12 +1067,8 @@ int64_t llama_time_us() {
static const char *gguf_file_version_name(gguf_file_version version) { static const char *gguf_file_version_name(gguf_file_version version) {
switch (version) { switch (version) {
case gguf_file_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)"; case GGUF_FILE_VERSION_V1: return "GGUF V1 (latest)";
case gguf_file_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)"; }
case gguf_file_VERSION_GGJT_V1: return "ggjt v1 (pre #1405)";
case gguf_file_VERSION_GGJT_V2: return "ggjt v2 (pre #1508)";
case gguf_file_VERSION_GGJT_V3: return "ggjt v3 (latest)";
}
return "unknown"; return "unknown";
} }
@@ -1205,22 +1199,12 @@ static void llama_model_load_internal(
fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type)); fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type));
} }
if (file_version < gguf_file_VERSION_GGJT_V2) { if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
if (hparams.ftype != LLAMA_FTYPE_ALL_F32 && hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
hparams.ftype != LLAMA_FTYPE_MOSTLY_F16 && hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) {
hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) { throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)"));
throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)"));
}
} }
if (file_version < gguf_file_VERSION_GGJT_V3) {
if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) {
throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)"));
}
}
if (vocab_only) { if (vocab_only) {
return; return;
} }