diff --git a/gguf-llama.cpp b/gguf-llama.cpp index 27e0b5d43..7bd8cef6a 100644 --- a/gguf-llama.cpp +++ b/gguf-llama.cpp @@ -467,13 +467,11 @@ struct llama_load_tensors_map { }; enum gguf_file_version { - gguf_file_VERSION_GGML, - gguf_file_VERSION_GGMF_V1, // added version field and scores in vocab - gguf_file_VERSION_GGJT_V1, // added padding - gguf_file_VERSION_GGJT_V2, // changed quantization format - gguf_file_VERSION_GGJT_V3, // changed Q4 and Q8 quantization format + GGUF_FILE_VERSION_V1 + }; + struct gguf_file_loader { gguf_file file; gguf_context * gguf_ctx; @@ -1069,12 +1067,8 @@ int64_t llama_time_us() { static const char *gguf_file_version_name(gguf_file_version version) { switch (version) { - case gguf_file_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)"; - case gguf_file_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)"; - case gguf_file_VERSION_GGJT_V1: return "ggjt v1 (pre #1405)"; - case gguf_file_VERSION_GGJT_V2: return "ggjt v2 (pre #1508)"; - case gguf_file_VERSION_GGJT_V3: return "ggjt v3 (latest)"; - } + case GGUF_FILE_VERSION_V1: return "GGUF V1 (latest)"; + } return "unknown"; } @@ -1205,22 +1199,12 @@ static void llama_model_load_internal( fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type)); } - if (file_version < gguf_file_VERSION_GGJT_V2) { - if (hparams.ftype != LLAMA_FTYPE_ALL_F32 && - hparams.ftype != LLAMA_FTYPE_MOSTLY_F16 && - hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) { - throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)")); - } + if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 || + hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 || + hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) { + throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)")); } - - if (file_version < gguf_file_VERSION_GGJT_V3) { - if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 || - hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 || - hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) { - throw std::runtime_error(format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)")); - } - } - + if (vocab_only) { return; }