Add llama enum for IQ2_XS
commit 9bfcb16fd3 (parent a1610b05b2)
llama.cpp

@@ -2223,6 +2223,7 @@ struct llama_model_loader {
             case GGML_TYPE_Q5_K:    ftype = LLAMA_FTYPE_MOSTLY_Q5_K_M;  break;
             case GGML_TYPE_Q6_K:    ftype = LLAMA_FTYPE_MOSTLY_Q6_K;    break;
             case GGML_TYPE_IQ2_XXS: ftype = LLAMA_FTYPE_MOSTLY_IQ2_XXS; break;
+            case GGML_TYPE_IQ2_XS:  ftype = LLAMA_FTYPE_MOSTLY_IQ2_XS;  break;
             default:
                 {
                     LLAMA_LOG_WARN("%s: unknown type %s\n", __func__, ggml_type_name(type_max));
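For context: type_max in this switch is the most frequent tensor type in the file. The loader tallies types while scanning tensors, guesses the overall file type from the winner, and flags the result with LLAMA_FTYPE_GUESSED. A minimal standalone sketch of that tally, with hypothetical names:

    #include <map>
    #include <vector>
    #include "ggml.h"

    // Hypothetical sketch: find the most common tensor type, roughly as the
    // loader does before entering the switch shown in the hunk above.
    static ggml_type most_common_type(const std::vector<ggml_type> & tensor_types) {
        std::map<ggml_type, int> counts;
        for (ggml_type t : tensor_types) {
            counts[t]++;
        }
        ggml_type type_max = GGML_TYPE_F32; // fallback for an empty file
        int n_max = 0;
        for (const auto & kv : counts) {
            if (kv.second > n_max) {
                n_max    = kv.second;
                type_max = kv.first;
            }
        }
        return type_max;
    }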
@@ -2595,6 +2596,7 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
         case LLAMA_FTYPE_MOSTLY_Q5_K_M: return "Q5_K - Medium";
         case LLAMA_FTYPE_MOSTLY_Q6_K:   return "Q6_K";
         case LLAMA_FTYPE_MOSTLY_IQ2_XXS:return "IQ2_XSS - 2.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XS: return "IQ2_XS - 2.3125 bpw";

         default: return "unknown, may not work";
     }
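The bits-per-weight figures in these strings follow directly from the block layouts. A minimal compile-time sketch, assuming IQ2_XS blocks mirror block_iq2_xs in ggml-quants.h (one fp16 super-block scale, 32 packed 16-bit grid/sign words, and 8 bytes of sub-block scales per 256 weights); the struct name and field comments here are illustrative, not the ggml definition itself:

    #include <cstdint>

    // Assumed IQ2_XS block layout for QK_K == 256 (illustrative mirror of
    // block_iq2_xs, not the ggml definition).
    struct iq2_xs_block_sketch {
        uint16_t d;                // fp16 super-block scale
        uint16_t qs[256 / 8];      // 32 x 16-bit packed grid index + sign bits
        uint8_t  scales[256 / 32]; // 8 bytes of 4-bit sub-block scales
    };

    // 2 + 64 + 8 = 74 bytes per 256 weights -> 74 * 8 / 256 = 2.3125 bpw.
    static_assert(sizeof(iq2_xs_block_sketch) == 74, "2.3125 bpw block");

Dropping the scales array gives the IQ2_XXS case above: 66 bytes per 256 weights, i.e. 2.0625 bpw.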
@@ -9041,6 +9043,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         case LLAMA_FTYPE_MOSTLY_Q5_K_M: quantized_type = GGML_TYPE_Q5_K; break;
         case LLAMA_FTYPE_MOSTLY_Q6_K:   quantized_type = GGML_TYPE_Q6_K; break;
         case LLAMA_FTYPE_MOSTLY_IQ2_XXS:quantized_type = GGML_TYPE_IQ2_XXS; break;
+        case LLAMA_FTYPE_MOSTLY_IQ2_XS :quantized_type = GGML_TYPE_IQ2_XS; break;

         default: throw std::runtime_error(format("invalid output file type %d\n", ftype));
     }
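With this ftype-to-tensor-type mapping in place, the new format is reachable through the public quantization API. A minimal sketch with hypothetical file names; note that the 2-bit IQ formats are generally meant to be driven by an importance matrix, which this sketch glosses over:

    #include <cstdio>
    #include "llama.h"

    int main() {
        llama_model_quantize_params params = llama_model_quantize_default_params();
        params.ftype   = LLAMA_FTYPE_MOSTLY_IQ2_XS; // the new enum value
        params.nthread = 4;

        // File names are hypothetical; llama_model_quantize returns 0 on success.
        if (llama_model_quantize("model-f16.gguf", "model-iq2_xs.gguf", &params) != 0) {
            fprintf(stderr, "quantization failed\n");
            return 1;
        }
        return 0;
    }

The quantize example program exposes the same types by name on the command line (e.g. quantize model-f16.gguf model-iq2_xs.gguf IQ2_XS), assuming IQ2_XS is registered in its options list.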
llama.h
@@ -104,6 +104,7 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_Q5_K_M  = 17, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q6_K    = 18, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_IQ2_XS  = 20, // except 1d tensors

         LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
     };
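llama_ftype values are part of the public C API and are recorded in model metadata (the GGUF general.file_type key), so new entries append after the last used value rather than renumbering anything; IQ2_XS takes slot 20 after IQ2_XXS. An illustrative compile-time check, not part of llama.cpp:

    #include "llama.h"

    // The numeric value is load-bearing: it is written into model files, so
    // existing entries must never be renumbered. Illustrative check only.
    static_assert(LLAMA_FTYPE_MOSTLY_IQ2_XS == 20, "IQ2_XS appends after IQ2_XXS (19)");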