mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-24 02:19:18 +01:00
iq3_xxs: quards for the no-imatrix situation (#5334)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
parent
7e1ae372f3
commit
89503dcb5f
10
llama.cpp
10
llama.cpp
@ -9456,8 +9456,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|||||||
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && qs.model.hparams.n_gqa() >= 4) {
|
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && qs.model.hparams.n_gqa() >= 4) {
|
||||||
new_type = GGML_TYPE_Q4_K;
|
new_type = GGML_TYPE_Q4_K;
|
||||||
}
|
}
|
||||||
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && qs.model.hparams.n_gqa() >= 4) {
|
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
|
||||||
new_type = GGML_TYPE_Q4_K;
|
new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_Q3_K : GGML_TYPE_IQ3_XXS;
|
||||||
}
|
}
|
||||||
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
|
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
|
||||||
new_type = qs.i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
|
new_type = qs.i_attention_wv < 2 ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
|
||||||
@ -9496,9 +9496,9 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|||||||
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS) {
|
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_XS) {
|
||||||
if (i_layer < n_layer/8) new_type = GGML_TYPE_Q4_K;
|
if (i_layer < n_layer/8) new_type = GGML_TYPE_Q4_K;
|
||||||
}
|
}
|
||||||
//else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
|
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS && !qs.has_imatrix) {
|
||||||
// if (i_layer < n_layer/8) new_type = GGML_TYPE_Q5_K;
|
new_type = i_layer < n_layer/8 ? GGML_TYPE_Q4_K : GGML_TYPE_Q3_K;
|
||||||
//}
|
}
|
||||||
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
|
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
|
||||||
new_type = i_layer < n_layer/16 ? GGML_TYPE_Q5_K
|
new_type = i_layer < n_layer/16 ? GGML_TYPE_Q5_K
|
||||||
: arch != LLM_ARCH_FALCON || use_more_bits(i_layer, n_layer) ? GGML_TYPE_Q4_K
|
: arch != LLM_ARCH_FALCON || use_more_bits(i_layer, n_layer) ? GGML_TYPE_Q4_K
|
||||||
|
Loading…
Reference in New Issue
Block a user