mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 21:10:24 +01:00
llama : check for 256 divisibility for IQ2_XS, IQ2_XXS (#4950)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
parent
4a3156de2f
commit
2faaef3979
@ -8559,7 +8559,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|||||||
//}
|
//}
|
||||||
bool convert_incompatible_tensor = false;
|
bool convert_incompatible_tensor = false;
|
||||||
if (new_type == GGML_TYPE_Q2_K || new_type == GGML_TYPE_Q3_K || new_type == GGML_TYPE_Q4_K ||
|
if (new_type == GGML_TYPE_Q2_K || new_type == GGML_TYPE_Q3_K || new_type == GGML_TYPE_Q4_K ||
|
||||||
new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K) {
|
new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K ||
|
||||||
|
new_type == GGML_TYPE_IQ2_XS || new_type == GGML_TYPE_IQ2_XXS) {
|
||||||
int nx = tensor->ne[0];
|
int nx = tensor->ne[0];
|
||||||
int ny = tensor->ne[1];
|
int ny = tensor->ne[1];
|
||||||
if (nx % QK_K != 0) {
|
if (nx % QK_K != 0) {
|
||||||
@ -8571,6 +8572,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|||||||
}
|
}
|
||||||
if (convert_incompatible_tensor) {
|
if (convert_incompatible_tensor) {
|
||||||
switch (new_type) {
|
switch (new_type) {
|
||||||
|
case GGML_TYPE_IQ2_XXS:
|
||||||
|
case GGML_TYPE_IQ2_XS:
|
||||||
case GGML_TYPE_Q2_K: new_type = GGML_TYPE_Q4_0; break;
|
case GGML_TYPE_Q2_K: new_type = GGML_TYPE_Q4_0; break;
|
||||||
case GGML_TYPE_Q3_K: new_type = GGML_TYPE_Q4_1; break;
|
case GGML_TYPE_Q3_K: new_type = GGML_TYPE_Q4_1; break;
|
||||||
case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
|
case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user