mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 06:10:29 +01:00
llama : fix quantization when tensors are missing (#5423)
This commit is contained in:
parent
df334a1125
commit
099afc6274
16
llama.cpp
16
llama.cpp
@ -772,22 +772,37 @@ struct LLM_TN {
|
|||||||
llm_arch arch;
|
llm_arch arch;
|
||||||
|
|
||||||
// Look up the name registered for `tensor` under this object's `arch`
// in LLM_TENSOR_NAMES.
//
// Returns the stored name, or the sentinel "__missing__" when the table
// for `arch` has no entry for `tensor` (instead of letting .at() throw).
std::string operator()(llm_tensor tensor) const {
    // Single lookup: reuse the iterator rather than find() followed by at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return it->second;
}
|
||||||
|
|
||||||
// Look up the name registered for `tensor` under this object's `arch`
// and append "." + `suffix` to it.
//
// Returns "<name>.<suffix>", or the bare sentinel "__missing__" (no suffix
// appended) when the table for `arch` has no entry for `tensor`.
std::string operator()(llm_tensor tensor, const std::string & suffix) const {
    // Single lookup: reuse the iterator rather than find() followed by at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return it->second + "." + suffix;
}
|
||||||
|
|
||||||
// Look up the name registered for `tensor` under this object's `arch`
// and pass it to ::format as the format string, with `bid` as its argument.
// NOTE(review): presumably the stored name is a printf-style pattern with
// one integer placeholder for the block index; confirm against the table.
//
// Returns the formatted name, or the sentinel "__missing__" when the table
// for `arch` has no entry for `tensor`.
std::string operator()(llm_tensor tensor, int bid) const {
    // Single lookup: reuse the iterator rather than find() followed by at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return ::format(it->second.c_str(), bid);
}
|
||||||
|
|
||||||
// Look up the name registered for `tensor` under this object's `arch`,
// pass it to ::format as the format string with `bid` as its argument,
// then append "." + `suffix`.
// NOTE(review): presumably the stored name is a printf-style pattern with
// one integer placeholder for the block index; confirm against the table.
//
// Returns "<formatted name>.<suffix>", or the bare sentinel "__missing__"
// (no suffix appended) when the table for `arch` has no entry for `tensor`.
std::string operator()(llm_tensor tensor, const std::string & suffix, int bid) const {
    // Single lookup: reuse the iterator rather than find() followed by at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return ::format(it->second.c_str(), bid) + "." + suffix;
}
|
||||||
|
|
||||||
// Look up the name registered for `tensor` under this object's `arch`,
// pass it to ::format as the format string with `bid` and `xid` as its
// arguments (in that order), then append "." + `suffix`.
// NOTE(review): presumably the stored name is a printf-style pattern with
// two integer placeholders; confirm against the table.
//
// Returns "<formatted name>.<suffix>", or the bare sentinel "__missing__"
// (no suffix appended) when the table for `arch` has no entry for `tensor`.
std::string operator()(llm_tensor tensor, const std::string & suffix, int bid, int xid) const {
    // Single lookup: reuse the iterator rather than find() followed by at().
    const auto & names = LLM_TENSOR_NAMES[arch];
    const auto it = names.find(tensor);
    if (it == names.end()) {
        return "__missing__";
    }
    return ::format(it->second.c_str(), bid, xid) + "." + suffix;
}
|
||||||
};
|
};
|
||||||
@ -10227,6 +10242,7 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
|
|||||||
}
|
}
|
||||||
++qs.i_ffn_up;
|
++qs.i_ffn_up;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K;
|
// if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K;
|
||||||
//}
|
//}
|
||||||
// IK: let's remove this, else Q2_K is almost the same as Q3_K_S
|
// IK: let's remove this, else Q2_K is almost the same as Q3_K_S
|
||||||
|
Loading…
Reference in New Issue
Block a user