llama : do not quantize expert gating tensors

This commit is contained in:
Georgi Gerganov 2023-12-10 13:00:13 +02:00
parent 6cfb31f9ea
commit d1259b7b35
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735

View File

@@ -8443,6 +8443,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
quantize &= params->quantize_output_tensor || name != "output.weight";
quantize &= !params->only_copy;
// do not quantize expert gating tensors
quantize &= name.find("ffn_gate_inp.weight") == std::string::npos;
enum ggml_type new_type;
void * new_data;
size_t new_size;