llama : do not quantize expert gating tensors
commit d1259b7b35 (parent 6cfb31f9ea)
@@ -8443,6 +8443,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         quantize &= params->quantize_output_tensor || name != "output.weight";
         quantize &= !params->only_copy;
 
+        // do not quantize expert gating tensors
+        quantize &= name.find("ffn_gate_inp.weight") == std::string::npos;
+
         enum ggml_type new_type;
         void * new_data;
         size_t new_size;
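For context, below is a minimal, self-contained sketch of the name-based filter pattern this hunk extends. The `should_quantize` helper is hypothetical, introduced only for illustration; in the actual source these checks run inline inside llama_model_quantize_internal. The comment about router precision is an assumption about the motivation (the expert gating tensor is the mixture-of-experts router weight, small but sensitive), not text from the commit itself.

#include <string>

// Hypothetical helper illustrating the filter pattern from the diff above:
// a tensor is quantized only if none of the exclusion rules match its name.
static bool should_quantize(const std::string & name,
                            bool quantize_output_tensor,
                            bool only_copy) {
    bool quantize = true;

    // optionally keep the output projection in full precision
    quantize &= quantize_output_tensor || name != "output.weight";

    // copy-only mode never quantizes anything
    quantize &= !only_copy;

    // assumption: expert gating (MoE router) tensors are tiny but
    // precision-sensitive, so they are always left unquantized
    quantize &= name.find("ffn_gate_inp.weight") == std::string::npos;

    return quantize;
}

Note the substring match via find() rather than an exact comparison: per-layer tensor names carry a block prefix (e.g. "blk.0.ffn_gate_inp.weight"), so a substring test covers the gating tensor in every layer with a single rule.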