mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-01 00:39:00 +01:00
llama : do not quantize expert gating tensors
This commit is contained in:
parent
6cfb31f9ea
commit
d1259b7b35
@@ -8443,6 +8443,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         quantize &= params->quantize_output_tensor || name != "output.weight";
         quantize &= !params->only_copy;

+        // do not quantize expert gating tensors
+        quantize &= name.find("ffn_gate_inp.weight") == std::string::npos;
+
         enum ggml_type new_type;
         void * new_data;
         size_t new_size;
Loading…
Reference in New Issue
Block a user