diff --git a/llama.cpp b/llama.cpp
index 4ac46193c..0a5f755ca 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8443,6 +8443,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         quantize &= params->quantize_output_tensor || name != "output.weight";
         quantize &= !params->only_copy;
 
+        // do not quantize expert gating tensors
+        quantize &= name.find("ffn_gate_inp.weight") == std::string::npos;
+
         enum ggml_type new_type;
         void * new_data;
         size_t new_size;