diff --git a/modules/models.py b/modules/models.py
index 2d3ce2ad..3ec4cd9d 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -99,6 +99,16 @@ def load_model(model_name):
 
         return model, tokenizer
 
+    # llamacpp model
+    elif shared.is_llamacpp:
+        from modules.llamacpp_model_alternative import LlamaCppModel
+
+        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
+        print(f"llama.cpp weights detected: {model_file}\n")
+
+        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
+        return model, tokenizer
+
     # Quantized model
     elif shared.args.wbits > 0:
 
@@ -116,16 +126,6 @@ def load_model(model_name):
 
         model = load_quantized(model_name)
 
-    # llamacpp model
-    elif shared.is_llamacpp:
-        from modules.llamacpp_model_alternative import LlamaCppModel
-
-        model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('ggml*.bin'))[0]
-        print(f"llama.cpp weights detected: {model_file}\n")
-
-        model, tokenizer = LlamaCppModel.from_pretrained(model_file)
-        return model, tokenizer
-
     # Custom
     else:
         params = {"low_cpu_mem_usage": True}
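
The patch only moves code; its effect comes from branch precedence in `load_model`'s `if`/`elif` chain, which is evaluated top to bottom. Before the change, `shared.args.wbits > 0` was tested first, so a `ggml*.bin` model would be routed to the GPTQ loader whenever `--wbits` was also set; after the change, the `shared.is_llamacpp` check wins. Below is a minimal, self-contained sketch of that control flow, not the webui's actual code: the `SimpleNamespace` stand-in for `modules.shared` and the placeholder return values are hypothetical, added only so the example runs on its own.

```python
from pathlib import Path
from types import SimpleNamespace

# Hypothetical stand-in for modules.shared, just to make the sketch
# self-contained; the real attributes come from the webui's CLI flags.
shared = SimpleNamespace(
    is_llamacpp=True,
    args=SimpleNamespace(model_dir="models", wbits=4),
)

def load_model(model_name):
    """Branch order after the patch: the llama.cpp check comes first."""
    # llama.cpp model: because this chain is evaluated top to bottom,
    # ggml weights now take precedence even when --wbits is also set.
    if shared.is_llamacpp:
        matches = list(Path(f"{shared.args.model_dir}/{model_name}").glob("ggml*.bin"))
        if not matches:
            raise FileNotFoundError("no ggml*.bin weights found")
        print(f"llama.cpp weights detected: {matches[0]}\n")
        # Stand-in for LlamaCppModel.from_pretrained(matches[0]).
        return f"<llama.cpp model from {matches[0]}>"

    # Quantized (GPTQ) model: before the patch this branch ran first and
    # shadowed the llama.cpp branch whenever wbits > 0.
    elif shared.args.wbits > 0:
        return "<GPTQ-quantized model>"

    # Custom / default loader.
    return "<default model>"
```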