diff --git a/modules/exllamav2_hf.py b/modules/exllamav2_hf.py index febb2c64..96a89429 100644 --- a/modules/exllamav2_hf.py +++ b/modules/exllamav2_hf.py @@ -48,9 +48,9 @@ class Exllamav2HF(PreTrainedModel): split = [float(alloc) for alloc in shared.args.gpu_split.split(",")] if shared.args.enable_tp: - model.load_tp(split) + self.ex_model.load_tp(split) elif not shared.args.autosplit: - model.load(split) + self.ex_model.load(split) # Determine the correct cache type if shared.args.cache_8bit: