From db5f6cd1d8bd0a27a7318784157049218651470e Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 30 Mar 2024 21:51:39 -0700
Subject: [PATCH] Fix ExLlamaV2 loaders using unnecessary "bits" metadata

---
 modules/models_settings.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/models_settings.py b/modules/models_settings.py
index e487209e..b3d2dc64 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -89,7 +89,8 @@ def get_model_metadata(model):
                 if metadata['rope_scaling']['type'] == 'linear':
                     model_settings['compress_pos_emb'] = metadata['rope_scaling']['factor']
 
-            if 'quantization_config' in metadata:
+            # Read GPTQ metadata for old GPTQ loaders
+            if 'quantization_config' in metadata and metadata['quantization_config'].get('quant_method', '') != 'exl2':
                 if 'bits' in metadata['quantization_config']:
                     model_settings['wbits'] = metadata['quantization_config']['bits']
                 if 'group_size' in metadata['quantization_config']: