diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py
index 32381eff..2d8ef612 100644
--- a/modules/GPTQ_loader.py
+++ b/modules/GPTQ_loader.py
@@ -10,7 +10,6 @@ import transformers
 from transformers import AutoConfig, AutoModelForCausalLM
 
 import modules.shared as shared
-from server import get_model_specific_settings
 
 sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa")))
 
@@ -115,6 +114,7 @@ def find_quantized_model_file(model_name):
         for ext in ['.safetensors', '.pt']
         for hyphen in ['-', f'/{model_name}-', '/']
     ]
+
     for path in priority_name_list:
         if path.exists():
             pt_path = path
@@ -143,19 +143,13 @@ def find_quantized_model_file(model_name):
 
 # The function that loads the model in modules/models.py
 def load_quantized(model_name):
-    # Find the model type
-    if not shared.args.model_type:
-        settings = get_model_specific_settings(model_name)
-        if 'model_type' in settings and settings['model_type'] != 'None':
-            model_type = settings['model_type']
-        else:
-            logging.error("The model could not be loaded because its type could not be inferred from its name.")
-            logging.error("Please specify the type manually using the --model_type argument.")
-            return
-    else:
-        model_type = shared.args.model_type.lower()
+    if shared.args.model_type is None:
+        logging.error("The model could not be loaded because its type could not be inferred from its name.")
+        logging.error("Please specify the type manually using the --model_type argument.")
+        return
 
     # Select the appropriate load_quant function
+    model_type = shared.args.model_type.lower()
     if shared.args.pre_layer and model_type == 'llama':
         load_quant = llama_inference_offload.load_quant
     elif model_type in ('llama', 'opt', 'gptj'):
diff --git a/modules/models.py b/modules/models.py
index d5f6594c..5b838222 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -252,7 +252,7 @@ def load_model(model_name):
     else:
         tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/{model_name}/"), trust_remote_code=trust_remote_code)
 
-    logging.info(f"Loaded the model in {(time.time()-t0):.2f} seconds.")
+    logging.info(f"Loaded the model in {(time.time()-t0):.2f} seconds.\n")
     return model, tokenizer
 
 
diff --git a/server.py b/server.py
index b8f881a4..7c3ab4b8 100644
--- a/server.py
+++ b/server.py
@@ -372,8 +372,6 @@ def create_model_menus():
 
     load.click(
         ui.gather_interface_values, [shared.gradio[k] for k in shared.input_elements], shared.gradio['interface_state']).then(
-        ui.apply_interface_values, shared.gradio['interface_state'],
-        [shared.gradio[k] for k in ui.list_interface_input_elements(chat=shared.is_chat())], show_progress=False).then(
         update_model_parameters, shared.gradio['interface_state'], None).then(
         partial(load_model_wrapper, autoload=True), shared.gradio['model_menu'], shared.gradio['model_status'], show_progress=False)
 