Remove unused code

oobabooga 2023-05-10 11:59:59 -03:00
parent ec14d9b725
commit 3316e33d14
3 changed files with 7 additions and 15 deletions

View File

@@ -10,7 +10,6 @@ import transformers
 from transformers import AutoConfig, AutoModelForCausalLM
 import modules.shared as shared
-from server import get_model_specific_settings
 sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa")))
@@ -115,6 +114,7 @@ def find_quantized_model_file(model_name):
         for ext in ['.safetensors', '.pt']
         for hyphen in ['-', f'/{model_name}-', '/']
     ]
+
     for path in priority_name_list:
         if path.exists():
             pt_path = path
@@ -143,19 +143,13 @@ def find_quantized_model_file(model_name):
 # The function that loads the model in modules/models.py
 def load_quantized(model_name):
-    # Find the model type
-    if not shared.args.model_type:
-        settings = get_model_specific_settings(model_name)
-        if 'model_type' in settings and settings['model_type'] != 'None':
-            model_type = settings['model_type']
-        else:
-            logging.error("The model could not be loaded because its type could not be inferred from its name.")
-            logging.error("Please specify the type manually using the --model_type argument.")
-            return
-    else:
-        model_type = shared.args.model_type.lower()
+    if shared.args.model_type is None:
+        logging.error("The model could not be loaded because its type could not be inferred from its name.")
+        logging.error("Please specify the type manually using the --model_type argument.")
+        return
     # Select the appropriate load_quant function
+    model_type = shared.args.model_type.lower()
     if shared.args.pre_layer and model_type == 'llama':
         load_quant = llama_inference_offload.load_quant
     elif model_type in ('llama', 'opt', 'gptj'):
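
With the settings-based fallback removed, the model type must now be supplied via shared.args.model_type (the --model_type flag). A minimal standalone sketch of the resulting guard, with the value passed in directly instead of read from shared.args (the function name is hypothetical):

import logging

def resolve_model_type(model_type):
    # model_type stands in for shared.args.model_type; None means --model_type was not given.
    if model_type is None:
        logging.error("The model could not be loaded because its type could not be inferred from its name.")
        logging.error("Please specify the type manually using the --model_type argument.")
        return None
    return model_type.lower()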

View File

@@ -252,7 +252,7 @@ def load_model(model_name):
     else:
         tokenizer = AutoTokenizer.from_pretrained(Path(f"{shared.args.model_dir}/{model_name}/"), trust_remote_code=trust_remote_code)
-    logging.info(f"Loaded the model in {(time.time()-t0):.2f} seconds.")
+    logging.info(f"Loaded the model in {(time.time()-t0):.2f} seconds.\n")
     return model, tokenizer

View File

@@ -372,8 +372,6 @@ def create_model_menus():
     load.click(
         ui.gather_interface_values, [shared.gradio[k] for k in shared.input_elements], shared.gradio['interface_state']).then(
-        ui.apply_interface_values, shared.gradio['interface_state'],
-        [shared.gradio[k] for k in ui.list_interface_input_elements(chat=shared.is_chat())], show_progress=False).then(
         update_model_parameters, shared.gradio['interface_state'], None).then(
         partial(load_model_wrapper, autoload=True), shared.gradio['model_menu'], shared.gradio['model_status'], show_progress=False)
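
The two removed lines were one step in a chained Gradio event; the remaining steps still run in order. A minimal sketch of that chaining pattern, assuming a Gradio version that supports .then() (the functions and components here are hypothetical, not the project's):

import gradio as gr

def gather(state):
    # Stand-in for a step that collects interface values.
    return state

def load(state):
    # Stand-in for the final model-loading step.
    return f"loaded with {state}"

with gr.Blocks() as demo:
    state_box = gr.Textbox(label="state")
    status = gr.Textbox(label="status")
    load_btn = gr.Button("Load")
    # Each .then() step runs after the previous step in the chain finishes.
    load_btn.click(gather, state_box, state_box).then(
        load, state_box, status, show_progress=False)

# demo.launch()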