From fdcaa955e347deedfa504a53905899a0c18f7b84 Mon Sep 17 00:00:00 2001
From: Julien Chaumond
Date: Thu, 2 Nov 2023 20:20:54 +0100
Subject: [PATCH] transformers: Add a flag to force load from safetensors
 (#4450)

---
 modules/models.py        | 3 ++-
 modules/shared.py        | 1 +
 modules/ui_model_menu.py | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/modules/models.py b/modules/models.py
index cbead69d..67f3e94c 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -123,7 +123,8 @@ def huggingface_loader(model_name):
     params = {
         'low_cpu_mem_usage': True,
         'trust_remote_code': shared.args.trust_remote_code,
-        'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16
+        'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
+        'use_safetensors': True if shared.args.force_safetensors else None
     }
 
     config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=params['trust_remote_code'])
diff --git a/modules/shared.py b/modules/shared.py
index 8523930f..a6082ea8 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -91,6 +91,7 @@ parser.add_argument('--no-cache', action='store_true', help='Set use_cache to Fa
 parser.add_argument('--xformers', action='store_true', help='Use xformer\'s memory efficient attention. This is really old and probably doesn\'t do anything.')
 parser.add_argument('--sdp-attention', action='store_true', help='Use PyTorch 2.0\'s SDP attention. Same as above.')
 parser.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
+parser.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
 parser.add_argument('--use_fast', action='store_true', help='Set use_fast=True while loading the tokenizer.')
 
 # Accelerate 4-bit
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 833c1308..1c7b19d9 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -122,6 +122,7 @@ def create_ui():
                             shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
                             shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17')
                             shared.gradio['llama_cpp_seed'] = gr.Number(label='Seed (0 for random)', value=shared.args.llama_cpp_seed)
+                            shared.gradio['force_safetensors'] = gr.Checkbox(label='force_safetensors', info='Whether to force model loading from safetensors file. Prevents arbitrary code execution.', value=shared.args.force_safetensors)
                             shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='To enable this option, start the web UI with the --trust-remote-code flag. It is necessary for some models.', interactive=shared.args.trust_remote_code)
                             shared.gradio['use_fast'] = gr.Checkbox(label="use_fast", value=shared.args.use_fast, info='Set use_fast=True while loading the tokenizer. May trigger a conversion that takes several minutes.')
                             shared.gradio['disable_exllama'] = gr.Checkbox(label="disable_exllama", value=shared.args.disable_exllama, info='Disable ExLlama kernel.')