From fdcaa955e347deedfa504a53905899a0c18f7b84 Mon Sep 17 00:00:00 2001
From: Julien Chaumond <julien@huggingface.co>
Date: Thu, 2 Nov 2023 20:20:54 +0100
Subject: [PATCH] transformers: Add a flag to force load from safetensors
 (#4450)

---
 modules/models.py        | 3 ++-
 modules/shared.py        | 1 +
 modules/ui_model_menu.py | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/modules/models.py b/modules/models.py
index cbead69d..67f3e94c 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -123,7 +123,8 @@ def huggingface_loader(model_name):
     params = {
         'low_cpu_mem_usage': True,
         'trust_remote_code': shared.args.trust_remote_code,
-        'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16
+        'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
+        'use_safetensors': True if shared.args.force_safetensors else None
     }
     config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=params['trust_remote_code'])
 
diff --git a/modules/shared.py b/modules/shared.py
index 8523930f..a6082ea8 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -91,6 +91,7 @@ parser.add_argument('--no-cache', action='store_true', help='Set use_cache to Fa
 parser.add_argument('--xformers', action='store_true', help='Use xformer\'s memory efficient attention. This is really old and probably doesn\'t do anything.')
 parser.add_argument('--sdp-attention', action='store_true', help='Use PyTorch 2.0\'s SDP attention. Same as above.')
 parser.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
+parser.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
 parser.add_argument('--use_fast', action='store_true', help='Set use_fast=True while loading the tokenizer.')
 
 # Accelerate 4-bit
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 833c1308..1c7b19d9 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -122,6 +122,7 @@ def create_ui():
                             shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
                             shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17')
                             shared.gradio['llama_cpp_seed'] = gr.Number(label='Seed (0 for random)', value=shared.args.llama_cpp_seed)
+                            shared.gradio['force_safetensors'] = gr.Checkbox(label='force_safetensors', info='Whether to force model loading from safetensors file. Prevents arbitrary code execution.', value=shared.args.force_safetensors)
                             shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='To enable this option, start the web UI with the --trust-remote-code flag. It is necessary for some models.', interactive=shared.args.trust_remote_code)
                             shared.gradio['use_fast'] = gr.Checkbox(label="use_fast", value=shared.args.use_fast, info='Set use_fast=True while loading the tokenizer. May trigger a conversion that takes several minutes.')
                             shared.gradio['disable_exllama'] = gr.Checkbox(label="disable_exllama", value=shared.args.disable_exllama, info='Disable ExLlama kernel.')