mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-25 17:29:22 +01:00
transformers: Add a flag to force load from safetensors (#4450)
This commit is contained in:
parent
c0655475ae
commit
fdcaa955e3
@ -123,7 +123,8 @@ def huggingface_loader(model_name):
|
|||||||
params = {
|
params = {
|
||||||
'low_cpu_mem_usage': True,
|
'low_cpu_mem_usage': True,
|
||||||
'trust_remote_code': shared.args.trust_remote_code,
|
'trust_remote_code': shared.args.trust_remote_code,
|
||||||
'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16
|
'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
|
||||||
|
'use_safetensors': True if shared.args.force_safetensors else None
|
||||||
}
|
}
|
||||||
config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=params['trust_remote_code'])
|
config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=params['trust_remote_code'])
|
||||||
|
|
||||||
|
@ -91,6 +91,7 @@ parser.add_argument('--no-cache', action='store_true', help='Set use_cache to Fa
|
|||||||
parser.add_argument('--xformers', action='store_true', help='Use xformer\'s memory efficient attention. This is really old and probably doesn\'t do anything.')
|
parser.add_argument('--xformers', action='store_true', help='Use xformer\'s memory efficient attention. This is really old and probably doesn\'t do anything.')
|
||||||
parser.add_argument('--sdp-attention', action='store_true', help='Use PyTorch 2.0\'s SDP attention. Same as above.')
|
parser.add_argument('--sdp-attention', action='store_true', help='Use PyTorch 2.0\'s SDP attention. Same as above.')
|
||||||
parser.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
|
parser.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
|
||||||
|
parser.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
|
||||||
parser.add_argument('--use_fast', action='store_true', help='Set use_fast=True while loading the tokenizer.')
|
parser.add_argument('--use_fast', action='store_true', help='Set use_fast=True while loading the tokenizer.')
|
||||||
|
|
||||||
# Accelerate 4-bit
|
# Accelerate 4-bit
|
||||||
|
@ -122,6 +122,7 @@ def create_ui():
|
|||||||
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
|
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
|
||||||
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17')
|
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17')
|
||||||
shared.gradio['llama_cpp_seed'] = gr.Number(label='Seed (0 for random)', value=shared.args.llama_cpp_seed)
|
shared.gradio['llama_cpp_seed'] = gr.Number(label='Seed (0 for random)', value=shared.args.llama_cpp_seed)
|
||||||
|
shared.gradio['force_safetensors'] = gr.Checkbox(label='force_safetensors', info='Whether to force model loading from safetensors file. Prevents arbitrary code execution.', value=shared.args.force_safetensors)
|
||||||
shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='To enable this option, start the web UI with the --trust-remote-code flag. It is necessary for some models.', interactive=shared.args.trust_remote_code)
|
shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='To enable this option, start the web UI with the --trust-remote-code flag. It is necessary for some models.', interactive=shared.args.trust_remote_code)
|
||||||
shared.gradio['use_fast'] = gr.Checkbox(label="use_fast", value=shared.args.use_fast, info='Set use_fast=True while loading the tokenizer. May trigger a conversion that takes several minutes.')
|
shared.gradio['use_fast'] = gr.Checkbox(label="use_fast", value=shared.args.use_fast, info='Set use_fast=True while loading the tokenizer. May trigger a conversion that takes several minutes.')
|
||||||
shared.gradio['disable_exllama'] = gr.Checkbox(label="disable_exllama", value=shared.args.disable_exllama, info='Disable ExLlama kernel.')
|
shared.gradio['disable_exllama'] = gr.Checkbox(label="disable_exllama", value=shared.args.disable_exllama, info='Disable ExLlama kernel.')
|
||||||
|
Loading…
Reference in New Issue
Block a user