mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-22 08:07:56 +01:00
Set llm_int8_enable_fp32_cpu_offload=True for --load-in-4bit
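This allows parts of the model to be offloaded to the CPU in 32-bit floating point (fp32) precision when loading in 4-bit. Note that CPU offloading is very slow.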
This commit is contained in:
parent
70845c76fb
commit
dfdb6fee22
@ -205,6 +205,7 @@ def huggingface_loader(model_name):
|
||||
'bnb_4bit_compute_dtype': eval("torch.{}".format(shared.args.compute_dtype)) if shared.args.compute_dtype in ["bfloat16", "float16", "float32"] else None,
|
||||
'bnb_4bit_quant_type': shared.args.quant_type,
|
||||
'bnb_4bit_use_double_quant': shared.args.use_double_quant,
|
||||
'llm_int8_enable_fp32_cpu_offload': True
|
||||
}
|
||||
|
||||
params['quantization_config'] = BitsAndBytesConfig(**quantization_config_params)
|
||||
|
Loading…
Reference in New Issue
Block a user