mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-01-12 05:17:40 +01:00
Disable half2 for ExLlama when using HIP (#2912)
This commit is contained in:
parent
ac0f96e785
commit
3c076c3c80
@ -1,6 +1,8 @@
|
|||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from torch import version as torch_version
|
||||||
|
|
||||||
from modules import shared
|
from modules import shared
|
||||||
from modules.logging_colors import logger
|
from modules.logging_colors import logger
|
||||||
|
|
||||||
@ -51,6 +53,12 @@ class ExllamaModel:
|
|||||||
if shared.args.gpu_split:
|
if shared.args.gpu_split:
|
||||||
config.set_auto_map(shared.args.gpu_split)
|
config.set_auto_map(shared.args.gpu_split)
|
||||||
config.gpu_peer_fix = True
|
config.gpu_peer_fix = True
|
||||||
|
if torch_version.hip:
|
||||||
|
config.rmsnorm_no_half2 = True
|
||||||
|
config.rope_no_half2 = True
|
||||||
|
config.matmul_no_half2 = True
|
||||||
|
config.silu_no_half2 = True
|
||||||
|
|
||||||
|
|
||||||
model = ExLlama(config)
|
model = ExLlama(config)
|
||||||
tokenizer = ExLlamaTokenizer(str(tokenizer_model_path))
|
tokenizer = ExLlamaTokenizer(str(tokenizer_model_path))
|
||||||
|
@ -97,6 +97,11 @@ class ExllamaHF(PreTrainedModel):
|
|||||||
if shared.args.gpu_split:
|
if shared.args.gpu_split:
|
||||||
config.set_auto_map(shared.args.gpu_split)
|
config.set_auto_map(shared.args.gpu_split)
|
||||||
config.gpu_peer_fix = True
|
config.gpu_peer_fix = True
|
||||||
|
if torch.version.hip:
|
||||||
|
config.rmsnorm_no_half2 = True
|
||||||
|
config.rope_no_half2 = True
|
||||||
|
config.matmul_no_half2 = True
|
||||||
|
config.silu_no_half2 = True
|
||||||
|
|
||||||
# This slows down a bit but aligns better with autogptq generation.
|
# This slows down a bit but aligns better with autogptq generation.
|
||||||
# TODO: Should give user choice to tune the exllama config
|
# TODO: Should give user choice to tune the exllama config
|
||||||
|
Loading…
x
Reference in New Issue
Block a user