Disable half2 for ExLlama when using HIP (#2912)

ardfork 2023-06-29 18:03:16 +00:00 committed by GitHub
parent ac0f96e785
commit 3c076c3c80
2 changed files with 13 additions and 0 deletions
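
Background for the change below: on ROCm builds of PyTorch, torch.version.hip is a version string (for example "5.4.22803"), while on CUDA builds it is None, so a plain truthiness test is enough to detect HIP at runtime. A minimal sketch of that detection (the is_hip helper is illustrative, not part of this commit):

import torch

def is_hip() -> bool:
    # torch.version.hip is a version string on ROCm builds of PyTorch
    # and None on CUDA builds, so truthiness alone distinguishes them.
    return bool(torch.version.hip)

print("HIP/ROCm build" if is_hip() else "CUDA or CPU build")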

modules/exllama.py

@@ -1,6 +1,8 @@
 import sys
 from pathlib import Path
 
+from torch import version as torch_version
+
 from modules import shared
 from modules.logging_colors import logger
@@ -51,6 +53,12 @@ class ExllamaModel:
         if shared.args.gpu_split:
             config.set_auto_map(shared.args.gpu_split)
             config.gpu_peer_fix = True
 
+        if torch_version.hip:
+            config.rmsnorm_no_half2 = True
+            config.rope_no_half2 = True
+            config.matmul_no_half2 = True
+            config.silu_no_half2 = True
+
         model = ExLlama(config)
         tokenizer = ExLlamaTokenizer(str(tokenizer_model_path))
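
For reference, the guard added above could be factored into a single helper shared by both loaders. This is only a sketch, assuming config is an exllama ExLlamaConfig exposing the four *_no_half2 attributes set in the hunk above; the disable_half2_on_hip name is hypothetical:

from torch import version as torch_version

def disable_half2_on_hip(config):
    # On HIP/ROCm builds, turn off the half2 (paired fp16) code paths
    # for RMSNorm, RoPE, matmul, and SiLU, exactly as the hunk above does.
    if torch_version.hip:
        config.rmsnorm_no_half2 = True
        config.rope_no_half2 = True
        config.matmul_no_half2 = True
        config.silu_no_half2 = True
    return config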

modules/exllama_hf.py

@@ -97,6 +97,11 @@ class ExllamaHF(PreTrainedModel):
         if shared.args.gpu_split:
             config.set_auto_map(shared.args.gpu_split)
             config.gpu_peer_fix = True
+        if torch.version.hip:
+            config.rmsnorm_no_half2 = True
+            config.rope_no_half2 = True
+            config.matmul_no_half2 = True
+            config.silu_no_half2 = True
         # This slows things down a bit but aligns better with autogptq generation.
         # TODO: should give the user a choice to tune the exllama config
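
One way to act on the TODO above would be to surface these knobs as command-line options. The sketch below is hypothetical: the --exllama-no-half2 flag and its wiring are not part of this commit or the project's actual CLI.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--exllama-no-half2",
    action="store_true",
    help="Force-disable half2 kernels even on CUDA builds.",
)
args = parser.parse_args()

# When building the ExLlama config, the user's choice would be combined
# with the automatic HIP detection:
# if args.exllama_no_half2 or torch.version.hip:
#     config.rmsnorm_no_half2 = True
#     ...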