Add alpha_value/compress_pos_emb to ExLlama-v2

This commit is contained in:
oobabooga 2023-09-12 15:02:47 -07:00
parent 90fca6a77d
commit 18e6b275f3
3 changed files with 10 additions and 0 deletions

View File

@ -28,6 +28,9 @@ class Exllamav2Model:
config.prepare()
config.max_seq_len = shared.args.max_seq_len
config.rope_scale = shared.args.compress_pos_emb
config.rope_alpha = shared.args.alpha_value
model = ExLlamaV2(config)
split = None

View File

@ -114,6 +114,9 @@ class Exllamav2HF(PreTrainedModel):
config = ExLlamaV2Config()
config.model_dir = pretrained_model_name_or_path
config.prepare()
config.max_seq_len = shared.args.max_seq_len
config.rope_scale = shared.args.compress_pos_emb
config.rope_alpha = shared.args.alpha_value
return Exllamav2HF(config)

View File

@ -45,11 +45,15 @@ loaders_and_params = OrderedDict({
'ExLlamav2': [
    'gpu_split',
    'max_seq_len',
    'alpha_value',
    'compress_pos_emb',
],
'ExLlamav2_HF': [
    'gpu_split',
    'max_seq_len',
    'cfg_cache',
    'alpha_value',
    'compress_pos_emb',
],
'AutoGPTQ': [
    'triton',