From 18e6b275f3e34e54e0f5dc0ccdf7e972941addbc Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 12 Sep 2023 15:02:47 -0700
Subject: [PATCH] Add alpha_value/compress_pos_emb to ExLlama-v2

---
 modules/exllamav2.py    | 3 +++
 modules/exllamav2_hf.py | 3 +++
 modules/loaders.py      | 4 ++++
 3 files changed, 10 insertions(+)

diff --git a/modules/exllamav2.py b/modules/exllamav2.py
index 4f89e0e6..7905db64 100644
--- a/modules/exllamav2.py
+++ b/modules/exllamav2.py
@@ -28,6 +28,9 @@ class Exllamav2Model:
         config.prepare()
 
         config.max_seq_len = shared.args.max_seq_len
+        config.rope_scale = shared.args.compress_pos_emb
+        config.rope_alpha = shared.args.alpha_value
+
         model = ExLlamaV2(config)
 
         split = None
diff --git a/modules/exllamav2_hf.py b/modules/exllamav2_hf.py
index 2eb2d087..e4ea7f5b 100644
--- a/modules/exllamav2_hf.py
+++ b/modules/exllamav2_hf.py
@@ -114,6 +114,9 @@ class Exllamav2HF(PreTrainedModel):
 
         config = ExLlamaV2Config()
         config.model_dir = pretrained_model_name_or_path
         config.prepare()
+        config.max_seq_len = shared.args.max_seq_len
+        config.rope_scale = shared.args.compress_pos_emb
+        config.rope_alpha = shared.args.alpha_value
 
         return Exllamav2HF(config)
diff --git a/modules/loaders.py b/modules/loaders.py
index 28882a6a..15dd4668 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -45,11 +45,15 @@ loaders_and_params = OrderedDict({
     'ExLlamav2': [
         'gpu_split',
         'max_seq_len',
+        'alpha_value',
+        'compress_pos_emb',
     ],
     'ExLlamav2_HF': [
         'gpu_split',
         'max_seq_len',
         'cfg_cache',
+        'alpha_value',
+        'compress_pos_emb',
     ],
     'AutoGPTQ': [
         'triton',
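
Note (not part of the patch): a minimal sketch of what the new config plumbing amounts to when loading a model directly through ExLlamaV2. The `ExLlamaV2Config`/`ExLlamaV2` classes and the `rope_scale`/`rope_alpha` attribute names are taken from the diff above; the model path and the specific scaling values below are hypothetical examples.

    # Sketch of the RoPE settings this patch wires through; values are illustrative.
    from exllamav2 import ExLlamaV2, ExLlamaV2Config

    config = ExLlamaV2Config()
    config.model_dir = '/path/to/model'  # hypothetical model directory
    config.prepare()

    config.max_seq_len = 8192   # extended context length
    config.rope_scale = 2.0     # linear position compression (compress_pos_emb)
    config.rope_alpha = 2.5     # NTK-style alpha scaling (alpha_value)

    model = ExLlamaV2(config)
    model.load()

In the webui itself these values come from the existing `--max_seq_len`, `--compress_pos_emb`, and `--alpha_value` command-line flags (via `shared.args`); the `loaders.py` change simply registers `alpha_value` and `compress_pos_emb` as visible UI parameters for both the `ExLlamav2` and `ExLlamav2_HF` loaders.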