diff --git a/modules/exllamav2.py b/modules/exllamav2.py
index 4f89e0e6..7905db64 100644
--- a/modules/exllamav2.py
+++ b/modules/exllamav2.py
@@ -28,6 +28,9 @@ class Exllamav2Model:
         config.prepare()

         config.max_seq_len = shared.args.max_seq_len
+        config.rope_scale = shared.args.compress_pos_emb
+        config.rope_alpha = shared.args.alpha_value
+
         model = ExLlamaV2(config)

         split = None
diff --git a/modules/exllamav2_hf.py b/modules/exllamav2_hf.py
index 2eb2d087..e4ea7f5b 100644
--- a/modules/exllamav2_hf.py
+++ b/modules/exllamav2_hf.py
@@ -114,6 +114,9 @@ class Exllamav2HF(PreTrainedModel):
         config = ExLlamaV2Config()
         config.model_dir = pretrained_model_name_or_path
         config.prepare()
+        config.max_seq_len = shared.args.max_seq_len
+        config.rope_scale = shared.args.compress_pos_emb
+        config.rope_alpha = shared.args.alpha_value

         return Exllamav2HF(config)

diff --git a/modules/loaders.py b/modules/loaders.py
index 28882a6a..15dd4668 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -45,11 +45,15 @@ loaders_and_params = OrderedDict({
     'ExLlamav2': [
         'gpu_split',
         'max_seq_len',
+        'alpha_value',
+        'compress_pos_emb',
     ],
     'ExLlamav2_HF': [
         'gpu_split',
         'max_seq_len',
         'cfg_cache',
+        'alpha_value',
+        'compress_pos_emb',
     ],
     'AutoGPTQ': [
         'triton',
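For context, the two new config fields carry the RoPE scaling options already exposed for other loaders: compress_pos_emb is the linear position-embedding compression factor and alpha_value is the NTK-style alpha factor. A minimal sketch of how the patched loaders build the config (attribute names taken from the diff above; the build_config helper and its defaults are illustrative, not part of the patch):

# Sketch only: build an ExLlamaV2Config the same way the patched loaders do,
# applying the RoPE scaling options alongside the context length.
from exllamav2 import ExLlamaV2Config


def build_config(model_dir, max_seq_len=4096, compress_pos_emb=1, alpha_value=1):
    config = ExLlamaV2Config()
    config.model_dir = model_dir
    config.prepare()

    config.max_seq_len = max_seq_len        # context length
    config.rope_scale = compress_pos_emb    # linear positional-embedding compression
    config.rope_alpha = alpha_value         # NTK-style alpha scaling
    return config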