diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py
index 5d05f5df..94d893c4 100644
--- a/modules/llamacpp_hf.py
+++ b/modules/llamacpp_hf.py
@@ -97,6 +97,8 @@ class LlamacppHF(PreTrainedModel):
             'use_mlock': shared.args.mlock,
             'low_vram': shared.args.low_vram,
             'n_gpu_layers': shared.args.n_gpu_layers,
+            'rope_freq_base': 10000 * shared.args.alpha_value ** (64/63.),
+            'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
             'logits_all': True,
         }
 
diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
index 86537a27..180b0f37 100644
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -50,7 +50,9 @@ class LlamaCppModel:
             'use_mmap': not shared.args.no_mmap,
             'use_mlock': shared.args.mlock,
             'low_vram': shared.args.low_vram,
-            'n_gpu_layers': shared.args.n_gpu_layers
+            'n_gpu_layers': shared.args.n_gpu_layers,
+            'rope_freq_base': 10000 * shared.args.alpha_value ** (64/63.),
+            'rope_freq_scale': 1.0 / shared.args.compress_pos_emb,
         }
 
         result.model = Llama(**params)
diff --git a/modules/loaders.py b/modules/loaders.py
index da38c2f5..b760128f 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -37,6 +37,8 @@ loaders_and_params = {
         'low_vram',
         'mlock',
         'llama_cpp_seed',
+        'compress_pos_emb',
+        'alpha_value',
     ],
     'llamacpp_HF': [
         'n_ctx',
@@ -47,6 +49,8 @@ loaders_and_params = {
         'low_vram',
         'mlock',
         'llama_cpp_seed',
+        'compress_pos_emb',
+        'alpha_value',
         'llamacpp_HF_info',
     ],
     'Transformers': [