From 2f935547c8ee4b34754bd15d64ca5ecced340205 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 12 Sep 2023 15:05:21 -0700
Subject: [PATCH] Minor changes

---
 README.md          |  2 +-
 modules/loaders.py | 14 +++++++-------
 modules/shared.py  |  4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 24fc17bc..da87da10 100644
--- a/README.md
+++ b/README.md
@@ -335,7 +335,7 @@ Optionally, you can use the following command-line flags:
 | `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". |
 | `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. |
 
-#### RoPE (for llama.cpp, ExLlama, and transformers)
+#### RoPE (for llama.cpp, ExLlama, ExLlamaV2, and transformers)
 
 | Flag | Description |
 |------------------|-------------|
diff --git a/modules/loaders.py b/modules/loaders.py
index 15dd4668..ff2f5050 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -34,6 +34,13 @@ loaders_and_params = OrderedDict({
         'cfg_cache',
         'exllama_HF_info',
     ],
+    'ExLlamav2_HF': [
+        'gpu_split',
+        'max_seq_len',
+        'cfg_cache',
+        'alpha_value',
+        'compress_pos_emb',
+    ],
     'ExLlama': [
         'gpu_split',
         'max_seq_len',
@@ -48,13 +55,6 @@ loaders_and_params = OrderedDict({
         'alpha_value',
         'compress_pos_emb',
     ],
-    'ExLlamav2_HF': [
-        'gpu_split',
-        'max_seq_len',
-        'cfg_cache',
-        'alpha_value',
-        'compress_pos_emb',
-    ],
     'AutoGPTQ': [
         'triton',
         'no_inject_fused_attention',
diff --git a/modules/shared.py b/modules/shared.py
index 829d7c01..06aafc8d 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -219,9 +219,9 @@ def fix_loader_name(name):
         return 'ExLlama'
     elif name in ['exllama-hf', 'exllama_hf', 'exllama hf', 'ex-llama-hf', 'ex_llama_hf']:
         return 'ExLlama_HF'
-    elif name in ['exllamav2', 'exllama-v2', 'ex_llama-v2', 'exlamav2', 'exlama-v2']:
+    elif name in ['exllamav2', 'exllama-v2', 'ex_llama-v2', 'exlamav2', 'exlama-v2', 'exllama2', 'exllama-2']:
         return 'ExLlamav2'
-    elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf']:
+    elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']:
         return 'ExLlamav2_HF'
     elif name in ['ctransformers', 'ctranforemrs', 'ctransformer']:
         return 'ctransformers'