Minor changes

This commit is contained in:
oobabooga 2023-09-12 15:05:21 -07:00
parent 18e6b275f3
commit 2f935547c8
3 changed files with 10 additions and 10 deletions

View File

@@ -335,7 +335,7 @@ Optionally, you can use the following command-line flags:
| `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". | | `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". |
| `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. | | `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. |
#### RoPE (for llama.cpp, ExLlama, and transformers) #### RoPE (for llama.cpp, ExLlama, ExLlamaV2, and transformers)
| Flag | Description | | Flag | Description |
|------------------|-------------| |------------------|-------------|

View File

@@ -34,6 +34,13 @@ loaders_and_params = OrderedDict({
'cfg_cache', 'cfg_cache',
'exllama_HF_info', 'exllama_HF_info',
], ],
'ExLlamav2_HF': [
'gpu_split',
'max_seq_len',
'cfg_cache',
'alpha_value',
'compress_pos_emb',
],
'ExLlama': [ 'ExLlama': [
'gpu_split', 'gpu_split',
'max_seq_len', 'max_seq_len',
@@ -48,13 +55,6 @@ loaders_and_params = OrderedDict({
'alpha_value', 'alpha_value',
'compress_pos_emb', 'compress_pos_emb',
], ],
'ExLlamav2_HF': [
'gpu_split',
'max_seq_len',
'cfg_cache',
'alpha_value',
'compress_pos_emb',
],
'AutoGPTQ': [ 'AutoGPTQ': [
'triton', 'triton',
'no_inject_fused_attention', 'no_inject_fused_attention',

View File

@@ -219,9 +219,9 @@ def fix_loader_name(name):
return 'ExLlama' return 'ExLlama'
elif name in ['exllama-hf', 'exllama_hf', 'exllama hf', 'ex-llama-hf', 'ex_llama_hf']: elif name in ['exllama-hf', 'exllama_hf', 'exllama hf', 'ex-llama-hf', 'ex_llama_hf']:
return 'ExLlama_HF' return 'ExLlama_HF'
elif name in ['exllamav2', 'exllama-v2', 'ex_llama-v2', 'exlamav2', 'exlama-v2']: elif name in ['exllamav2', 'exllama-v2', 'ex_llama-v2', 'exlamav2', 'exlama-v2', 'exllama2', 'exllama-2']:
return 'ExLlamav2' return 'ExLlamav2'
elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf']: elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']:
return 'ExLlamav2_HF' return 'ExLlamav2_HF'
elif name in ['ctransformers', 'ctranforemrs', 'ctransformer']: elif name in ['ctransformers', 'ctranforemrs', 'ctransformer']:
return 'ctransformers' return 'ctransformers'