mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-22 16:17:57 +01:00
Minor changes
This commit is contained in:
parent
18e6b275f3
commit
2f935547c8
@ -335,7 +335,7 @@ Optionally, you can use the following command-line flags:
|
|||||||
| `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". |
|
| `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". |
|
||||||
| `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. |
|
| `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. |
|
||||||
|
|
||||||
#### RoPE (for llama.cpp, ExLlama, and transformers)
|
#### RoPE (for llama.cpp, ExLlama, ExLlamaV2, and transformers)
|
||||||
|
|
||||||
| Flag | Description |
|
| Flag | Description |
|
||||||
|------------------|-------------|
|
|------------------|-------------|
|
||||||
|
@ -34,6 +34,13 @@ loaders_and_params = OrderedDict({
|
|||||||
'cfg_cache',
|
'cfg_cache',
|
||||||
'exllama_HF_info',
|
'exllama_HF_info',
|
||||||
],
|
],
|
||||||
|
'ExLlamav2_HF': [
|
||||||
|
'gpu_split',
|
||||||
|
'max_seq_len',
|
||||||
|
'cfg_cache',
|
||||||
|
'alpha_value',
|
||||||
|
'compress_pos_emb',
|
||||||
|
],
|
||||||
'ExLlama': [
|
'ExLlama': [
|
||||||
'gpu_split',
|
'gpu_split',
|
||||||
'max_seq_len',
|
'max_seq_len',
|
||||||
@ -48,13 +55,6 @@ loaders_and_params = OrderedDict({
|
|||||||
'alpha_value',
|
'alpha_value',
|
||||||
'compress_pos_emb',
|
'compress_pos_emb',
|
||||||
],
|
],
|
||||||
'ExLlamav2_HF': [
|
|
||||||
'gpu_split',
|
|
||||||
'max_seq_len',
|
|
||||||
'cfg_cache',
|
|
||||||
'alpha_value',
|
|
||||||
'compress_pos_emb',
|
|
||||||
],
|
|
||||||
'AutoGPTQ': [
|
'AutoGPTQ': [
|
||||||
'triton',
|
'triton',
|
||||||
'no_inject_fused_attention',
|
'no_inject_fused_attention',
|
||||||
|
@ -219,9 +219,9 @@ def fix_loader_name(name):
|
|||||||
return 'ExLlama'
|
return 'ExLlama'
|
||||||
elif name in ['exllama-hf', 'exllama_hf', 'exllama hf', 'ex-llama-hf', 'ex_llama_hf']:
|
elif name in ['exllama-hf', 'exllama_hf', 'exllama hf', 'ex-llama-hf', 'ex_llama_hf']:
|
||||||
return 'ExLlama_HF'
|
return 'ExLlama_HF'
|
||||||
elif name in ['exllamav2', 'exllama-v2', 'ex_llama-v2', 'exlamav2', 'exlama-v2']:
|
elif name in ['exllamav2', 'exllama-v2', 'ex_llama-v2', 'exlamav2', 'exlama-v2', 'exllama2', 'exllama-2']:
|
||||||
return 'ExLlamav2'
|
return 'ExLlamav2'
|
||||||
elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf']:
|
elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']:
|
||||||
return 'ExLlamav2_HF'
|
return 'ExLlamav2_HF'
|
||||||
elif name in ['ctransformers', 'ctranforemrs', 'ctransformer']:
|
elif name in ['ctransformers', 'ctranforemrs', 'ctransformer']:
|
||||||
return 'ctransformers'
|
return 'ctransformers'
|
||||||
|
Loading…
Reference in New Issue
Block a user