mirror of https://github.com/oobabooga/text-generation-webui.git
Add support for RWKV in Hugging Face format
commit cd3618d7fb (parent 75adc110d4)
@@ -8,6 +8,16 @@ https://github.com/BlinkDL/ChatRWKV
 
 ## Using RWKV in the web UI
 
+### Hugging Face weights
+
+Simply download the weights from https://huggingface.co/RWKV and load them as you would for any other model.
+
+There is a bug in transformers==4.29.2 that prevents RWKV from being loaded in 8-bit mode. You can install the dev branch to solve this bug: `pip install git+https://github.com/huggingface/transformers`
+
+### Original .pth weights
+
+The instructions below are from before RWKV was supported in transformers, and they are kept for legacy purposes. The old implementation is possibly faster, but it lacks the full range of samplers that the transformers library offers.
+
 #### 1. Download the model
 
 It is available in different sizes:
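For context on what the new "Hugging Face weights" path amounts to, here is a minimal sketch of loading one of the https://huggingface.co/RWKV checkpoints directly through transformers. The repository name and the generation settings are illustrative assumptions, not something this commit prescribes:

```python
# Minimal sketch, assuming transformers >= 4.29 with RWKV support installed.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "RWKV/rwkv-4-169m-pile"  # example checkpoint from https://huggingface.co/RWKV

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("In a shocking finding,", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0]))
```

In the web UI itself no such code is needed: the downloaded folder is picked up by the standard model loader like any other transformers model.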
@@ -36,7 +46,7 @@ No additional steps are required. Just launch it as you would with any other mod
 python server.py --listen --no-stream --model RWKV-4-Pile-169M-20220807-8023.pth
 ```
 
-## Setting a custom strategy
+### Setting a custom strategy
 
 It is possible to have very fine control over the offloading and precision for the model with the `--rwkv-strategy` flag. Possible values include:
 
@@ -49,6 +59,6 @@ It is possible to have very fine control over the offloading and precision for t
 
 See the README for the PyPI package for more details: https://pypi.org/project/rwkv/
 
-## Compiling the CUDA kernel
+### Compiling the CUDA kernel
 
 You can compile the CUDA kernel for the model with `--rwkv-cuda-on`. This should improve the performance a lot but I haven't been able to get it to work yet.
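For the legacy path, the `--rwkv-strategy` flag corresponds to the strategy strings of the `rwkv` package from PyPI linked above. A rough sketch of what such a strategy means in terms of that package; the file names and the strategy value are illustrative assumptions, and the package README is the authority on the full syntax:

```python
# Rough sketch of the rwkv PyPI package (https://pypi.org/project/rwkv/).
# File names and the strategy string below are example values only.
from rwkv.model import RWKV
from rwkv.utils import PIPELINE

# "cuda fp16i8 *20 -> cuda fp16": roughly, run the first 20 layers in 8-bit on
# the GPU and the remaining layers in fp16 on the GPU.
model = RWKV(model='models/RWKV-4-Pile-169M-20220807-8023.pth',
             strategy='cuda fp16i8 *20 -> cuda fp16')

pipeline = PIPELINE(model, 'models/20B_tokenizer.json')
print(pipeline.generate('In a shocking finding,', token_count=40))
```

Launching the web UI with `--rwkv-strategy "cuda fp16i8 *20 -> cuda fp16"` should request the same split, assuming the flag forwards the string unchanged.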
@@ -45,7 +45,7 @@ def find_model_type(model_name):
         return 'None'
 
     model_name_lower = model_name.lower()
-    if 'rwkv-' in model_name_lower:
+    if re.match('.*rwkv.*\.pth', model_name_lower):
        return 'rwkv'
     elif len(list(path_to_model.glob('*ggml*.bin'))) > 0:
         return 'llamacpp'
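The effect of the tightened condition above is that only original-format `.pth` checkpoints are routed to the legacy 'rwkv' loader, while Hugging Face-format RWKV folders fall through to the remaining checks and load via transformers. A standalone sketch of just that check, with made-up model names:

```python
import re

# Made-up example names to illustrate the tightened condition in find_model_type().
names = [
    'RWKV-4-Pile-169M-20220807-8023.pth',  # original checkpoint -> legacy 'rwkv' loader
    'rwkv-4-raven-7b',                     # Hugging Face folder -> other loaders (transformers)
    'llama-7b',
]

for name in names:
    if re.match('.*rwkv.*\.pth', name.lower()):
        print(f"{name}: detected as 'rwkv' (original .pth implementation)")
    else:
        print(f"{name}: falls through to the remaining checks")
```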
@@ -69,7 +69,7 @@ settings = {
         'default': 'Default',
         '.*(alpaca|llama|llava|vicuna)': "LLaMA-Precise",
         '.*pygmalion': 'NovelAI-Storywriter',
-        '.*RWKV': 'Naive',
+        '.*RWKV.*\.pth': 'Naive',
         '.*moss': 'MOSS',
     },
     'prompts': {
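These keys are regular expressions matched against the model name to choose a default generation preset, so narrowing `.*RWKV` to `.*RWKV.*\.pth` keeps the 'Naive' preset for the legacy `.pth` checkpoints while Hugging Face-format RWKV models fall back to the default. A simplified re-implementation of that lookup, written here for illustration only rather than copied from the repository:

```python
import re

# Simplified stand-in for the preset lookup; names and structure are assumptions.
model_to_preset = {
    'default': 'Default',
    '.*(alpaca|llama|llava|vicuna)': 'LLaMA-Precise',
    '.*pygmalion': 'NovelAI-Storywriter',
    '.*RWKV.*\.pth': 'Naive',
    '.*moss': 'MOSS',
}

def pick_preset(model_name):
    for pattern, preset in model_to_preset.items():
        if pattern != 'default' and re.match(pattern.lower(), model_name.lower()):
            return preset
    return model_to_preset['default']

print(pick_preset('RWKV-4-Pile-169M-20220807-8023.pth'))  # -> Naive
print(pick_preset('rwkv-4-raven-7b'))                      # -> Default
```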
@@ -37,7 +37,7 @@
         "default": "Default",
         ".*(alpaca|llama|llava|vicuna)": "LLaMA-Precise",
         ".*pygmalion": "NovelAI-Storywriter",
-        ".*RWKV": "Naive",
+        ".*RWKV.*\.pth": "Naive",
         ".*moss": "MOSS"
     },
     "prompts": {