Merge branch 'main' into dandm1-raw_string_processing

This commit is contained in:
oobabooga 2023-08-27 09:18:58 -07:00
commit 4318c4cc18
136 changed files with 4302 additions and 2273 deletions

3
.github/pull_request_template.md vendored Normal file
View File

@ -0,0 +1,3 @@
## Checklist:
- [ ] I have read the [Contributing guidelines](https://github.com/oobabooga/text-generation-webui/wiki/Contributing-guidelines).

View File

@ -13,8 +13,8 @@ jobs:
- uses: actions/stale@v5
with:
stale-issue-message: ""
close-issue-message: "This issue has been closed due to inactivity for 30 days. If you believe it is still relevant, please leave a comment below."
days-before-issue-stale: 30
close-issue-message: "This issue has been closed due to inactivity for 6 weeks. If you believe it is still relevant, please leave a comment below. You can tag a developer in your comment."
days-before-issue-stale: 42
days-before-issue-close: 0
stale-issue-label: "stale"
days-before-pr-stale: -1

158
README.md
View File

@ -1,29 +1,27 @@
# Text generation web UI
A gradio web UI for running Large Language Models like LLaMA, llama.cpp, GPT-J, OPT, and GALACTICA.
A Gradio web UI for Large Language Models.
Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation.
|![Image1](https://github.com/oobabooga/screenshots/raw/main/qa.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/cai3.png) |
|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_instruct.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_chat.png) |
|:---:|:---:|
|![Image3](https://github.com/oobabooga/screenshots/raw/main/gpt4chan.png) | ![Image4](https://github.com/oobabooga/screenshots/raw/main/galactica.png) |
|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_default.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_parameters.png) |
## Features
* 3 interface modes: default, notebook, and chat
* Multiple model backends: transformers, llama.cpp, ExLlama, AutoGPTQ, GPTQ-for-LLaMa
* 3 interface modes: default (two columns), notebook, and chat
* Multiple model backends: [transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), [ExLlama](https://github.com/turboderp/exllama), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [GPTQ-for-LLaMa](https://github.com/qwopqwop200/GPTQ-for-LLaMa), [ctransformers](https://github.com/marella/ctransformers)
* Dropdown menu for quickly switching between different models
* LoRA: load and unload LoRAs on the fly, train a new LoRA
* Precise instruction templates for chat mode, including Llama 2, Alpaca, Vicuna, WizardLM, StableLM, and many others
* LoRA: load and unload LoRAs on the fly, train a new LoRA using QLoRA
* Precise instruction templates for chat mode, including Llama-2-chat, Alpaca, Vicuna, WizardLM, StableLM, and many others
* 4-bit, 8-bit, and CPU inference through the transformers library
* Use llama.cpp models with transformers samplers (`llamacpp_HF` loader)
* [Multimodal pipelines, including LLaVA and MiniGPT-4](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal)
* 8-bit and 4-bit inference through bitsandbytes
* CPU mode for transformers models
* [DeepSpeed ZeRO-3 inference](docs/DeepSpeed.md)
* [Extensions](docs/Extensions.md)
* [Extensions framework](docs/Extensions.md)
* [Custom chat characters](docs/Chat-mode.md)
* Very efficient text streaming
* Markdown output with LaTeX rendering, to use for instance with [GALACTICA](https://github.com/paperswithcode/galai)
* Nice HTML output for GPT-4chan
* API, including endpoints for websocket streaming ([see the examples](https://github.com/oobabooga/text-generation-webui/blob/main/api-examples))
To learn how to use the various features, check out the Documentation: https://github.com/oobabooga/text-generation-webui/tree/main/docs
@ -38,26 +36,24 @@ To learn how to use the various features, check out the Documentation: https://g
Just download the zip above, extract it, and double-click on "start". The web UI and all its dependencies will be installed in the same folder.
* The source codes are here: https://github.com/oobabooga/one-click-installers
* The source codes and more information can be found here: https://github.com/oobabooga/one-click-installers
* There is no need to run the installers as admin.
* AMD doesn't work on Windows.
* Huge thanks to [@jllllll](https://github.com/jllllll), [@ClayShoaf](https://github.com/ClayShoaf), and [@xNul](https://github.com/xNul) for their contributions to these installers.
### Manual installation using Conda
Recommended if you have some experience with the command line.
Recommended if you have some experience with the command-line.
#### 0. Install Conda
https://docs.conda.io/en/latest/miniconda.html
On Linux or WSL, it can be automatically installed with these two commands:
On Linux or WSL, it can be automatically installed with these two commands ([source](https://educe-ubc.github.io/conda.html)):
```
curl -sL "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" > "Miniconda3.sh"
bash Miniconda3.sh
```
Source: https://educe-ubc.github.io/conda.html
#### 1. Create a new conda environment
@ -79,7 +75,7 @@ conda activate textgen
The up-to-date commands can be found here: https://pytorch.org/get-started/locally/.
#### 2.1 Special instructions
#### 2.1 Additional information
* MacOS users: https://github.com/oobabooga/text-generation-webui/pull/393
* AMD users: https://rentry.org/eq3hg
@ -92,9 +88,21 @@ cd text-generation-webui
pip install -r requirements.txt
```
#### bitsandbytes
#### llama.cpp on AMD, Metal, and some specific CPUs
bitsandbytes >= 0.39 may not work on older NVIDIA GPUs. In that case, to use `--load-in-8bit`, you may have to downgrade like this:
Precompiled wheels are included for CPU-only and NVIDIA GPUs (cuBLAS). For AMD, Metal, and some specific CPUs, you need to uninstall those wheels and compile llama-cpp-python yourself.
To uninstall:
```
pip uninstall -y llama-cpp-python llama-cpp-python-cuda
```
To compile: https://github.com/abetlen/llama-cpp-python#installation-with-openblas--cublas--clblast--metal
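For example, a Metal build typically looks something like this (the exact `CMAKE_ARGS` flag depends on your hardware; see the link above for the full list of backends):

```
CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir
```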
#### bitsandbytes on older NVIDIA GPUs
bitsandbytes >= 0.39 may not work. In that case, to use `--load-in-8bit`, you may have to downgrade like this:
* Linux: `pip install bitsandbytes==0.38.1`
* Windows: `pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.38.1-py3-none-any.whl`
@ -113,37 +121,50 @@ docker compose up --build
### Updating the requirements
From time to time, the `requirements.txt` changes. To update, use this command:
From time to time, the `requirements.txt` changes. To update, use these commands:
```
conda activate textgen
cd text-generation-webui
pip install -r requirements.txt --upgrade
```
## Downloading models
Models should be placed inside the `models/` folder.
Models should be placed in the `text-generation-webui/models` folder. They are usually downloaded from [Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads).
[Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads) is the main place to download models. These are some examples:
* Transformers or GPTQ models are made of several files and must be placed in a subfolder. Example:
* [Pythia](https://huggingface.co/models?sort=downloads&search=eleutherai%2Fpythia+deduped)
* [OPT](https://huggingface.co/models?search=facebook/opt)
* [GALACTICA](https://huggingface.co/models?search=facebook/galactica)
* [GPT-J 6B](https://huggingface.co/EleutherAI/gpt-j-6B/tree/main)
```
text-generation-webui
├── models
│   ├── lmsys_vicuna-33b-v1.3
│   │   ├── config.json
│   │   ├── generation_config.json
│   │   ├── pytorch_model-00001-of-00007.bin
│   │   ├── pytorch_model-00002-of-00007.bin
│   │   ├── pytorch_model-00003-of-00007.bin
│   │   ├── pytorch_model-00004-of-00007.bin
│   │   ├── pytorch_model-00005-of-00007.bin
│   │   ├── pytorch_model-00006-of-00007.bin
│   │   ├── pytorch_model-00007-of-00007.bin
│   │   ├── pytorch_model.bin.index.json
│   │   ├── special_tokens_map.json
│   │   ├── tokenizer_config.json
│   │   └── tokenizer.model
```
You can automatically download a model from HF using the script `download-model.py`:
In the "Model" tab of the UI, those models can be automatically downloaded from Hugging Face. You can also download them via the command-line with `python download-model.py organization/model`.
python download-model.py organization/model
* GGML/GGUF models are a single file and should be placed directly into `models`. Example:
For example:
```
text-generation-webui
├── models
│   ├── llama-13b.ggmlv3.q4_K_M.bin
```
python download-model.py facebook/opt-1.3b
To download a protected model, set env vars `HF_USER` and `HF_PASS` to your Hugging Face username and password (or [User Access Token](https://huggingface.co/settings/tokens)). The model's terms must first be accepted on the HF website.
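For example, a minimal sketch of that approach (the credentials and model name are placeholders):

```
export HF_USER="your-username"
export HF_PASS="your-access-token"
python download-model.py organization/model
```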
#### GGML models
You can drop these directly into the `models/` folder, making sure that the file name contains `ggml` somewhere and ends in `.bin`.
Those models must be downloaded manually, as they are not currently supported by the automated downloader.
#### GPT-4chan
@ -169,7 +190,10 @@ After downloading the model, follow these steps:
python download-model.py EleutherAI/gpt-j-6B --text-only
```
When you load this model in default or notebook modes, the "HTML" tab will show the generated text in 4chan format.
When you load this model in default or notebook modes, the "HTML" tab will show the generated text in 4chan format:
![Image3](https://github.com/oobabooga/screenshots/raw/main/gpt4chan.png)
</details>
## Starting the web UI
@ -189,8 +213,6 @@ Optionally, you can use the following command-line flags:
| Flag | Description |
|--------------------------------------------|-------------|
| `-h`, `--help` | Show this help message and exit. |
| `--notebook` | Launch the web UI in notebook mode, where the output is written to the same text box as the input. |
| `--chat` | Launch the web UI in chat mode. |
| `--multi-user` | Multi-user mode. Chat histories are not saved or automatically loaded. WARNING: this is highly experimental. |
| `--character CHARACTER` | The name of the character to load in chat mode by default. |
| `--model MODEL` | Name of the model to load by default. |
@ -198,7 +220,6 @@ Optionally, you can use the following command-line flags:
| `--model-dir MODEL_DIR` | Path to directory with all the models. |
| `--lora-dir LORA_DIR` | Path to directory with all the loras. |
| `--model-menu` | Show a model menu in the terminal when the web UI is first launched. |
| `--no-stream` | Don't stream the text output in real time. |
| `--settings SETTINGS_FILE` | Load the default interface settings from this yaml file. See `settings-template.yaml` for an example. If you create a file called `settings.yaml`, this file will be loaded by default without the need to use the `--settings` flag. |
| `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. |
| `--verbose` | Print the prompts to the terminal. |
@ -207,7 +228,7 @@ Optionally, you can use the following command-line flags:
| Flag | Description |
|--------------------------------------------|-------------|
| `--loader LOADER` | Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, autogptq, gptq-for-llama, exllama, exllama_hf, llamacpp, rwkv |
| `--loader LOADER` | Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, autogptq, gptq-for-llama, exllama, exllama_hf, llamacpp, rwkv, ctransformers |
#### Accelerate/transformers
@ -237,20 +258,35 @@ Optionally, you can use the following command-line flags:
| `--quant_type QUANT_TYPE` | quant_type for 4-bit. Valid options: nf4, fp4. |
| `--use_double_quant` | use_double_quant for 4-bit. |
#### llama.cpp
#### GGML/GGUF (for llama.cpp and ctransformers)
| Flag | Description |
|-------------|-------------|
| `--threads` | Number of threads to use. |
| `--n_batch` | Maximum number of prompt tokens to batch together when calling llama_eval. |
| `--no-mmap` | Prevent mmap from being used. |
| `--mlock` | Force the system to keep the model in RAM. |
| `--cache-capacity CACHE_CAPACITY` | Maximum cache capacity. Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed. |
| `--n-gpu-layers N_GPU_LAYERS` | Number of layers to offload to the GPU. Only works if llama-cpp-python was compiled with BLAS. Set this to 1000000000 to offload all layers to the GPU. |
| `--n_ctx N_CTX` | Size of the prompt context. |
#### llama.cpp
| Flag | Description |
|---------------|---------------|
| `--no-mmap` | Prevent mmap from being used. |
| `--mlock` | Force the system to keep the model in RAM. |
| `--mul_mat_q` | Activate new mulmat kernels. |
| `--cache-capacity CACHE_CAPACITY` | Maximum cache capacity. Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed. |
| `--tensor_split TENSOR_SPLIT` | Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17 |
| `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default 0 (random). |
| `--n_gqa N_GQA` | grouped-query attention. Must be 8 for llama2 70b. |
| `--rms_norm_eps RMS_NORM_EPS` | Must be 1e-5 for llama2 70b. |
| `--n_gqa N_GQA` | GGML only (not used by GGUF): Grouped-Query Attention. Must be 8 for llama-2 70b. |
| `--rms_norm_eps RMS_NORM_EPS` | GGML only (not used by GGUF): 5e-6 is a good value for llama-2 models. |
| `--cpu` | Use the CPU version of llama-cpp-python instead of the GPU-accelerated version. |
|`--cfg-cache` | llamacpp_HF: Create an additional cache for CFG negative prompts. |
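For example, a hypothetical invocation for a GGML Llama-2 70b model, combining flags from the two tables above (the filename and layer count are placeholders):

```
python server.py --model llama-2-70b.ggmlv3.q4_K_M.bin --loader llamacpp --n-gpu-layers 35 --n_gqa 8
```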
#### ctransformers
| Flag | Description |
|-------------|-------------|
| `--model_type MODEL_TYPE` | Model type of pre-quantized model. Currently gpt2, gptj, gptneox, falcon, llama, mpt, starcoder (gptbigcode), dollyv2, and replit are supported. |
#### AutoGPTQ
@ -261,6 +297,7 @@ Optionally, you can use the following command-line flags:
| `--no_inject_fused_mlp` | Triton mode only: disable the use of fused MLP, which will use less VRAM at the cost of slower inference. |
| `--no_use_cuda_fp16` | This can make models faster on some systems. |
| `--desc_act` | For models that don't have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig. |
| `--disable_exllama` | Disable ExLlama kernel, which can improve inference speed on some systems. |
#### ExLlama
@ -268,6 +305,7 @@ Optionally, you can use the following command-line flags:
|------------------|-------------|
|`--gpu-split` | Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. `20,7,7` |
|`--max_seq_len MAX_SEQ_LEN` | Maximum sequence length. |
|`--cfg-cache` | ExLlama_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader, but not necessary for CFG with base ExLlama. |
#### GPTQ-for-LLaMa
@ -279,9 +317,6 @@ Optionally, you can use the following command-line flags:
| `--pre_layer PRE_LAYER [PRE_LAYER ...]` | The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models. For multi-gpu, write the numbers separated by spaces, eg `--pre_layer 30 60`. |
| `--checkpoint CHECKPOINT` | The path to the quantized checkpoint file. If not specified, it will be automatically detected. |
| `--monkey-patch` | Apply the monkey patch for using LoRAs with quantized models. |
| `--quant_attn` | (triton) Enable quant attention. |
| `--warmup_autotune` | (triton) Enable warmup autotune. |
| `--fused_mlp` | (triton) Enable fused mlp. |
#### DeepSpeed
@ -298,12 +333,13 @@ Optionally, you can use the following command-line flags:
| `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". |
| `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. |
#### RoPE (for llama.cpp and ExLlama only)
#### RoPE (for llama.cpp, ExLlama, and transformers)
| Flag | Description |
|------------------|-------------|
|`--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should typically be set to max_seq_len / 2048. |
|`--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both. |
| `--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both. |
| `--rope_freq_base ROPE_FREQ_BASE` | If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63). |
| `--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should be set to (context length) / (model's original context length). Equal to 1/rope_freq_scale. |
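As a rough illustration of how these flags relate, using the formulas stated in the table above (values are illustrative):

```
# Illustrative only: relations taken from the table above.
alpha_value = 2.0
rope_freq_base = 10000 * alpha_value ** (64 / 63)  # ~20222
original_ctx, target_ctx = 2048, 4096
compress_pos_emb = target_ctx / original_ctx        # 2.0, i.e. 1 / rope_freq_scale
print(round(rope_freq_base), compress_pos_emb)
```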
#### Gradio
@ -316,6 +352,8 @@ Optionally, you can use the following command-line flags:
| `--auto-launch` | Open the web UI in the default browser upon launch. |
| `--gradio-auth USER:PWD` | set gradio authentication like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3" |
| `--gradio-auth-path GRADIO_AUTH_PATH` | Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3" |
| `--ssl-keyfile SSL_KEYFILE` | The path to the SSL certificate key file. |
| `--ssl-certfile SSL_CERTFILE` | The path to the SSL certificate cert file. |
#### API
@ -323,6 +361,7 @@ Optionally, you can use the following command-line flags:
|---------------------------------------|-------------|
| `--api` | Enable the API extension. |
| `--public-api` | Create a public URL for the API using Cloudflare. |
| `--public-api-id PUBLIC_API_ID` | Tunnel ID for named Cloudflare Tunnel. Use together with public-api option. |
| `--api-blocking-port BLOCKING_PORT` | The listening port for the blocking API. |
| `--api-streaming-port STREAMING_PORT` | The listening port for the streaming API. |
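For example, to expose the API through a named Cloudflare Tunnel (the tunnel ID below is a placeholder for a tunnel you have already created):

```
python server.py --api --public-api --public-api-id my-tunnel-id
```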
@ -340,12 +379,13 @@ The presets that are included by default are the result of a contest that receiv
## Contributing
* Pull requests, suggestions, and issue reports are welcome.
* Make sure to carefully [search](https://github.com/oobabooga/text-generation-webui/issues) existing issues before starting a new one.
* If you have some experience with git, testing an open pull request and leaving a comment on whether it works as expected or not is immensely helpful.
* A simple way to contribute, even if you are not a programmer, is to leave a 👍 on an issue or pull request that you find relevant.
If you would like to contribute to the project, check out the [Contributing guidelines](https://github.com/oobabooga/text-generation-webui/wiki/Contributing-guidelines).
## Community
* Subreddit: https://www.reddit.com/r/oobaboogazz/
* Subreddit: https://www.reddit.com/r/oobabooga/
* Discord: https://discord.gg/jwZCF2dPQN
## Acknowledgment
In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition, which will allow me to dedicate more time towards realizing the full potential of text-generation-webui.

View File

@ -20,18 +20,23 @@ async def run(user_input, history):
request = {
'user_input': user_input,
'max_new_tokens': 250,
'auto_max_new_tokens': False,
'history': history,
'mode': 'instruct', # Valid options: 'chat', 'chat-instruct', 'instruct'
'character': 'Example',
'instruction_template': 'Vicuna-v1.1', # Will get autodetected if unset
# 'context_instruct': '', # Optional
'your_name': 'You',
# 'name1': 'name of user', # Optional
# 'name2': 'name of character', # Optional
# 'context': 'character context', # Optional
# 'greeting': 'greeting', # Optional
# 'name1_instruct': 'You', # Optional
# 'name2_instruct': 'Assistant', # Optional
# 'context_instruct': 'context_instruct', # Optional
# 'turn_template': 'turn_template', # Optional
'regenerate': False,
'_continue': False,
'stop_at_newline': False,
'chat_generation_attempts': 1,
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
# Generation params. If 'preset' is set to different than 'None', the values
# in presets/preset-name.yaml are used instead of the individual numbers.
@ -56,6 +61,8 @@ async def run(user_input, history):
'mirostat_mode': 0,
'mirostat_tau': 5,
'mirostat_eta': 0.1,
'guidance_scale': 1,
'negative_prompt': '',
'seed': -1,
'add_bos_token': True,

View File

@ -14,18 +14,23 @@ def run(user_input, history):
request = {
'user_input': user_input,
'max_new_tokens': 250,
'auto_max_new_tokens': False,
'history': history,
'mode': 'instruct', # Valid options: 'chat', 'chat-instruct', 'instruct'
'character': 'Example',
'instruction_template': 'Vicuna-v1.1', # Will get autodetected if unset
# 'context_instruct': '', # Optional
'your_name': 'You',
# 'name1': 'name of user', # Optional
# 'name2': 'name of character', # Optional
# 'context': 'character context', # Optional
# 'greeting': 'greeting', # Optional
# 'name1_instruct': 'You', # Optional
# 'name2_instruct': 'Assistant', # Optional
# 'context_instruct': 'context_instruct', # Optional
# 'turn_template': 'turn_template', # Optional
'regenerate': False,
'_continue': False,
'stop_at_newline': False,
'chat_generation_attempts': 1,
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
# Generation params. If 'preset' is set to different than 'None', the values
# in presets/preset-name.yaml are used instead of the individual numbers.
@ -50,6 +55,8 @@ def run(user_input, history):
'mirostat_mode': 0,
'mirostat_tau': 5,
'mirostat_eta': 0.1,
'guidance_scale': 1,
'negative_prompt': '',
'seed': -1,
'add_bos_token': True,

View File

@ -20,6 +20,7 @@ async def run(context):
request = {
'prompt': context,
'max_new_tokens': 250,
'auto_max_new_tokens': False,
# Generation params. If 'preset' is set to different than 'None', the values
# in presets/preset-name.yaml are used instead of the individual numbers.
@ -44,6 +45,8 @@ async def run(context):
'mirostat_mode': 0,
'mirostat_tau': 5,
'mirostat_eta': 0.1,
'guidance_scale': 1,
'negative_prompt': '',
'seed': -1,
'add_bos_token': True,

View File

@ -12,6 +12,7 @@ def run(prompt):
request = {
'prompt': prompt,
'max_new_tokens': 250,
'auto_max_new_tokens': False,
# Generation params. If 'preset' is set to different than 'None', the values
# in presets/preset-name.yaml are used instead of the individual numbers.
@ -36,6 +37,8 @@ def run(prompt):
'mirostat_mode': 0,
'mirostat_tau': 5,
'mirostat_eta': 0.1,
'guidance_scale': 1,
'negative_prompt': '',
'seed': -1,
'add_bos_token': True,

View File

@ -1,4 +0,0 @@
user: ""
bot: "### Response:"
turn_template: "<|user-message|>\n\n<|bot|><|bot-message|>\n\n</s>"
context: ""

View File

@ -1,126 +0,0 @@
.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx {
height: 66.67vh
}
.gradio-container {
margin-left: auto !important;
margin-right: auto !important;
}
.w-screen {
width: unset
}
div.svelte-362y77>*, div.svelte-362y77>.form>* {
flex-wrap: nowrap
}
/* fixes the API documentation in chat mode */
.api-docs.svelte-1iguv9h.svelte-1iguv9h.svelte-1iguv9h {
display: grid;
}
.pending.svelte-1ed2p3z {
opacity: 1;
}
#extensions {
padding: 0;
padding: 0;
}
#gradio-chatbot {
height: 66.67vh;
}
.wrap.svelte-6roggh.svelte-6roggh {
max-height: 92.5%;
}
/* This is for the microphone button in the whisper extension */
.sm.svelte-1ipelgc {
width: 100%;
}
#main button {
min-width: 0 !important;
}
/*****************************************************/
/*************** Chat box declarations ***************/
/*****************************************************/
.chat {
margin-left: auto;
margin-right: auto;
max-width: 800px;
height: calc(100vh - 286px);
overflow-y: auto;
padding-right: 20px;
display: flex;
flex-direction: column-reverse;
word-break: break-word;
overflow-wrap: anywhere;
padding-top: 1px;
}
.message-body li {
margin-top: 0.5em !important;
margin-bottom: 0.5em !important;
}
.message-body li > p {
display: inline !important;
}
.message-body ul, .message-body ol {
font-size: 15px !important;
}
.message-body ul {
list-style-type: disc !important;
}
.message-body pre {
margin-bottom: 1.25em !important;
}
.message-body code {
white-space: pre-wrap !important;
word-wrap: break-word !important;
}
.message-body :not(pre) > code {
white-space: normal !important;
}
@media print {
body {
visibility: hidden;
}
.chat {
visibility: visible;
position: absolute;
left: 0;
top: 0;
max-width: none;
max-height: none;
width: 100%;
height: fit-content;
display: flex;
flex-direction: column-reverse;
}
.message {
break-inside: avoid;
}
.gradio-container {
overflow: visible;
}
.tab-nav {
display: none !important;
}
}

View File

@ -1,4 +0,0 @@
document.getElementById("main").childNodes[0].style = "max-width: 800px; margin-left: auto; margin-right: auto";
document.getElementById("extensions").style.setProperty("max-width", "800px");
document.getElementById("extensions").style.setProperty("margin-left", "auto");
document.getElementById("extensions").style.setProperty("margin-right", "auto");

View File

@ -10,17 +10,10 @@
line-height: 1.428571429;
}
.circle-you {
background-color: gray;
border-radius: 1rem;
/*Change color to any you like to be the border of your image*/
border: 2px solid white;
}
.circle-you,
.circle-bot {
background-color: gray;
border-radius: 1rem;
/*Change color to any you like to be the border of the bot's image*/
border: 2px solid white;
}
@ -105,3 +98,39 @@
.message-body p em {
color: rgb(110, 110, 110) !important;
}
@media screen and (max-width: 688px) {
.message {
display: grid;
grid-template-columns: 60px minmax(0, 1fr);
padding-bottom: 25px;
font-size: 15px;
font-family: Helvetica, Arial, sans-serif;
line-height: 1.428571429;
}
.circle-you, .circle-bot {
width: 50px;
height: 73px;
border-radius: 0.5rem;
}
.circle-bot img,
.circle-you img {
width: 100%;
height: 100%;
object-fit: cover;
}
.text {
padding-left: 0px;
}
.message-body p {
font-size: 16px !important;
}
.username {
font-size: 20px;
}
}

View File

@ -98,7 +98,7 @@
margin-right: 40px !important;
}
#parent #container .message {
#parent #container .message_4chan {
color: black;
border: none;
}

View File

@ -43,6 +43,10 @@
margin-bottom: 9px !important;
}
.gradio-container .chat .assistant-message:last-child, .gradio-container .chat .user-message:last-child {
margin-bottom: 0px !important;
}
.dark .chat .assistant-message {
background-color: #3741519e;
border: 1px solid #4b5563;
@ -58,5 +62,5 @@ code {
}
.dark code {
background-color: #1a212f !important;
background-color: #0e1321 !important;
}

View File

@ -27,3 +27,7 @@
.container :not(pre) > code {
white-space: normal !important;
}
.container .hoverable {
font-size: 14px;
}

View File

@ -7,6 +7,7 @@
}
.small-button {
min-width: 0 !important;
max-width: 171px;
height: 39.594px;
align-self: end;
@ -26,6 +27,10 @@
max-width: 2.2em;
}
.button_nowrap {
white-space: nowrap;
}
#slim-column {
flex: none !important;
min-width: 0 !important;
@ -41,9 +46,6 @@
min-height: 0
}
#accordion {
}
.dark svg {
fill: white;
}
@ -56,7 +58,7 @@ ol li p, ul li p {
display: inline-block;
}
#main, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab {
#chat-tab, #default-tab, #notebook-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab {
border: 0;
}
@ -70,7 +72,6 @@ ol li p, ul li p {
}
#extensions {
padding: 15px;
margin-bottom: 35px;
}
@ -90,6 +91,8 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * {
.header_bar {
background-color: #f7f7f7;
margin-bottom: 20px;
display: inline !important;
overflow-x: scroll;
}
.dark .header_bar {
@ -97,19 +100,36 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * {
background-color: #8080802b;
}
.header_bar button.selected {
border-radius: 0;
}
.textbox_default textarea {
height: calc(100vh - 380px);
height: calc(100dvh - 280px);
}
.textbox_default_output textarea {
height: calc(100vh - 190px);
height: calc(100dvh - 190px);
}
.textbox textarea {
height: calc(100vh - 241px);
height: calc(100dvh - 241px);
}
.textbox_default textarea, .textbox_default_output textarea, .textbox textarea {
.textbox_logits textarea {
height: calc(100dvh - 241px);
}
.textbox_logits_notebook textarea {
height: calc(100dvh - 292px);
}
.textbox_default textarea,
.textbox_default_output textarea,
.textbox_logits textarea,
.textbox_logits_notebook textarea,
.textbox textarea
{
font-size: 16px !important;
color: #46464A !important;
}
@ -118,6 +138,16 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * {
color: #efefef !important;
}
@media screen and (max-width: 711px) {
.textbox_default textarea {
height: calc(100dvh - 271px);
}
div .default-token-counter {
top: calc( 0.5 * (100dvh - 245px) ) !important;
}
}
/* Hide the gradio footer*/
footer {
display: none !important;
@ -157,7 +187,7 @@ button {
}
.pretty_scrollbar::-webkit-scrollbar {
width: 10px;
width: 5px;
}
.pretty_scrollbar::-webkit-scrollbar-track {
@ -167,13 +197,11 @@ button {
.pretty_scrollbar::-webkit-scrollbar-thumb,
.pretty_scrollbar::-webkit-scrollbar-thumb:hover {
background: #c5c5d2;
border-radius: 10px;
}
.dark .pretty_scrollbar::-webkit-scrollbar-thumb,
.dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover {
background: #374151;
border-radius: 10px;
}
.pretty_scrollbar::-webkit-resizer {
@ -183,3 +211,207 @@ button {
.dark .pretty_scrollbar::-webkit-resizer {
background: #374151;
}
audio {
max-width: 100%;
}
/* Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui */
.token-counter {
position: absolute !important;
top: calc( 0.5 * (100dvh - 215px) ) !important;
right: 2px;
z-index: 100;
background: var(--input-background-fill) !important;
min-height: 0 !important;
}
.default-token-counter {
top: calc( 0.5 * (100dvh - 255px) ) !important;
}
.token-counter span {
padding: 1px;
box-shadow: 0 0 0 0.3em rgba(192,192,192,0.15), inset 0 0 0.6em rgba(192,192,192,0.075);
border: 2px solid rgba(192,192,192,0.4) !important;
border-radius: 0.4em;
}
.no-background {
background: var(--background-fill-primary) !important;
padding: 0px !important;
}
/*****************************************************/
/*************** Chat UI declarations ****************/
/*****************************************************/
.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx {
height: 66.67vh
}
.gradio-container {
margin-left: auto !important;
margin-right: auto !important;
}
.w-screen {
width: unset
}
div.svelte-362y77>*, div.svelte-362y77>.form>* {
flex-wrap: nowrap
}
.pending.svelte-1ed2p3z {
opacity: 1;
}
.wrap.svelte-6roggh.svelte-6roggh {
max-height: 92.5%;
}
/* This is for the microphone button in the whisper extension */
.sm.svelte-1ipelgc {
width: 100%;
}
#chat-tab button, #notebook-tab button, #default-tab button {
min-width: 0 !important;
}
#chat-tab > :first-child, #extensions {
max-width: 800px;
margin-left: auto;
margin-right: auto;
}
@media screen and (max-width: 688px) {
#chat-tab {
padding: 0px;
}
#chat {
height: calc(100dvh - 262px) !important;
}
.bigchat #chat {
height: calc(100dvh - 180px) !important;
}
.chat {
flex-direction: column-reverse !important;
}
}
.chat {
margin-left: auto;
margin-right: auto;
max-width: 800px;
height: 100%;
overflow-y: auto;
padding-right: 15px;
display: flex;
flex-direction: column;
word-break: break-word;
overflow-wrap: anywhere;
padding-top: 6px;
}
#chat {
height: calc(100dvh - 272px);
}
.bigchat #chat {
height: calc(100dvh - 200px);
}
#show-controls {
position: absolute;
background-color: transparent;
left: calc(100% - 130px);
}
.chat > .messages {
display: flex;
flex-direction: column;
}
.chat .message:last-child {
margin-bottom: 0px !important;
padding-bottom: 0px !important;
}
.message-body li {
margin-top: 0.5em !important;
margin-bottom: 0.5em !important;
}
.message-body li > p {
display: inline !important;
}
.message-body ul, .message-body ol {
font-size: 15px !important;
}
.message-body ul {
list-style-type: disc !important;
}
.message-body pre {
margin-bottom: 1.25em !important;
}
.message-body code {
white-space: pre-wrap !important;
word-wrap: break-word !important;
}
.message-body :not(pre) > code {
white-space: normal !important;
}
#chat-input {
padding: 0;
padding-top: 18px;
background: var(--background-fill-primary);
border: none;
}
#chat-input textarea:focus {
box-shadow: none !important;
}
@media print {
body {
visibility: hidden;
}
.chat {
visibility: visible;
position: absolute;
left: 0;
top: 0;
max-width: unset;
max-height: unset;
width: 100%;
overflow-y: visible;
}
.message {
break-inside: avoid;
}
.gradio-container {
overflow: visible;
}
.tab-nav {
display: none !important;
}
#chat-tab > :first-child {
max-width: unset;
}
}

View File

@ -1,25 +0,0 @@
document.getElementById("main").parentNode.childNodes[0].classList.add("header_bar");
document.getElementById("main").parentNode.style = "padding: 0; margin: 0";
document.getElementById("main").parentNode.parentNode.parentNode.style = "padding: 0";
// Get references to the elements
let main = document.getElementById('main');
let main_parent = main.parentNode;
let extensions = document.getElementById('extensions');
// Add an event listener to the main element
main_parent.addEventListener('click', function(e) {
// Check if the main element is visible
if (main.offsetHeight > 0 && main.offsetWidth > 0) {
extensions.style.display = 'flex';
} else {
extensions.style.display = 'none';
}
});
const textareaElements = document.querySelectorAll('.add_scrollbar textarea');
for(i = 0; i < textareaElements.length; i++) {
textareaElements[i].classList.remove('scroll-hide');
textareaElements[i].classList.add('pretty_scrollbar');
textareaElements[i].style.resize = "none";
}

View File

@ -16,7 +16,7 @@ RUN . /build/venv/bin/activate && \
# https://developer.nvidia.com/cuda-gpus
# for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5"
ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}"
RUN . /build/venv/bin/activate && \
python3 setup_cuda.py bdist_wheel -d .
@ -26,7 +26,7 @@ LABEL maintainer="Your Name <your.email@example.com>"
LABEL description="Docker image for GPTQ-for-LLaMa and Text Generation WebUI"
RUN apt-get update && \
apt-get install --no-install-recommends -y python3-dev libportaudio2 libasound-dev git python3 python3-pip make g++ && \
apt-get install --no-install-recommends -y python3-dev libportaudio2 libasound-dev git python3 python3-pip make g++ ffmpeg && \
rm -rf /var/lib/apt/lists/*
RUN --mount=type=cache,target=/root/.cache/pip pip3 install virtualenv
@ -51,11 +51,15 @@ COPY extensions/elevenlabs_tts/requirements.txt /app/extensions/elevenlabs_tts/r
COPY extensions/google_translate/requirements.txt /app/extensions/google_translate/requirements.txt
COPY extensions/silero_tts/requirements.txt /app/extensions/silero_tts/requirements.txt
COPY extensions/whisper_stt/requirements.txt /app/extensions/whisper_stt/requirements.txt
COPY extensions/superbooga/requirements.txt /app/extensions/superbooga/requirements.txt
COPY extensions/openai/requirements.txt /app/extensions/openai/requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/api && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/elevenlabs_tts && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/google_translate && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/silero_tts && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/whisper_stt && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/superbooga && pip3 install -r requirements.txt
RUN --mount=type=cache,target=/root/.cache/pip . /app/venv/bin/activate && cd extensions/openai && pip3 install -r requirements.txt
COPY requirements.txt /app/requirements.txt
RUN . /app/venv/bin/activate && \

View File

@ -23,6 +23,7 @@ services:
- ./prompts:/app/prompts
- ./softprompts:/app/softprompts
- ./training:/app/training
- ./cloudflared:/etc/cloudflared
deploy:
resources:
reservations:

View File

@ -39,8 +39,8 @@ The extensions framework is based on special functions and variables that you ca
| `def ui()` | Creates custom gradio elements when the UI is launched. |
| `def custom_css()` | Returns custom CSS as a string. It is applied whenever the web UI is loaded. |
| `def custom_js()` | Same as above but for javascript. |
| `def input_modifier(string, state)` | Modifies the input string before it enters the model. In chat mode, it is applied to the user message. Otherwise, it is applied to the entire prompt. |
| `def output_modifier(string, state)` | Modifies the output string before it is presented in the UI. In chat mode, it is applied to the bot's reply. Otherwise, it is applied to the entire output. |
| `def input_modifier(string, state, is_chat=False)` | Modifies the input string before it enters the model. In chat mode, it is applied to the user message. Otherwise, it is applied to the entire prompt. |
| `def output_modifier(string, state, is_chat=False)` | Modifies the output string before it is presented in the UI. In chat mode, it is applied to the bot's reply. Otherwise, it is applied to the entire output. |
| `def chat_input_modifier(text, visible_text, state)` | Modifies both the visible and internal inputs in chat mode. Can be used to hijack the chat input with custom content. |
| `def bot_prefix_modifier(string, state)` | Applied in chat mode to the prefix for the bot's reply. |
| `def state_modifier(state)` | Modifies the dictionary containing the UI input parameters before it is used by the text generation functions. |
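As a minimal sketch of an extension `script.py` using the updated signatures above (the behavior shown is made up for illustration):

```
def input_modifier(string, state, is_chat=False):
    # In chat mode this receives the user message; otherwise the whole prompt.
    return string

def output_modifier(string, state, is_chat=False):
    # In chat mode this receives the bot reply; otherwise the whole output.
    return string.rstrip()
```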
@ -163,7 +163,7 @@ def chat_input_modifier(text, visible_text, state):
"""
return text, visible_text
def input_modifier(string, state):
def input_modifier(string, state, is_chat=False):
"""
In default/notebook modes, modifies the whole prompt.
@ -196,7 +196,7 @@ def logits_processor_modifier(processor_list, input_ids):
processor_list.append(MyLogits())
return processor_list
def output_modifier(string, state):
def output_modifier(string, state, is_chat=False):
"""
Modifies the LLM output before it gets presented.

View File

@ -64,59 +64,19 @@ python server.py --autogptq --gpu-memory 3000MiB 6000MiB --model model_name
### Using LoRAs with AutoGPTQ
Not supported yet.
Works fine for a single LoRA.
## GPTQ-for-LLaMa
GPTQ-for-LLaMa is the original adaptation of GPTQ for the LLaMA model. It was made possible by [@qwopqwop200](https://github.com/qwopqwop200/GPTQ-for-LLaMa): https://github.com/qwopqwop200/GPTQ-for-LLaMa
Different branches of GPTQ-for-LLaMa are currently available, including:
| Branch | Comment |
|----|----|
| [Old CUDA branch (recommended)](https://github.com/oobabooga/GPTQ-for-LLaMa/) | The fastest branch, works on Windows and Linux. |
| [Up-to-date triton branch](https://github.com/qwopqwop200/GPTQ-for-LLaMa) | Slightly more precise than the old CUDA branch from 13b upwards, significantly more precise for 7b. 2x slower for small context size and only works on Linux. |
| [Up-to-date CUDA branch](https://github.com/qwopqwop200/GPTQ-for-LLaMa/tree/cuda) | As precise as the up-to-date triton branch, 10x slower than the old cuda branch for small context size. |
Overall, I recommend using the old CUDA branch. It is included by default in the one-click-installer for this web UI.
### Installation
Start by cloning GPTQ-for-LLaMa into your `text-generation-webui/repositories` folder:
```
mkdir repositories
cd repositories
git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda
```
If you want to use the up-to-date CUDA or triton branches instead of the old CUDA branch, use these commands:
```
git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa.git -b cuda
```
```
git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa.git -b triton
```
Next you need to install the CUDA extensions. You can do that either by installing the precompiled wheels, or by compiling the wheels yourself.
A Python package containing both major CUDA versions of GPTQ-for-LLaMa is used to simplify installation and compatibility: https://github.com/jllllll/GPTQ-for-LLaMa-CUDA
### Precompiled wheels
Kindly provided by our friend jllllll: https://github.com/jllllll/GPTQ-for-LLaMa-Wheels
Kindly provided by our friend jllllll: https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases
Windows:
```
pip install https://github.com/jllllll/GPTQ-for-LLaMa-Wheels/raw/main/quant_cuda-0.0.0-cp310-cp310-win_amd64.whl
```
Linux:
```
pip install https://github.com/jllllll/GPTQ-for-LLaMa-Wheels/raw/Linux-x64/quant_cuda-0.0.0-cp310-cp310-linux_x86_64.whl
```
Wheels are included in requirements.txt and are installed with the webui on supported systems.
### Manual installation
@ -124,20 +84,19 @@ pip install https://github.com/jllllll/GPTQ-for-LLaMa-Wheels/raw/Linux-x64/quant
```
conda activate textgen
conda install -c conda-forge cudatoolkit-dev
conda install cuda -c nvidia/label/cuda-11.7.1
```
The command above takes some 10 minutes to run and shows no progress bar or updates along the way.
You are also going to need to have a C++ compiler installed. On Linux, `sudo apt install build-essential` or equivalent is enough.
You are also going to need to have a C++ compiler installed. On Linux, `sudo apt install build-essential` or equivalent is enough. On Windows, Visual Studio or Visual Studio Build Tools is required.
If you're using an older version of CUDA toolkit (e.g. 11.7) but the latest version of `gcc` and `g++` (12.0+), you should downgrade with: `conda install -c conda-forge gxx==11.3.0`. Kernel compilation will fail otherwise.
If you're using an older version of CUDA toolkit (e.g. 11.7) but the latest version of `gcc` and `g++` (12.0+) on Linux, you should downgrade with: `conda install -c conda-forge gxx==11.3.0`. Kernel compilation will fail otherwise.
#### Step 2: compile the CUDA extensions
```
cd repositories/GPTQ-for-LLaMa
python setup_cuda.py install
python -m pip install git+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA -v
```
### Getting pre-converted LLaMA weights

View File

@ -24,14 +24,14 @@ from tqdm.contrib.concurrent import thread_map
class ModelDownloader:
def __init__(self, max_retries=5):
self.s = requests.Session()
self.session = requests.Session()
if max_retries:
self.s.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries))
self.s.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries))
self.session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries))
self.session.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries))
if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None:
self.s.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
self.session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
if os.getenv('HF_TOKEN') is not None:
self.s.headers = {'authorization': f'Bearer {os.getenv("HF_TOKEN")}'}
self.session.headers = {'authorization': f'Bearer {os.getenv("HF_TOKEN")}'}
def sanitize_model_and_branch_names(self, model, branch):
if model[-1] == '/':
@ -57,12 +57,13 @@ class ModelDownloader:
classifications = []
has_pytorch = False
has_pt = False
# has_ggml = False
has_gguf = False
has_ggml = False
has_safetensors = False
is_lora = False
while True:
url = f"{base}{page}" + (f"?cursor={cursor.decode()}" if cursor else "")
r = self.s.get(url, timeout=10)
r = self.session.get(url, timeout=10)
r.raise_for_status()
content = r.content
@ -75,13 +76,14 @@ class ModelDownloader:
if not is_lora and fname.endswith(('adapter_config.json', 'adapter_model.bin')):
is_lora = True
is_pytorch = re.match("(pytorch|adapter|gptq)_model.*\.bin", fname)
is_safetensors = re.match(".*\.safetensors", fname)
is_pt = re.match(".*\.pt", fname)
is_ggml = re.match(".*ggml.*\.bin", fname)
is_tokenizer = re.match("(tokenizer|ice|spiece).*\.model", fname)
is_text = re.match(".*\.(txt|json|py|md)", fname) or is_tokenizer
if any((is_pytorch, is_safetensors, is_pt, is_ggml, is_tokenizer, is_text)):
is_pytorch = re.match(r"(pytorch|adapter|gptq)_model.*\.bin", fname)
is_safetensors = re.match(r".*\.safetensors", fname)
is_pt = re.match(r".*\.pt", fname)
is_gguf = re.match(r'.*\.gguf', fname)
is_ggml = re.match(r".*ggml.*\.bin", fname)
is_tokenizer = re.match(r"(tokenizer|ice|spiece).*\.model", fname)
is_text = re.match(r".*\.(txt|json|py|md)", fname) or is_tokenizer
if any((is_pytorch, is_safetensors, is_pt, is_gguf, is_ggml, is_tokenizer, is_text)):
if 'lfs' in dict[i]:
sha256.append([fname, dict[i]['lfs']['oid']])
@ -101,8 +103,11 @@ class ModelDownloader:
elif is_pt:
has_pt = True
classifications.append('pt')
elif is_gguf:
has_gguf = True
classifications.append('gguf')
elif is_ggml:
# has_ggml = True
has_ggml = True
classifications.append('ggml')
cursor = base64.b64encode(f'{{"file_name":"{dict[-1]["path"]}"}}'.encode()) + b':50'
@ -115,6 +120,12 @@ class ModelDownloader:
if classifications[i] in ['pytorch', 'pt']:
links.pop(i)
# If both GGML and GGUF are available, download GGUF only
if has_ggml and has_gguf:
for i in range(len(classifications) - 1, -1, -1):
if classifications[i] == 'ggml':
links.pop(i)
return links, sha256, is_lora
def get_output_folder(self, model, branch, is_lora, base_folder=None):
@ -136,7 +147,7 @@ class ModelDownloader:
if output_path.exists() and not start_from_scratch:
# Check if the file has already been downloaded completely
r = self.s.get(url, stream=True, timeout=10)
r = self.session.get(url, stream=True, timeout=10)
total_size = int(r.headers.get('content-length', 0))
if output_path.stat().st_size >= total_size:
return
@ -145,7 +156,7 @@ class ModelDownloader:
headers = {'Range': f'bytes={output_path.stat().st_size}-'}
mode = 'ab'
with self.s.get(url, stream=True, headers=headers, timeout=10) as r:
with self.session.get(url, stream=True, headers=headers, timeout=10) as r:
r.raise_for_status() # Do not continue the download if the request was unsuccessful
total_size = int(r.headers.get('content-length', 0))
block_size = 1024 * 1024 # 1MB

View File

@ -200,7 +200,7 @@ class Handler(BaseHTTPRequestHandler):
super().end_headers()
def _run_server(port: int, share: bool = False):
def _run_server(port: int, share: bool = False, tunnel_id=str):
address = '0.0.0.0' if shared.args.listen else '127.0.0.1'
server = ThreadingHTTPServer((address, port), Handler)
@ -210,7 +210,7 @@ def _run_server(port: int, share: bool = False):
if share:
try:
try_start_cloudflared(port, max_attempts=3, on_start=on_start)
try_start_cloudflared(port, tunnel_id, max_attempts=3, on_start=on_start)
except Exception:
pass
else:
@ -220,5 +220,5 @@ def _run_server(port: int, share: bool = False):
server.serve_forever()
def start_server(port: int, share: bool = False):
Thread(target=_run_server, args=[port, share], daemon=True).start()
def start_server(port: int, share: bool = False, tunnel_id=str):
Thread(target=_run_server, args=[port, share, tunnel_id], daemon=True).start()

View File

@ -1,2 +1,2 @@
flask_cloudflared==0.0.12
flask_cloudflared==0.0.14
websockets==11.0.2

View File

@ -4,5 +4,5 @@ from modules import shared
def setup():
blocking_api.start_server(shared.args.api_blocking_port, share=shared.args.public_api)
streaming_api.start_server(shared.args.api_streaming_port, share=shared.args.public_api)
blocking_api.start_server(shared.args.api_blocking_port, share=shared.args.public_api, tunnel_id=shared.args.public_api_id)
streaming_api.start_server(shared.args.api_streaming_port, share=shared.args.public_api, tunnel_id=shared.args.public_api_id)

View File

@ -102,7 +102,7 @@ async def _run(host: str, port: int):
await asyncio.Future() # run forever
def _run_server(port: int, share: bool = False):
def _run_server(port: int, share: bool = False, tunnel_id=str):
address = '0.0.0.0' if shared.args.listen else '127.0.0.1'
def on_start(public_url: str):
@ -111,7 +111,7 @@ def _run_server(port: int, share: bool = False):
if share:
try:
try_start_cloudflared(port, max_attempts=3, on_start=on_start)
try_start_cloudflared(port, tunnel_id, max_attempts=3, on_start=on_start)
except Exception as e:
print(e)
else:
@ -120,5 +120,5 @@ def _run_server(port: int, share: bool = False):
asyncio.run(_run(host=address, port=port))
def start_server(port: int, share: bool = False):
Thread(target=_run_server, args=[port, share], daemon=True).start()
def start_server(port: int, share: bool = False, tunnel_id=str):
Thread(target=_run_server, args=[port, share, tunnel_id], daemon=True).start()

View File

@ -21,6 +21,7 @@ def build_parameters(body, chat=False):
generate_params = {
'max_new_tokens': int(body.get('max_new_tokens', body.get('max_length', 200))),
'auto_max_new_tokens': bool(body.get('auto_max_new_tokens', False)),
'do_sample': bool(body.get('do_sample', True)),
'temperature': float(body.get('temperature', 0.5)),
'top_p': float(body.get('top_p', 1)),
@ -42,6 +43,8 @@ def build_parameters(body, chat=False):
'mirostat_mode': int(body.get('mirostat_mode', 0)),
'mirostat_tau': float(body.get('mirostat_tau', 5)),
'mirostat_eta': float(body.get('mirostat_eta', 0.1)),
'guidance_scale': float(body.get('guidance_scale', 1)),
'negative_prompt': str(body.get('negative_prompt', '')),
'seed': int(body.get('seed', -1)),
'add_bos_token': bool(body.get('add_bos_token', True)),
'truncation_length': int(body.get('truncation_length', body.get('max_context_length', 2048))),
@ -65,30 +68,28 @@ def build_parameters(body, chat=False):
name1, name2, _, greeting, context, _ = load_character_memoized(character, str(body.get('your_name', shared.settings['name1'])), shared.settings['name2'], instruct=False)
name1_instruct, name2_instruct, _, _, context_instruct, turn_template = load_character_memoized(instruction_template, '', '', instruct=True)
generate_params.update({
'stop_at_newline': bool(body.get('stop_at_newline', shared.settings['stop_at_newline'])),
'chat_generation_attempts': int(body.get('chat_generation_attempts', shared.settings['chat_generation_attempts'])),
'mode': str(body.get('mode', 'chat')),
'name1': name1,
'name2': name2,
'context': context,
'greeting': greeting,
'name1_instruct': name1_instruct,
'name2_instruct': name2_instruct,
'context_instruct': body.get('context_instruct', context_instruct),
'turn_template': turn_template,
'chat-instruct_command': str(body.get('chat-instruct_command', shared.settings['chat-instruct_command'])),
'name1': str(body.get('name1', name1)),
'name2': str(body.get('name2', name2)),
'context': str(body.get('context', context)),
'greeting': str(body.get('greeting', greeting)),
'name1_instruct': str(body.get('name1_instruct', name1_instruct)),
'name2_instruct': str(body.get('name2_instruct', name2_instruct)),
'context_instruct': str(body.get('context_instruct', context_instruct)),
'turn_template': str(body.get('turn_template', turn_template)),
'chat-instruct_command': str(body.get('chat_instruct_command', body.get('chat-instruct_command', shared.settings['chat-instruct_command']))),
'history': body.get('history', {'internal': [], 'visible': []})
})
return generate_params
def try_start_cloudflared(port: int, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None):
def try_start_cloudflared(port: int, tunnel_id: str, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None):
Thread(target=_start_cloudflared, args=[
port, max_attempts, on_start], daemon=True).start()
port, tunnel_id, max_attempts, on_start], daemon=True).start()
def _start_cloudflared(port: int, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None):
def _start_cloudflared(port: int, tunnel_id: str, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None):
try:
from flask_cloudflared import _run_cloudflared
except ImportError:
@ -98,6 +99,9 @@ def _start_cloudflared(port: int, max_attempts: int = 3, on_start: Optional[Call
for _ in range(max_attempts):
try:
if tunnel_id is not None:
public_url = _run_cloudflared(port, port + 1, tunnel_id=tunnel_id)
else:
public_url = _run_cloudflared(port, port + 1)
if on_start:

View File

@ -1,12 +1,13 @@
import html
import re
from pathlib import Path
import elevenlabs
import gradio as gr
from modules import chat, shared
from modules.utils import gradio
from modules import chat, shared, ui_chat
from modules.logging_colors import logger
from modules.utils import gradio
params = {
'activate': True,
@ -111,7 +112,7 @@ def output_modifier(string):
output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.mp3'.format(wav_idx))
print(f'Outputting audio to {str(output_file)}')
try:
audio = elevenlabs.generate(text=string, voice=params['selected_voice'], model=params['model'])
audio = elevenlabs.generate(text=html.unescape(string), voice=params['selected_voice'], model=params['model'])
elevenlabs.save(audio, str(output_file))
autoplay = 'autoplay' if params['autoplay'] else ''
@ -167,7 +168,6 @@ def ui():
convert_cancel = gr.Button('Cancel', visible=False)
convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)
if shared.is_chat():
# Convert history with confirmation
convert_arr = [convert_confirm, convert, convert_cancel]
convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)
@ -175,7 +175,7 @@ def ui():
lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then(
remove_tts_from_history, gradio('history'), gradio('history')).then(
chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
chat.redraw_html, shared.reload_inputs, gradio('display'))
chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
@ -184,7 +184,7 @@ def ui():
lambda x: params.update({"show_text": x}), show_text, None).then(
toggle_text_in_history, gradio('history'), gradio('history')).then(
chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
chat.redraw_html, shared.reload_inputs, gradio('display'))
chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
# Event functions to update the parameters in the backend
activate.change(lambda x: params.update({'activate': x}), activate, None)

View File

@ -59,7 +59,7 @@ def chat_input_modifier(text, visible_text, state):
"""
return text, visible_text
def input_modifier(string, state):
def input_modifier(string, state, is_chat=False):
"""
In default/notebook modes, modifies the whole prompt.
@ -92,7 +92,7 @@ def logits_processor_modifier(processor_list, input_ids):
processor_list.append(MyLogits())
return processor_list
def output_modifier(string, state):
def output_modifier(string, state, is_chat=False):
"""
Modifies the LLM output before it gets presented.

View File

@ -0,0 +1,33 @@
let gallery_element = document.getElementById('gallery-extension');
let chat_mode_element = document.getElementById('chat-mode');
let extensions_block = document.getElementById('extensions');
let extensions_block_size = extensions_block.childNodes.length;
let gallery_only = (extensions_block_size == 5);
document.querySelector('.header_bar').addEventListener('click', function(event) {
if (event.target.tagName === 'BUTTON') {
const buttonText = event.target.textContent.trim();
let chat_visible = (buttonText == 'Chat');
let default_visible = (buttonText == 'Default');
let notebook_visible = (buttonText == 'Notebook');
let chat_mode_visible = (chat_mode_element.offsetHeight > 0 && chat_mode_element.offsetWidth > 0);
// Only show this extension in the Chat tab
if (chat_visible) {
if (chat_mode_visible) {
gallery_element.style.display = 'block';
extensions_block.style.display = '';
} else {
gallery_element.style.display = 'none';
extensions_block.style.display = 'none';
}
} else {
gallery_element.style.display = 'none';
if (gallery_only) {
extensions_block.style.display = 'none';
}
}
}
});

View File

@ -82,8 +82,13 @@ def select_character(evt: gr.SelectData):
return (evt.value[1])
def custom_js():
path_to_js = Path(__file__).parent.resolve() / 'script.js'
return open(path_to_js, 'r').read()
def ui():
with gr.Accordion("Character gallery", open=False):
with gr.Accordion("Character gallery", open=False, elem_id='gallery-extension'):
update = gr.Button("Refresh")
gr.HTML(value="<style>" + generate_css() + "</style>")
gallery = gr.Dataset(components=[gr.HTML(visible=False)],

View File

@ -28,7 +28,7 @@ class MyLogits(LogitsProcessor):
def __call__(self, input_ids, scores):
if input_ids.shape[-1] - initial_size < params["min_length"]:
scores[...,self.newline_id] = -1000
scores[...,shared.tokenizer.eos_token_id] = -1000
# scores[...,shared.tokenizer.eos_token_id] = -1000
# probs = torch.softmax(scores, dim=-1, dtype=torch.float)
# probs[0] /= probs[0].sum()

View File

@ -165,7 +165,7 @@ def messages_to_prompt(body: dict, req_params: dict, max_tokens):
# Instruct models can be much better
if shared.settings['instruction_template']:
try:
instruct = yaml.safe_load(open(f"characters/instruction-following/{shared.settings['instruction_template']}.yaml", 'r'))
instruct = yaml.safe_load(open(f"instruction-templates/{shared.settings['instruction_template']}.yaml", 'r'))
template = instruct['turn_template']
system_message_template = "{message}"
@ -193,7 +193,7 @@ def messages_to_prompt(body: dict, req_params: dict, max_tokens):
except Exception as e:
req_params['stopping_strings'].extend(['\nUser:', 'User:']) # XXX User: prompt here also
print(f"Exception: When loading characters/instruction-following/{shared.settings['instruction_template']}.yaml: {repr(e)}")
print(f"Exception: When loading instruction-templates/{shared.settings['instruction_template']}.yaml: {repr(e)}")
print("Warning: Loaded default instruction-following template for model.")
else:

View File

@ -4,6 +4,7 @@ import copy
# Data type is important, Ex. use 0.0 for a float 0
default_req_params = {
'max_new_tokens': 16, # 'Inf' for chat
'auto_max_new_tokens': False,
'temperature': 1.0,
'top_p': 1.0,
'top_k': 1, # choose 20 for chat in absence of another default
@ -32,6 +33,8 @@ default_req_params = {
'mirostat_mode': 0,
'mirostat_tau': 5.0,
'mirostat_eta': 0.1,
'guidance_scale': 1,
'negative_prompt': '',
'ban_eos_token': False,
'skip_special_tokens': True,
'custom_stopping_strings': '',
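The two new keys correspond to the web UI's classifier-free guidance options: a guidance_scale above 1 enables CFG and negative_prompt supplies the text to steer away from. A hedged sketch of overriding these defaults for one request (here `body` stands for the parsed JSON request; the extension's exact merge logic may differ):

    import copy

    req_params = copy.deepcopy(default_req_params)
    req_params['guidance_scale'] = body.get('guidance_scale', req_params['guidance_scale'])
    req_params['negative_prompt'] = body.get('negative_prompt', req_params['negative_prompt'])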

View File

@ -31,7 +31,7 @@ def edits(instruction: str, input: str, temperature=1.0, top_p=1.0) -> dict:
stopping_strings.extend(['\n###'])
else:
try:
instruct = yaml.safe_load(open(f"characters/instruction-following/{shared.settings['instruction_template']}.yaml", 'r'))
instruct = yaml.safe_load(open(f"instruction-templates/{shared.settings['instruction_template']}.yaml", 'r'))
template = instruct['turn_template']
template = template\
@ -45,7 +45,7 @@ def edits(instruction: str, input: str, temperature=1.0, top_p=1.0) -> dict:
except Exception as e:
instruction_template = default_template
print(f"Exception: When loading characters/instruction-following/{shared.settings['instruction_template']}.yaml: {repr(e)}")
print(f"Exception: When loading instruction-templates/{shared.settings['instruction_template']}.yaml: {repr(e)}")
print("Warning: Loaded default instruction-following template (Alpaca) for model.")
else:
stopping_strings.extend(['\n###'])
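Both hunks above only change where the templates are read from: the YAML files moved from characters/instruction-following/ to instruction-templates/ (examples of the format appear later in this diff). A hedged sketch of loading one and expanding a single turn, assuming the Alpaca.yaml shipped with the repository:

    import yaml

    instruct = yaml.safe_load(open('instruction-templates/Alpaca.yaml'))
    turn = (instruct['turn_template']
            .replace('<|user|>', instruct['user'])
            .replace('<|user-message|>', 'Write a haiku about rivers.')
            .replace('<|bot|>', instruct['bot'])
            .replace('<|bot-message|>', ''))
    prompt = instruct.get('context', '') + turn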

View File

@ -67,10 +67,13 @@ class Handler(BaseHTTPRequestHandler):
self.send_response(code)
self.send_access_control_headers()
self.send_header('Content-Type', 'application/json')
self.end_headers()
response = json.dumps(ret)
r_utf8 = response.encode('utf-8')
self.send_header('Content-Length', str(len(r_utf8)))
self.end_headers()
self.wfile.write(r_utf8)
if not no_debug:
debug_msg(r_utf8)
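The point of the reordering above is that BaseHTTPRequestHandler only transmits headers added before the end_headers() call, so Content-Length must be sent first. The same pattern in isolation:

    import json
    from http.server import BaseHTTPRequestHandler

    class ExampleHandler(BaseHTTPRequestHandler):
        def do_GET(self):
            body = json.dumps({'ok': True}).encode('utf-8')
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.send_header('Content-Length', str(len(body)))
            self.end_headers()  # headers are finalized here; nothing sent after this line reaches the client as a header
            self.wfile.write(body)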

View File

@ -1,17 +1,22 @@
import time
import gradio
import numpy as np
import torch
from transformers import LogitsProcessor
import numpy as np
from modules import shared
from modules import html_generator, shared
params = {
'active': True,
'color_by_perplexity': False,
'color_by_probability': False,
'ppl_scale': 15.0, # No slider for this right now, because I don't think it really needs to be changed. Very large perplexity scores don't show up often.
#'probability_dropdown': False
'probability_dropdown': False,
'verbose': False # For debugging mostly
}
class PerplexityLogits(LogitsProcessor):
def __init__(self, verbose=False):
self.generated_token_ids = []
@ -23,8 +28,9 @@ class PerplexityLogits(LogitsProcessor):
self.verbose = verbose
def __call__(self, input_ids, scores):
# t0 = time.time()
probs = torch.softmax(scores, dim=-1, dtype=torch.float)
log_probs = torch.nan_to_num(torch.log(probs))
log_probs = torch.nan_to_num(torch.log(probs))  # Note: converts the nan from log(0) into 0; since probs*log_probs multiplies it by 0, the perplexity is unaffected.
entropy = -torch.sum(probs * log_probs)
entropy = entropy.cpu().numpy()
perplexity = round(float(np.exp(entropy)), 4)
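As a worked example of the calculation above (entropy of the softmax distribution, then exponentiated to get perplexity), with a toy logits tensor chosen purely for illustration:

    import numpy as np
    import torch

    scores = torch.tensor([[2.0, 1.0, 0.5, -1.0]])  # made-up logits for one position
    probs = torch.softmax(scores, dim=-1, dtype=torch.float)
    log_probs = torch.nan_to_num(torch.log(probs))
    entropy = -torch.sum(probs * log_probs).cpu().numpy()
    perplexity = round(float(np.exp(entropy)), 4)  # 1.0 means fully certain; larger means more spread out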
@ -36,16 +42,16 @@ class PerplexityLogits(LogitsProcessor):
if len(self.selected_probs) > 0:
# Is the selected token in the top tokens?
if self.verbose:
print(shared.tokenizer.decode(last_token_id))
print([shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1]])
print(self.top_probs_list[-1])
if last_token_id in self.top_token_ids_list[-1]:
idx = self.top_token_ids_list[-1].index(last_token_id)
self.selected_probs.append(self.top_probs_list[-1][idx])
print('Probs: Token after', shared.tokenizer.decode(last_token_id))
print('Probs:', [shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1][0]])
print('Probs:', [round(float(prob), 4) for prob in self.top_probs_list[-1][0]])
if last_token_id in self.top_token_ids_list[-1][0]:
idx = self.top_token_ids_list[-1][0].index(last_token_id)
self.selected_probs.append(self.top_probs_list[-1][0][idx])
else:
self.top_token_ids_list[-1].append(last_token_id)
self.top_token_ids_list[-1][0].append(last_token_id)
last_prob = round(float(self.last_probs[last_token_id]), 4)
self.top_probs_list[-1].append(last_prob)
self.top_probs_list[-1][0].append(last_prob)
self.selected_probs.append(last_prob)
else:
self.selected_probs.append(1.0) # Placeholder for the last token of the prompt
@ -54,7 +60,7 @@ class PerplexityLogits(LogitsProcessor):
pplbar = "-"
if not np.isnan(perplexity):
pplbar = "*" * round(perplexity)
print(f"{last_token}\t{perplexity:.2f}\t{pplbar}")
print(f"PPL: Token after {shared.tokenizer.decode(last_token_id)}\t{perplexity:.2f}\t{pplbar}")
# Get top 5 probabilities
top_tokens_and_probs = torch.topk(probs, 5)
@ -67,76 +73,73 @@ class PerplexityLogits(LogitsProcessor):
probs = probs.cpu().numpy().flatten()
self.last_probs = probs # Need to keep this as a reference for top probs
# t1 = time.time()
# print(f"PPL Processor: {(t1-t0):.3f} s")
# About 1 ms, though occasionally up to around 100 ms, not sure why...
# Doesn't actually modify the logits!
return scores
# Stores the perplexity and top probabilities
ppl_logits_processor = None
def logits_processor_modifier(logits_processor_list, input_ids):
global ppl_logits_processor
ppl_logits_processor = PerplexityLogits()
if params['active']:
ppl_logits_processor = PerplexityLogits(verbose=params['verbose'])
logits_processor_list.append(ppl_logits_processor)
def output_modifier(text):
global ppl_logits_processor
# t0 = time.time()
if not params['active']:
return text
# TODO: It's probably more efficient to do this above rather than modifying all these lists
# Remove last element of perplexities_list, top_token_ids_list, top_tokens_list, top_probs_list since everything is off by one because this extension runs before generation
perplexities = ppl_logits_processor.perplexities_list[:-1]
top_token_ids_list = ppl_logits_processor.top_token_ids_list[:-1]
top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids] for top_token_ids in top_token_ids_list]
top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids[0]] for top_token_ids in top_token_ids_list]
top_probs_list = ppl_logits_processor.top_probs_list[:-1]
# Remove first element of generated_token_ids, generated_tokens, selected_probs because they are for the last token of the prompt
gen_token_ids = ppl_logits_processor.generated_token_ids[1:]
gen_tokens = [shared.tokenizer.decode(token_id) for token_id in gen_token_ids]
sel_probs = ppl_logits_processor.selected_probs[1:]
end_part = '</span>' # Helps with finding the index after replacing part of the text.
in_code = False # Since the <span> tags mess up code blocks, avoid coloring while inside a code block, based on finding tokens with '`' in them
end_part = '</div></div>' if params['probability_dropdown'] else '</span>' # Helps with finding the index after replacing part of the text.
if params['color_by_probability'] and params['color_by_perplexity']:
i = 0
for token, prob, ppl, top_tokens, top_probs in zip(gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list):
if '`' in token:
in_code = not in_code
continue
if in_code:
continue
color = 'ffffff'
if params['color_by_probability'] and params['color_by_perplexity']:
color = probability_perplexity_color_scale(prob, ppl)
if token in text[i:]:
text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
i += text[i:].find(end_part) + len(end_part)
elif params['color_by_perplexity']:
i = 0
for token, ppl, top_tokens, top_probs in zip(gen_tokens, perplexities, top_tokens_list, top_probs_list):
if '`' in token:
in_code = not in_code
continue
if in_code:
continue
color = perplexity_color_scale(ppl)
if token in text[i:]:
text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
i += text[i:].find(end_part) + len(end_part)
elif params['color_by_probability']:
i = 0
for token, prob, top_tokens, top_probs in zip(gen_tokens, sel_probs, top_tokens_list, top_probs_list):
if '`' in token:
in_code = not in_code
continue
if in_code:
continue
color = probability_color_scale(prob)
if token in text[i:]:
if params['probability_dropdown']:
text = text[:i] + text[i:].replace(token, add_dropdown_html(token, color, top_tokens, top_probs[0], ppl), 1)
else:
text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
i += text[i:].find(end_part) + len(end_part)
print('Average perplexity:', round(np.mean(perplexities), 4))
# Use full perplexity list for calculating the average here.
print('Average perplexity:', round(np.mean(ppl_logits_processor.perplexities_list[:-1]), 4))
# t1 = time.time()
# print(f"Modifier: {(t1-t0):.3f} s")
# About 50 ms
return text
# Green-yellow-red color scale
def probability_color_scale(prob):
'''
Green-yellow-red color scale
'''
rv = 0
gv = 0
if prob <= 0.5:
@ -149,22 +152,32 @@ def probability_color_scale(prob):
gv = 'ff'
if len(rv) < 2:
rv = '0' * (2 - len(rv)) + rv
return rv + gv + '00'
# Red component only, white for 0 perplexity (sorry if you're not in dark mode)
def perplexity_color_scale(ppl):
'''
Red component only, white for 0 perplexity (sorry if you're not in dark mode)
'''
value = hex(max(int(255.0 - params['ppl_scale'] * (float(ppl) - 1.0)), 0))[2:]
if len(value) < 2:
value = '0' * (2 - len(value)) + value
return 'ff' + value + value
# Green-yellow-red for probability and blue component for perplexity
def probability_perplexity_color_scale(prob, ppl):
'''
Green-yellow-red for probability and blue component for perplexity
'''
rv = 0
gv = 0
bv = hex(min(max(int(params['ppl_scale'] * (float(ppl) - 1.0)), 0), 255))[2:]
if len(bv) < 2:
bv = '0' * (2 - len(bv)) + bv
if prob <= 0.5:
rv = 'ff'
gv = hex(int(255 * prob * 2))[2:]
@ -175,41 +188,122 @@ def probability_perplexity_color_scale(prob, ppl):
gv = 'ff'
if len(rv) < 2:
rv = '0' * (2 - len(rv)) + rv
return rv + gv + bv
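As a quick check of the probability component of these scales: a probability of 0.25 gives a green value of hex(int(255 * 0.25 * 2)) = '7f' against a full red component, i.e. the orange '#ff7f00' (with a '00' blue byte in the plain probability scale); 0.5 comes out yellow '#ffff00' and 1.0 pure green '#00ff00', matching the "100% green, 50% yellow, 0% red" wording used in the UI checkboxes below.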
def add_color_html(token, color):
return f'<span style="color: #{color}">{token}</span>'
"""
# This is still very broken at the moment, needs CSS too but I'm not very good at CSS (and neither is GPT-4 apparently) so I still need to figure that out.
def add_dropdown_html(token, color, top_tokens, top_probs):
html = f'<span class="hoverable" style="color: #{color}">{token}<div class="dropdown"><table class="dropdown-content">'
for token, prob in zip(top_tokens, top_probs):
# TODO: Background color? Bold for selected token?
# Bigger issue: Why is there a newline after the first token, and the dropdown fails there?
# The HTML ends up like <p><span>word</span></p><div>...</div>,
# even though for all other tokens it shows up correctly.
# TODO: Major issue: Applying this to too many tokens will cause a permanent slowdown in generation speed until the messages are removed from the history.
# I think the issue is from HTML elements taking up space in the visible history, and things like history deepcopy add latency proportional to the size of the history.
# Potential solution is maybe to modify the main generation code to send just the internal text and not the visible history, to avoid moving too much around.
# I wonder if we can also avoid using deepcopy here.
def add_dropdown_html(token, color, top_tokens, top_probs, perplexity=0):
html = f'<div class="hoverable"><span style="color: #{color}">{token}</span><div class="dropdown"><table class="dropdown-content"><tbody>'
for token_option, prob in zip(top_tokens, top_probs):
# TODO: Bold for selected token?
# Using divs prevented the problem of divs inside spans causing issues.
# Now the problem is that the divs render with a uniform single space of whitespace between every token.
# There is probably some way to fix this in CSS that I don't know about.
row_color = probability_color_scale(prob)
html += f'<tr><td style="color: #{row_color}">{token}</td><td style="color: #{row_color}">{prob}</td></tr>'
html += '</table></div></span>'
return html
row_class = ' class="selected"' if token_option == token else ''
html += f'<tr{row_class}><td style="color: #{row_color}">{token_option}</td><td style="color: #{row_color}">{prob:.4f}</td></tr>'
if perplexity != 0:
ppl_color = perplexity_color_scale(perplexity)
html += f'<tr><td>Perplexity:</td><td style="color: #{ppl_color}">{perplexity:.4f}</td></tr>'
html += '</tbody></table></div></div>'
return html # About 750 characters per token...
def custom_css():
return """
.dropdown {
display: none;
position: absolute;
z-index: 50;
background-color: var(--block-background-fill);
box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
width: max-content;
overflow: visible;
padding: 5px;
border-radius: 10px;
border: 1px solid var(--border-color-primary);
}
.dropdown-content {
border: none;
z-index: 50;
}
.dropdown-content tr.selected {
background-color: var(--block-label-background-fill);
}
.dropdown-content td {
color: var(--body-text-color);
}
.hoverable {
color: var(--body-text-color);
position: relative;
display: inline-block;
overflow: visible;
font-size: 15px;
line-height: 1.75;
margin: 0;
padding: 0;
}
.hoverable:hover .dropdown {
display: block;
}
pre {
white-space: pre-wrap;
}
/* TODO: This makes the hover menus extend outside the bounds of the chat area, which is good.
However, it also makes the scrollbar disappear, which is bad.
The scroll bar needs to still be present. So for now, we can't see dropdowns that extend past the edge of the chat area.
.chat {
    overflow-y: auto;
}
*/
"""
# Monkeypatch applied to html_generator.py
# We simply don't render markdown into HTML. We wrap everything in <pre> tags to preserve whitespace
# formatting. If you're coloring tokens by perplexity or probability, or especially if you're using
# the probability dropdown, you probably care more about seeing the tokens the model actually outputted
# rather than rendering ```code blocks``` or *italics*.
def convert_to_markdown(string):
return '<pre>' + string + '</pre>'
html_generator.convert_to_markdown = convert_to_markdown
def ui():
color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.")
def update_active_check(x):
params.update({'active': x})
def update_color_by_ppl_check(x):
params.update({'color_by_perplexity': x})
color_by_ppl_check.change(update_color_by_ppl_check, color_by_ppl_check, None)
color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.")
def update_color_by_prob_check(x):
params.update({'color_by_probability': x})
color_by_prob_check.change(update_color_by_prob_check, color_by_prob_check, None)
# Doesn't work yet...
"""
prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown")
def update_prob_dropdown_check(x):
params.update({'probability_dropdown': x})
active_check = gradio.Checkbox(value=True, label="Compute probabilities and perplexity scores", info="Activate this extension. Note that this extension currently does not work with exllama or llama.cpp.")
color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.")
color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.")
prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown", info="Hover over a token to show a dropdown of top token probabilities. Currently slightly buggy with whitespace between tokens.")
active_check.change(update_active_check, active_check, None)
color_by_ppl_check.change(update_color_by_ppl_check, color_by_ppl_check, None)
color_by_prob_check.change(update_color_by_prob_check, color_by_prob_check, None)
prob_dropdown_check.change(update_prob_dropdown_check, prob_dropdown_check, None)
"""

View File

@ -133,6 +133,9 @@ def get_SD_pictures(description, character):
if params['manage_VRAM']:
give_VRAM_priority('SD')
description = re.sub('<audio.*?</audio>', ' ', description)
description = f"({description}:1)"
payload = {
"prompt": params['prompt_prefix'] + description,
"seed": params['seed'],

View File

@ -5,7 +5,7 @@ import gradio as gr
import torch
from transformers import BlipForConditionalGeneration, BlipProcessor
from modules import chat, shared
from modules import chat, shared, ui_chat
from modules.ui import gather_interface_values
from modules.utils import gradio
@ -54,5 +54,5 @@ def ui():
"value": generate_chat_picture(picture, name1, name2)
}), [picture_select, shared.gradio['name1'], shared.gradio['name2']], None).then(
gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.generate_chat_reply_wrapper, shared.input_params, gradio('display', 'history'), show_progress=False).then(
chat.generate_chat_reply_wrapper, gradio(ui_chat.inputs), gradio('display', 'history'), show_progress=False).then(
lambda: None, None, picture_select, show_progress=False)

View File

@ -0,0 +1,720 @@
The birch canoe slid on the smooth planks.
Glue the sheet to the dark blue background.
It's easy to tell the depth of a well.
These days a chicken leg is a rare dish.
Rice is often served in round bowls.
The juice of lemons makes fine punch.
The box was thrown beside the parked truck.
The hogs were fed chopped corn and garbage.
Four hours of steady work faced us.
A large size in stockings is hard to sell.
The boy was there when the sun rose.
A rod is used to catch pink salmon.
The source of the huge river is the clear spring.
Kick the ball straight and follow through.
Help the woman get back to her feet.
A pot of tea helps to pass the evening.
Smoky fires lack flame and heat.
The soft cushion broke the man's fall.
The salt breeze came across from the sea.
The girl at the booth sold fifty bonds.
The small pup gnawed a hole in the sock.
The fish twisted and turned on the bent hook.
Press the pants and sew a button on the vest.
The swan dive was far short of perfect.
The beauty of the view stunned the young boy.
Two blue fish swam in the tank.
Her purse was full of useless trash.
The colt reared and threw the tall rider.
It snowed, rained, and hailed the same morning.
Read verse out loud for pleasure.
Hoist the load to your left shoulder.
Take the winding path to reach the lake.
Note closely the size of the gas tank.
Wipe the grease off his dirty face.
Mend the coat before you go out.
The wrist was badly strained and hung limp.
The stray cat gave birth to kittens.
The young girl gave no clear response.
The meal was cooked before the bell rang.
What joy there is in living.
A king ruled the state in the early days.
The ship was torn apart on the sharp reef.
Sickness kept him home the third week.
The wide road shimmered in the hot sun.
The lazy cow lay in the cool grass.
Lift the square stone over the fence.
The rope will bind the seven books at once.
Hop over the fence and plunge in.
The friendly gang left the drug store.
Mesh wire keeps chicks inside.
The frosty air passed through the coat.
The crooked maze failed to fool the mouse.
Adding fast leads to wrong sums.
The show was a flop from the very start.
A saw is a tool used for making boards.
The wagon moved on well oiled wheels.
March the soldiers past the next hill.
A cup of sugar makes sweet fudge.
Place a rosebush near the porch steps.
Both lost their lives in the raging storm.
We talked of the side show in the circus.
Use a pencil to write the first draft.
He ran half way to the hardware store.
The clock struck to mark the third period.
A small creek cut across the field.
Cars and busses stalled in snow drifts.
The set of china hit the floor with a crash.
This is a grand season for hikes on the road.
The dune rose from the edge of the water.
Those words were the cue for the actor to leave.
A yacht slid around the point into the bay.
The two met while playing on the sand.
The ink stain dried on the finished page.
The walled town was seized without a fight.
The lease ran out in sixteen weeks.
A tame squirrel makes a nice pet.
The horn of the car woke the sleeping cop.
The heart beat strongly and with firm strokes.
The pearl was worn in a thin silver ring.
The fruit peel was cut in thick slices.
The Navy attacked the big task force.
See the cat glaring at the scared mouse.
There are more than two factors here.
The hat brim was wide and too droopy.
The lawyer tried to lose his case.
The grass curled around the fence post.
Cut the pie into large parts.
Men strive but seldom get rich.
Always close the barn door tight.
He lay prone and hardly moved a limb.
The slush lay deep along the street.
A wisp of cloud hung in the blue air.
A pound of sugar costs more than eggs.
The fin was sharp and cut the clear water.
The play seems dull and quite stupid.
Bail the boat to stop it from sinking.
The term ended in late June that year.
A tusk is used to make costly gifts.
Ten pins were set in order.
The bill was paid every third week.
Oak is strong and also gives shade.
Cats and dogs each hate the other.
The pipe began to rust while new.
Open the crate but don't break the glass.
Add the sum to the product of these three.
Thieves who rob friends deserve jail.
The ripe taste of cheese improves with age.
Act on these orders with great speed.
The hog crawled under the high fence.
Move the vat over the hot fire.
The bark of the pine tree was shiny and dark.
Leaves turn brown and yellow in the fall.
The pennant waved when the wind blew.
Split the log with a quick, sharp blow.
Burn peat after the logs give out.
He ordered peach pie with ice cream.
Weave the carpet on the right hand side.
Hemp is a weed found in parts of the tropics.
A lame back kept his score low.
We find joy in the simplest things.
Type out three lists of orders.
The harder he tried the less he got done.
The boss ran the show with a watchful eye.
The cup cracked and spilled its contents.
Paste can cleanse the most dirty brass.
The slang word for raw whiskey is booze.
It caught its hind paw in a rusty trap.
The wharf could be seen at the farther shore.
Feel the heat of the weak dying flame.
The tiny girl took off her hat.
A cramp is no small danger on a swim.
He said the same phrase thirty times.
Pluck the bright rose without leaves.
Two plus seven is less than ten.
The glow deepened in the eyes of the sweet girl.
Bring your problems to the wise chief.
Write a fond note to the friend you cherish.
Clothes and lodging are free to new men.
We frown when events take a bad turn.
Port is a strong wine with a smoky taste.
The young kid jumped the rusty gate.
Guess the results from the first scores.
A salt pickle tastes fine with ham.
The just claim got the right verdict.
These thistles bend in a high wind.
Pure bred poodles have curls.
The tree top waved in a graceful way.
The spot on the blotter was made by green ink.
Mud was spattered on the front of his white shirt.
The cigar burned a hole in the desk top.
The empty flask stood on the tin tray.
A speedy man can beat this track mark.
He broke a new shoelace that day.
The coffee stand is too high for the couch.
The urge to write short stories is rare.
The pencils have all been used.
The pirates seized the crew of the lost ship.
We tried to replace the coin but failed.
She sewed the torn coat quite neatly.
The sofa cushion is red and of light weight.
The jacket hung on the back of the wide chair.
At that high level the air is pure.
Drop the two when you add the figures.
A filing case is now hard to buy.
An abrupt start does not win the prize.
Wood is best for making toys and blocks.
The office paint was a dull, sad tan.
He knew the skill of the great young actress.
A rag will soak up spilled water.
A shower of dirt fell from the hot pipes.
Steam hissed from the broken valve.
The child almost hurt the small dog.
There was a sound of dry leaves outside.
The sky that morning was clear and bright blue.
Torn scraps littered the stone floor.
Sunday is the best part of the week.
The doctor cured him with these pills.
The new girl was fired today at noon.
They felt gay when the ship arrived in port.
Add the store's account to the last cent.
Acid burns holes in wool cloth.
Fairy tales should be fun to write.
Eight miles of woodland burned to waste.
The third act was dull and tired the players.
A young child should not suffer fright.
Add the column and put the sum here.
We admire and love a good cook.
There the flood mark is ten inches.
He carved a head from the round block of marble.
She has a smart way of wearing clothes.
The fruit of a fig tree is apple-shaped.
Corn cobs can be used to kindle a fire.
Where were they when the noise started.
The paper box is full of thumb tacks.
Sell your gift to a buyer at a good gain.
The tongs lay beside the ice pail.
The petals fall with the next puff of wind.
Bring your best compass to the third class.
They could laugh although they were sad.
Farmers came in to thresh the oat crop.
The brown house was on fire to the attic.
The lure is used to catch trout and flounder.
Float the soap on top of the bath water.
A blue crane is a tall wading bird.
A fresh start will work such wonders.
The club rented the rink for the fifth night.
After the dance, they went straight home.
The hostess taught the new maid to serve.
He wrote his last novel there at the inn.
Even the worst will beat his low score.
The cement had dried when he moved it.
The loss of the second ship was hard to take.
The fly made its way along the wall.
Do that with a wooden stick.
Live wires should be kept covered.
The large house had hot water taps.
It is hard to erase blue or red ink.
Write at once or you may forget it.
The doorknob was made of bright clean brass.
The wreck occurred by the bank on Main Street.
A pencil with black lead writes best.
Coax a young calf to drink from a bucket.
Schools for ladies teach charm and grace.
The lamp shone with a steady green flame.
They took the axe and the saw to the forest.
The ancient coin was quite dull and worn.
The shaky barn fell with a loud crash.
Jazz and swing fans like fast music.
Rake the rubbish up and then burn it.
Slash the gold cloth into fine ribbons.
Try to have the court decide the case.
They are pushed back each time they attack.
He broke his ties with groups of former friends.
They floated on the raft to sun their white backs.
The map had an X that meant nothing.
Whitings are small fish caught in nets.
Some ads serve to cheat buyers.
Jerk the rope and the bell rings weakly.
A waxed floor makes us lose balance.
Madam, this is the best brand of corn.
On the islands the sea breeze is soft and mild.
The play began as soon as we sat down.
This will lead the world to more sound and fury.
Add salt before you fry the egg.
The rush for funds reached its peak Tuesday.
The birch looked stark white and lonesome.
The box is held by a bright red snapper.
To make pure ice, you freeze water.
The first worm gets snapped early.
Jump the fence and hurry up the bank.
Yell and clap as the curtain slides back.
They are men who walk the middle of the road.
Both brothers wear the same size.
In some form or other we need fun.
The prince ordered his head chopped off.
The houses are built of red clay bricks.
Ducks fly north but lack a compass.
Fruit flavors are used in fizz drinks.
These pills do less good than others.
Canned pears lack full flavor.
The dark pot hung in the front closet.
Carry the pail to the wall and spill it there.
The train brought our hero to the big town.
We are sure that one war is enough.
Gray paint stretched for miles around.
The rude laugh filled the empty room.
High seats are best for football fans.
Tea served from the brown jug is tasty.
A dash of pepper spoils beef stew.
A zestful food is the hot-cross bun.
The horse trotted around the field at a brisk pace.
Find the twin who stole the pearl necklace.
Cut the cord that binds the box tightly.
The red tape bound the smuggled food.
Look in the corner to find the tan shirt.
The cold drizzle will halt the bond drive.
Nine men were hired to dig the ruins.
The junk yard had a mouldy smell.
The flint sputtered and lit a pine torch.
Soak the cloth and drown the sharp odor.
The shelves were bare of both jam or crackers.
A joy to every child is the swan boat.
All sat frozen and watched the screen.
A cloud of dust stung his tender eyes.
To reach the end he needs much courage.
Shape the clay gently into block form.
A ridge on a smooth surface is a bump or flaw.
Hedge apples may stain your hands green.
Quench your thirst, then eat the crackers.
Tight curls get limp on rainy days.
The mute muffled the high tones of the horn.
The gold ring fits only a pierced ear.
The old pan was covered with hard fudge.
Watch the log float in the wide river.
The node on the stalk of wheat grew daily.
The heap of fallen leaves was set on fire.
Write fast if you want to finish early.
His shirt was clean but one button was gone.
The barrel of beer was a brew of malt and hops.
Tin cans are absent from store shelves.
Slide the box into that empty space.
The plant grew large and green in the window.
The beam dropped down on the workmen's head.
Pink clouds floated with the breeze.
She danced like a swan, tall and graceful.
The tube was blown and the tire flat and useless.
It is late morning on the old wall clock.
Let's all join as we sing the last chorus.
The last switch cannot be turned off.
The fight will end in just six minutes.
The store walls were lined with colored frocks.
The peace league met to discuss their plans.
The rise to fame of a person takes luck.
Paper is scarce, so write with much care.
The quick fox jumped on the sleeping cat.
The nozzle of the fire hose was bright brass.
Screw the round cap on as tight as needed.
Time brings us many changes.
The purple tie was ten years old.
Men think and plan and sometimes act.
Fill the ink jar with sticky glue.
He smoke a big pipe with strong contents.
We need grain to keep our mules healthy.
Pack the records in a neat thin case.
The crunch of feet in the snow was the only sound.
The copper bowl shone in the sun's rays.
Boards will warp unless kept dry.
The plush chair leaned against the wall.
Glass will clink when struck by metal.
Bathe and relax in the cool green grass.
Nine rows of soldiers stood in line.
The beach is dry and shallow at low tide.
The idea is to sew both edges straight.
The kitten chased the dog down the street.
Pages bound in cloth make a book.
Try to trace the fine lines of the painting.
Women form less than half of the group.
The zones merge in the central part of town.
A gem in the rough needs work to polish.
Code is used when secrets are sent.
Most of the news is easy for us to hear.
He used the lathe to make brass objects.
The vane on top of the pole revolved in the wind.
Mince pie is a dish served to children.
The clan gathered on each dull night.
Let it burn, it gives us warmth and comfort.
A castle built from sand fails to endure.
A child's wit saved the day for us.
Tack the strip of carpet to the worn floor.
Next Tuesday we must vote.
Pour the stew from the pot into the plate.
Each penny shone like new.
The man went to the woods to gather sticks.
The dirt piles were lines along the road.
The logs fell and tumbled into the clear stream.
Just hoist it up and take it away.
A ripe plum is fit for a king's palate.
Our plans right now are hazy.
Brass rings are sold by these natives.
It takes a good trap to capture a bear.
Feed the white mouse some flower seeds.
The thaw came early and freed the stream.
He took the lead and kept it the whole distance.
The key you designed will fit the lock.
Plead to the council to free the poor thief.
Better hash is made of rare beef.
This plank was made for walking on.
The lake sparkled in the red hot sun.
He crawled with care along the ledge.
Tend the sheep while the dog wanders.
It takes a lot of help to finish these.
Mark the spot with a sign painted red.
Take two shares as a fair profit.
The fur of cats goes by many names.
North winds bring colds and fevers.
He asks no person to vouch for him.
Go now and come here later.
A sash of gold silk will trim her dress.
Soap can wash most dirt away.
That move means the game is over.
He wrote down a long list of items.
A siege will crack the strong defense.
Grape juice and water mix well.
Roads are paved with sticky tar.
Fake stones shine but cost little.
The drip of the rain made a pleasant sound.
Smoke poured out of every crack.
Serve the hot rum to the tired heroes.
Much of the story makes good sense.
The sun came up to light the eastern sky.
Heave the line over the port side.
A lathe cuts and trims any wood.
It's a dense crowd in two distinct ways.
His hip struck the knee of the next player.
The stale smell of old beer lingers.
The desk was firm on the shaky floor.
It takes heat to bring out the odor.
Beef is scarcer than some lamb.
Raise the sail and steer the ship northward.
A cone costs five cents on Mondays.
A pod is what peas always grow in.
Jerk the dart from the cork target.
No cement will hold hard wood.
We now have a new base for shipping.
A list of names is carved around the base.
The sheep were led home by a dog.
Three for a dime, the young peddler cried.
The sense of smell is better than that of touch.
No hardship seemed to keep him sad.
Grace makes up for lack of beauty.
Nudge gently but wake her now.
The news struck doubt into restless minds.
Once we stood beside the shore.
A chink in the wall allowed a draft to blow.
Fasten two pins on each side.
A cold dip restores health and zest.
He takes the oath of office each March.
The sand drifts over the sill of the old house.
The point of the steel pen was bent and twisted.
There is a lag between thought and act.
Seed is needed to plant the spring corn.
Draw the chart with heavy black lines.
The boy owed his pal thirty cents.
The chap slipped into the crowd and was lost.
Hats are worn to tea and not to dinner.
The ramp led up to the wide highway.
Beat the dust from the rug onto the lawn.
Say it slowly but make it ring clear.
The straw nest housed five robins.
Screen the porch with woven straw mats.
This horse will nose his way to the finish.
The dry wax protects the deep scratch.
He picked up the dice for a second roll.
These coins will be needed to pay his debt.
The nag pulled the frail cart along.
Twist the valve and release hot steam.
The vamp of the shoe had a gold buckle.
The smell of burned rags itches my nose.
New pants lack cuffs and pockets.
The marsh will freeze when cold enough.
They slice the sausage thin with a knife.
The bloom of the rose lasts a few days.
A gray mare walked before the colt.
Breakfast buns are fine with a hot drink.
Bottles hold four kinds of rum.
The man wore a feather in his felt hat.
He wheeled the bike past the winding road.
Drop the ashes on the worn old rug.
The desk and both chairs were painted tan.
Throw out the used paper cup and plate.
A clean neck means a neat collar.
The couch cover and hall drapes were blue.
The stems of the tall glasses cracked and broke.
The wall phone rang loud and often.
The clothes dried on a thin wooden rack.
Turn on the lantern which gives us light.
The cleat sank deeply into the soft turf.
The bills were mailed promptly on the tenth of the month.
To have is better than to wait and hope.
The price is fair for a good antique clock.
The music played on while they talked.
Dispense with a vest on a day like this.
The bunch of grapes was pressed into wine.
He sent the figs, but kept the ripe cherries.
The hinge on the door creaked with old age.
The screen before the fire kept in the sparks.
Fly by night, and you waste little time.
Thick glasses helped him read the print.
Birth and death mark the limits of life.
The chair looked strong but had no bottom.
The kite flew wildly in the high wind.
A fur muff is stylish once more.
The tin box held priceless stones.
We need an end of all such matter.
The case was puzzling to the old and wise.
The bright lanterns were gay on the dark lawn.
We don't get much money but we have fun.
The youth drove with zest, but little skill.
Five years he lived with a shaggy dog.
A fence cuts through the corner lot.
The way to save money is not to spend much.
Shut the hatch before the waves push it in.
The odor of spring makes young hearts jump.
Crack the walnut with your sharp side teeth.
He offered proof in the form of a large chart.
Send the stuff in a thick paper bag.
A quart of milk is water for the most part.
They told wild tales to frighten him.
The three story house was built of stone.
In the rear of the ground floor was a large passage.
A man in a blue sweater sat at the desk.
Oats are a food eaten by horse and man.
Their eyelids droop for want of sleep.
A sip of tea revives his tired friend.
There are many ways to do these things.
Tuck the sheet under the edge of the mat.
A force equal to that would move the earth.
We like to see clear weather.
The work of the tailor is seen on each side.
Take a chance and win a china doll.
Shake the dust from your shoes, stranger.
She was kind to sick old people.
The square wooden crate was packed to be shipped.
The dusty bench stood by the stone wall.
We dress to suit the weather of most days.
Smile when you say nasty words.
A bowl of rice is free with chicken stew.
The water in this well is a source of good health.
Take shelter in this tent, but keep still.
That guy is the writer of a few banned books.
The little tales they tell are false.
The door was barred, locked, and bolted as well.
Ripe pears are fit for a queen's table.
A big wet stain was on the round carpet.
The kite dipped and swayed, but stayed aloft.
The pleasant hours fly by much too soon.
The room was crowded with a wild mob.
This strong arm shall shield your honor.
She blushed when he gave her a white orchid.
The beetle droned in the hot June sun.
Press the pedal with your left foot.
Neat plans fail without luck.
The black trunk fell from the landing.
The bank pressed for payment of the debt.
The theft of the pearl pin was kept secret.
Shake hands with this friendly child.
The vast space stretched into the far distance.
A rich farm is rare in this sandy waste.
His wide grin earned many friends.
Flax makes a fine brand of paper.
Hurdle the pit with the aid of a long pole.
A strong bid may scare your partner stiff.
Even a just cause needs power to win.
Peep under the tent and see the clowns.
The leaf drifts along with a slow spin.
Cheap clothes are flashy but don't last.
A thing of small note can cause despair.
Flood the mails with requests for this book.
A thick coat of black paint covered all.
The pencil was cut to be sharp at both ends.
Those last words were a strong statement.
He wrote his name boldly at the top of the sheet.
Dill pickles are sour but taste fine.
Down that road is the way to the grain farmer.
Either mud or dust are found at all times.
The best method is to fix it in place with clips.
If you mumble your speech will be lost.
At night the alarm roused him from a deep sleep.
Read just what the meter says.
Fill your pack with bright trinkets for the poor.
The small red neon lamp went out.
Clams are small, round, soft, and tasty.
The fan whirled its round blades softly.
The line where the edges join was clean.
Breathe deep and smell the piny air.
It matters not if he reads these words or those.
A brown leather bag hung from its strap.
A toad and a frog are hard to tell apart.
A white silk jacket goes with any shoes.
A break in the dam almost caused a flood.
Paint the sockets in the wall dull green.
The child crawled into the dense grass.
Bribes fail where honest men work.
Trample the spark, else the flames will spread.
The hilt of the sword was carved with fine designs.
A round hole was drilled through the thin board.
Footprints showed the path he took up the beach.
She was waiting at my front lawn.
A vent near the edge brought in fresh air.
Prod the old mule with a crooked stick.
It is a band of steel three inches wide.
The pipe ran almost the length of the ditch.
It was hidden from sight by a mass of leaves and shrubs.
The weight of the package was seen on the high scale.
Wake and rise, and step into the green outdoors.
The green light in the brown box flickered.
The brass tube circled the high wall.
The lobes of her ears were pierced to hold rings.
Hold the hammer near the end to drive the nail.
Next Sunday is the twelfth of the month.
Every word and phrase he speaks is true.
He put his last cartridge into the gun and fired.
They took their kids from the public school.
Drive the screw straight into the wood.
Keep the hatch tight and the watch constant.
Sever the twine with a quick snip of the knife.
Paper will dry out when wet.
Slide the catch back and open the desk.
Help the weak to preserve their strength.
A sullen smile gets few friends.
Stop whistling and watch the boys march.
Jerk the cord, and out tumbles the gold.
Slide the tray across the glass top.
The cloud moved in a stately way and was gone.
Light maple makes for a swell room.
Set the piece here and say nothing.
Dull stories make her laugh.
A stiff cord will do to fasten your shoe.
Get the trust fund to the bank early.
Choose between the high road and the low.
A plea for funds seems to come again.
He lent his coat to the tall gaunt stranger.
There is a strong chance it will happen once more.
The duke left the park in a silver coach.
Greet the new guests and leave quickly.
When the frost has come it is time for turkey.
Sweet words work better than fierce.
A thin stripe runs down the middle.
A six comes up more often than a ten.
Lush fern grow on the lofty rocks.
The ram scared the school children off.
The team with the best timing looks good.
The farmer swapped his horse for a brown ox.
Sit on the perch and tell the others what to do.
A steep trail is painful for our feet.
The early phase of life moves fast.
Green moss grows on the northern side.
Tea in thin china has a sweet taste.
Pitch the straw through the door of the stable.
The latch on the back gate needed a nail.
The goose was brought straight from the old market.
The sink is the thing in which we pile dishes.
A whiff of it will cure the most stubborn cold.
The facts don't always show who is right.
She flaps her cape as she parades the street.
The loss of the cruiser was a blow to the fleet.
Loop the braid to the left and then over.
Plead with the lawyer to drop the lost cause.
Calves thrive on tender spring grass.
Post no bills on this office wall.
Tear a thin sheet from the yellow pad.
A cruise in warm waters in a sleek yacht is fun.
A streak of color ran down the left edge.
It was done before the boy could see it.
Crouch before you jump or miss the mark.
Pack the kits and don't forget the salt.
The square peg will settle in the round hole.
Fine soap saves tender skin.
Poached eggs and tea must suffice.
Bad nerves are jangled by a door slam.
Ship maps are different from those for planes.
Dimes showered down from all sides.
They sang the same tunes at each party.
The sky in the west is tinged with orange red.
The pods of peas ferment in bare fields.
The horse balked and threw the tall rider.
The hitch between the horse and cart broke.
Pile the coal high in the shed corner.
A gold vase is both rare and costly.
The knife was hung inside its bright sheath.
The rarest spice comes from the far East.
The roof should be tilted at a sharp slant.
A smatter of French is worse than none.
The mule trod the treadmill day and night.
The aim of the contest is to raise a great fund.
To send it now in large amounts is bad.
There is a fine hard tang in salty air.
Cod is the main business of the north shore.
The slab was hewn from heavy blocks of slate.
Dunk the stale biscuits into strong drink.
Hang tinsel from both branches.
Cap the jar with a tight brass cover.
The poor boy missed the boat again.
Be sure to set the lamp firmly in the hole.
Pick a card and slip it under the pack.
A round mat will cover the dull spot.
The first part of the plan needs changing.
A good book informs of what we ought to know.
The mail comes in three batches per day.
You cannot brew tea in a cold pot.
Dots of light betrayed the black cat.
Put the chart on the mantel and tack it down.
The night shift men rate extra pay.
The red paper brightened the dim stage.
See the player scoot to third base.
Slide the bill between the two leaves.
Many hands help get the job done.
We don't like to admit our small faults.
No doubt about the way the wind blows.
Dig deep in the earth for pirate's gold.
The steady drip is worse than a drenching rain.
A flat pack takes less luggage space.
Green ice frosted the punch bowl.
A stuffed chair slipped from the moving van.
The stitch will serve but needs to be shortened.
A thin book fits in the side pocket.
The gloss on top made it unfit to read.
The hail pattered on the burnt brown grass.
Seven seals were stamped on great sheets.
Our troops are set to strike heavy blows.
The store was jammed before the sale could start.
It was a bad error on the part of the new judge.
One step more and the board will collapse.
Take the match and strike it against your shoe.
The pot boiled, but the contents failed to jell.
The baby puts his right foot in his mouth.
The bombs left most of the town in ruins.
Stop and stare at the hard working man.
The streets are narrow and full of sharp turns.
The pup jerked the leash as he saw a feline shape.
Open your book to the first page.
Fish evade the net and swim off.
Dip the pail once and let it settle.
Will you please answer that phone.
The big red apple fell to the ground.
The curtain rose and the show was on.
The young prince became heir to the throne.
He sent the boy on a short errand.
Leave now and you will arrive on time.
The corner store was robbed last night.
A gold ring will please most any girl.
The long journey home took a year.
She saw a cat in the neighbor's house.
A pink shell was found on the sandy beach.
Small children came to see him.
The grass and bushes were wet with dew.
The blind man counted his old coins.
A severe storm tore down the barn.
She called his name many times.
When you hear the bell, come quickly.

View File

@ -1,3 +1,5 @@
import html
import random
import time
from pathlib import Path
@ -5,7 +7,7 @@ import gradio as gr
import torch
from extensions.silero_tts import tts_preprocessor
from modules import chat, shared
from modules import chat, shared, ui_chat
from modules.utils import gradio
torch._C._jit_set_profiling_mode(False)
@ -106,6 +108,7 @@ def history_modifier(history):
def output_modifier(string, state):
global model, current_params, streaming_state
for i in params:
if params[i] != current_params[i]:
model = load_model()
@ -116,7 +119,7 @@ def output_modifier(string, state):
return string
original_string = string
string = tts_preprocessor.preprocess(string)
string = tts_preprocessor.preprocess(html.unescape(string))
if string == '':
string = '*Empty reply, try regenerating*'
@ -140,6 +143,35 @@ def setup():
model = load_model()
def random_sentence():
with open(Path("extensions/silero_tts/harvard_sentences.txt")) as f:
return random.choice(list(f))
def voice_preview(preview_text):
global model, current_params, streaming_state
for i in params:
if params[i] != current_params[i]:
model = load_model()
current_params = params.copy()
break
string = tts_preprocessor.preprocess(preview_text or random_sentence())
output_file = Path('extensions/silero_tts/outputs/voice_preview.wav')
prosody = f"<prosody rate=\"{params['voice_speed']}\" pitch=\"{params['voice_pitch']}\">"
silero_input = f'<speak>{prosody}{xmlesc(string)}</prosody></speak>'
model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file))
return f'<audio src="file/{output_file.as_posix()}?{int(time.time())}" controls autoplay></audio>'
def custom_css():
path_to_css = Path(__file__).parent.resolve() / 'style.css'
return open(path_to_css, 'r').read()
def ui():
# Gradio elements
with gr.Accordion("Silero TTS"):
@ -153,14 +185,16 @@ def ui():
v_pitch = gr.Dropdown(value=params['voice_pitch'], choices=voice_pitches, label='Voice pitch')
v_speed = gr.Dropdown(value=params['voice_speed'], choices=voice_speeds, label='Voice speed')
with gr.Row():
preview_text = gr.Text(show_label=False, placeholder="Preview text", elem_id="silero_preview_text")
preview_play = gr.Button("Preview")
preview_audio = gr.HTML(visible=False)
with gr.Row():
convert = gr.Button('Permanently replace audios with the message texts')
convert_cancel = gr.Button('Cancel', visible=False)
convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)
gr.Markdown('[Click here for Silero audio samples](https://oobabooga.github.io/silero-samples/index.html)')
if shared.is_chat():
# Convert history with confirmation
convert_arr = [convert_confirm, convert, convert_cancel]
convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)
@ -168,7 +202,7 @@ def ui():
lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then(
remove_tts_from_history, gradio('history'), gradio('history')).then(
chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
chat.redraw_html, shared.reload_inputs, gradio('display'))
chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
@ -177,7 +211,7 @@ def ui():
lambda x: params.update({"show_text": x}), show_text, None).then(
toggle_text_in_history, gradio('history'), gradio('history')).then(
chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then(
chat.redraw_html, shared.reload_inputs, gradio('display'))
chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
# Event functions to update the parameters in the backend
activate.change(lambda x: params.update({"activate": x}), activate, None)
@ -185,3 +219,7 @@ def ui():
voice.change(lambda x: params.update({"speaker": x}), voice, None)
v_pitch.change(lambda x: params.update({"voice_pitch": x}), v_pitch, None)
v_speed.change(lambda x: params.update({"voice_speed": x}), v_speed, None)
# Play preview
preview_text.submit(voice_preview, preview_text, preview_audio)
preview_play.click(voice_preview, preview_text, preview_audio)
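For reference, voice_preview() above wraps the preprocessed text in Silero's SSML markup before synthesis. With illustrative parameter values ('medium' speed and pitch, which are among the dropdown choices) and a sentence random_sentence() might pick from harvard_sentences.txt, the ssml_text handed to model.save_wav would look roughly like:

    <speak><prosody rate="medium" pitch="medium">The birch canoe slid on the smooth planks.</prosody></speak>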

View File

@ -0,0 +1,8 @@
.SDAP .hires_opts input[type="number"] {
width: 6em !important;
}
/* silero_tts preview */
.form:has(> #silero_preview_text) {
min-width: 75%
}

View File

@ -4,7 +4,7 @@ import textwrap
import gradio as gr
from bs4 import BeautifulSoup
from modules import chat, shared
from modules import chat
from modules.logging_colors import logger
from .chromadb import add_chunks_to_collector, make_collector
@ -96,7 +96,8 @@ def apply_settings(chunk_count, chunk_count_initial, time_weight):
def custom_generate_chat_prompt(user_input, state, **kwargs):
global chat_collector
history = state['history']
# Use kwargs['history'] here, because state['history'] gets modified when using regenerate.
history = kwargs['history']
if state['mode'] == 'instruct':
results = collector.get_sorted(user_input, n_results=params['chunk_count'])
@ -142,8 +143,8 @@ def remove_special_tokens(string):
return re.sub(pattern, '', string)
def input_modifier(string):
if shared.is_chat():
def input_modifier(string, state, is_chat=False):
if is_chat:
return string
# Find the user input

View File

@ -0,0 +1,4 @@
user: "GPT4 User:"
bot: "GPT4 Assistant:"
turn_template: "<|user|> <|user-message|><|end_of_turn|><|bot|> <|bot-message|><|end_of_turn|>"
context: ""

View File

@ -0,0 +1,4 @@
user: "### Instruction:"
bot: "### Response:"
turn_template: "<|user|> <|user-message|>\n\n<|bot|> <|bot-message|>\n\n"
context: ""

View File

@ -0,0 +1,4 @@
user: "### User:"
bot: "### Assistant:"
turn_template: "<|user|>\n<|user-message|>\n\n<|bot|>\n<|bot-message|>\n\n"
context: "### System:\nThis is a system prompt, please behave and help the user.\n\n"

93
js/main.js Normal file
View File

@ -0,0 +1,93 @@
let main_parent = document.getElementById('chat-tab').parentNode;
let extensions = document.getElementById('extensions');
main_parent.childNodes[0].classList.add("header_bar");
main_parent.style = "padding: 0; margin: 0";
main_parent.parentNode.parentNode.style = "padding: 0";
document.querySelector('.header_bar').addEventListener('click', function(event) {
if (event.target.tagName === 'BUTTON') {
const buttonText = event.target.textContent.trim();
let chat_visible = (buttonText == 'Chat');
let default_visible = (buttonText == 'Default');
let notebook_visible = (buttonText == 'Notebook');
// Check if one of the generation tabs is visible
if (chat_visible || notebook_visible || default_visible) {
extensions.style.display = 'flex';
if (chat_visible) {
extensions.style.maxWidth = "800px";
extensions.style.padding = "0px";
} else {
extensions.style.maxWidth = "none";
extensions.style.padding = "15px";
}
} else {
extensions.style.display = 'none';
}
}
});
//------------------------------------------------
// Add some scrollbars
//------------------------------------------------
const textareaElements = document.querySelectorAll('.add_scrollbar textarea');
for(i = 0; i < textareaElements.length; i++) {
textareaElements[i].classList.remove('scroll-hide');
textareaElements[i].classList.add('pretty_scrollbar');
textareaElements[i].style.resize = "none";
}
//------------------------------------------------
// Stop generation on Esc pressed
//------------------------------------------------
document.addEventListener("keydown", function(event) {
if (event.key === "Escape") {
// Find the element with id 'stop' and click it
var stopButton = document.getElementById("stop");
if (stopButton) {
stopButton.click();
}
}
});
//------------------------------------------------
// Chat scrolling
//------------------------------------------------
const targetElement = document.getElementById('chat').parentNode.parentNode.parentNode;
// Create a MutationObserver instance
const observer = new MutationObserver(function(mutations) {
mutations.forEach(function(mutation) {
let childElement = targetElement.childNodes[2].childNodes[0].childNodes[1];
childElement.scrollTop = childElement.scrollHeight;
});
});
// Configure the observer to watch for changes in the subtree and attributes
const config = {
childList: true,
subtree: true,
characterData: true,
attributeOldValue: true,
characterDataOldValue: true
};
// Start observing the target element
observer.observe(targetElement, config);
//------------------------------------------------
// Improve the looks of the chat input field
//------------------------------------------------
document.getElementById('chat-input').parentNode.style.background = 'transparent';
document.getElementById('chat-input').parentNode.style.border = 'none';
//------------------------------------------------
// Remove some backgrounds
//------------------------------------------------
const noBackgroundelements = document.querySelectorAll('.no-background');
for (let i = 0; i < noBackgroundelements.length; i++) {
noBackgroundelements[i].parentNode.style.border = 'none';
noBackgroundelements[i].parentNode.parentNode.parentNode.style.alignItems = 'center';
}

40
js/save_files.js Normal file

@ -0,0 +1,40 @@
// Functions for downloading JSON files
function getCurrentTimestamp() {
const now = new Date();
const timezoneOffset = now.getTimezoneOffset() * 60000; // Convert to milliseconds
const localTime = new Date(now.getTime() - timezoneOffset);
const formattedTimestamp = localTime.toISOString().replace(/[-:]/g, '').slice(0, 15);
return formattedTimestamp;
}
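// Trigger a browser download of `contents` as a text file named `filename`.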
function saveFile(contents, filename) {
const element = document.createElement('a');
element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(contents));
element.setAttribute('download', filename);
element.style.display = 'none';
document.body.appendChild(element);
element.click();
document.body.removeChild(element);
}
function saveHistory(history, character, mode) {
let path = null;
if (['chat', 'chat-instruct'].includes(mode) && character && character.trim() !== '') {
path = `history_${character}_${getCurrentTimestamp()}.json`;
    } else {
        path = `history_${mode}_${getCurrentTimestamp()}.json`;
    }
saveFile(history, path);
}
function saveSession(session) {
    const path = `session_${getCurrentTimestamp()}.json`;
saveFile(session, path);
}

18
js/show_controls.js Normal file

@ -0,0 +1,18 @@
const belowChatInput = document.querySelectorAll("#chat-tab > div > :nth-child(n+3), #extensions");
const chatParent = document.getElementById("chat").parentNode;
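// Show or hide everything below the chat input box and toggle the
// full-height 'bigchat' layout accordingly.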
function toggle_controls(value) {
if (value) {
belowChatInput.forEach(element => {
element.style.display = "inherit";
});
chatParent.classList.remove("bigchat");
} else {
belowChatInput.forEach(element => {
element.style.display = "none";
});
chatParent.classList.add("bigchat");
}
}

43
js/switch_tabs.js Normal file

@ -0,0 +1,43 @@
let chat_tab = document.getElementById('chat-tab');
let main_parent = chat_tab.parentNode;
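// Tab buttons are located by their position in the DOM, so these child
// indexes must stay in sync with the tab order defined in the UI.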
function scrollToTop() {
window.scrollTo({
top: 0,
// behavior: 'smooth'
});
}
function switch_to_chat() {
let chat_tab_button = main_parent.childNodes[0].childNodes[1];
chat_tab_button.click();
scrollToTop();
}
function switch_to_default() {
let default_tab_button = main_parent.childNodes[0].childNodes[4];
default_tab_button.click();
scrollToTop();
}
function switch_to_notebook() {
let notebook_tab_button = main_parent.childNodes[0].childNodes[7];
notebook_tab_button.click();
scrollToTop();
}
function switch_to_generation_parameters() {
let parameters_tab_button = main_parent.childNodes[0].childNodes[10];
let generation_tab_button = document.getElementById('character-menu').parentNode.parentNode.parentNode.parentNode.parentNode.parentNode.childNodes[0].childNodes[1];
parameters_tab_button.click();
generation_tab_button.click();
scrollToTop();
}
function switch_to_character() {
let parameters_tab_button = main_parent.childNodes[0].childNodes[10];
let character_tab_button = document.getElementById('character-menu').parentNode.parentNode.parentNode.parentNode.parentNode.parentNode.childNodes[0].childNodes[4];
parameters_tab_button.click();
character_tab_button.click();
scrollToTop();
}

Some files were not shown because too many files have changed in this diff.