diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb
index 82e6c18e..8e305e1d 100644
--- a/Colab-TextGen-GPU.ipynb
+++ b/Colab-TextGen-GPU.ipynb
@@ -22,7 +22,7 @@
"source": [
"# oobabooga/text-generation-webui\n",
"\n",
- "After running both cells, a public gradio URL will appear at the bottom in a few minutes. You can optionally generate an API link.\n",
+ "After running both cells, a public gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.\n",
"\n",
"* Project page: https://github.com/oobabooga/text-generation-webui\n",
"* Gradio server status: https://status.gradio.app/"
@@ -53,44 +53,28 @@
"\n",
"#@markdown If unsure about the branch, write \"main\" or leave it blank.\n",
"\n",
- "import torch\n",
+ "import os\n",
"from pathlib import Path\n",
"\n",
+ "os.environ.pop('PYTHONPATH', None)\n",
+ "\n",
"if Path.cwd().name != 'text-generation-webui':\n",
- " print(\"Installing the webui...\")\n",
+ " print(\"\\033[1;32;1m\\n --> Installing the web UI. This will take a while, but after the initial setup, you can download and test as many models as you like.\\033[0;37;0m\\n\")\n",
"\n",
" !git clone https://github.com/oobabooga/text-generation-webui\n",
" %cd text-generation-webui\n",
"\n",
- " torver = torch.__version__\n",
- " print(f\"TORCH: {torver}\")\n",
- " is_cuda118 = '+cu118' in torver # 2.1.0+cu118\n",
- "\n",
- " if is_cuda118:\n",
- " !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu118\n",
- " else:\n",
- " !python -m pip install --upgrade torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1 --index-url https://download.pytorch.org/whl/cu121\n",
- "\n",
- " textgen_requirements = open('requirements.txt').read().splitlines()\n",
- " if is_cuda118:\n",
- " textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements]\n",
- " with open('temp_requirements.txt', 'w') as file:\n",
- " file.write('\\n'.join(textgen_requirements))\n",
- "\n",
- " !pip install -r temp_requirements.txt --upgrade\n",
- "\n",
- " print(\"\\033[1;32;1m\\n --> If you see a warning about \\\"previously imported packages\\\", just ignore it.\\033[0;37;0m\")\n",
- " print(\"\\033[1;32;1m\\n --> There is no need to restart the runtime.\\n\\033[0;37;0m\")\n",
- "\n",
- " try:\n",
- " import flash_attn\n",
- " except:\n",
- " !pip uninstall -y flash_attn\n",
+ " # Install the project in an isolated environment\n",
+ " !GPU_CHOICE=A \\\n",
+ " USE_CUDA118=FALSE \\\n",
+ " LAUNCH_AFTER_INSTALL=FALSE \\\n",
+ " INSTALL_EXTENSIONS=FALSE \\\n",
+ " ./start_linux.sh\n",
"\n",
"# Parameters\n",
- "model_url = \"https://huggingface.co/TheBloke/MythoMax-L2-13B-GPTQ\" #@param {type:\"string\"}\n",
- "branch = \"gptq-4bit-32g-actorder_True\" #@param {type:\"string\"}\n",
- "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n",
+ "model_url = \"https://huggingface.co/turboderp/gemma-2-9b-it-exl2\" #@param {type:\"string\"}\n",
+ "branch = \"8.0bpw\" #@param {type:\"string\"}\n",
+ "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant --no_flash_attn\" #@param {type:\"string\"}\n",
"api = False #@param {type:\"boolean\"}\n",
"\n",
"if api:\n",
@@ -116,11 +100,10 @@
" output_folder = \"\"\n",
"\n",
"# Start the web UI\n",
- "cmd = f\"python server.py --share\"\n",
+ "cmd = f\"./start_linux.sh {command_line_flags} --share\"\n",
"if output_folder != \"\":\n",
" cmd += f\" --model {output_folder}\"\n",
- "cmd += f\" {command_line_flags}\"\n",
- "print(cmd)\n",
+ "\n",
"!$cmd"
],
"metadata": {
diff --git a/README.md b/README.md
index 1b8de70a..22ceeeb8 100644
--- a/README.md
+++ b/README.md
@@ -10,27 +10,31 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.
## Features
-* 3 interface modes: default (two columns), notebook, and chat.
-* Multiple model backends: [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM).
-* Dropdown menu for quickly switching between different models.
-* Large number of extensions (built-in and user-contributed), including Coqui TTS for realistic voice outputs, Whisper STT for voice inputs, translation, [multimodal pipelines](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal), vector databases, Stable Diffusion integration, and a lot more. See [the wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [the extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details.
-* [Chat with custom characters](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab#character).
-* Precise chat templates for instruction-following models, including Llama-2-chat, Alpaca, Vicuna, Mistral.
-* LoRA: train new LoRAs with your own data, load/unload LoRAs on the fly for generation.
-* Transformers library integration: load models in 4-bit or 8-bit precision through bitsandbytes, use llama.cpp with transformers samplers (`llamacpp_HF` loader), CPU inference in 32-bit precision using PyTorch.
-* OpenAI-compatible API server with Chat and Completions endpoints -- see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples).
+* Multiple backends for text generation in a single UI and API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) are also supported through the Transformers loader.
+* OpenAI-compatible API server with Chat and Completions endpoints – see the [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples) and the short sketch after this list.
+* Automatic prompt formatting for each model using the Jinja2 template in its metadata.
+* Three chat modes: `instruct`, `chat-instruct`, and `chat`, allowing for both instruction-following and casual conversations with characters. `chat-instruct` mode automatically applies the model's template to the chat prompt, ensuring high-quality outputs without manual setup.
+* "Past chats" menu to quickly switch between conversations and start new ones.
+* Free-form generation in the Default/Notebook tabs without being limited to chat turns. Send formatted chat conversations from the Chat tab to these tabs.
+* Multiple sampling parameters and generation options for sophisticated text generation control.
+* Easy switching between different models through the UI without restarting, using the "Model" tab.
+* Simple LoRA fine-tuning tool to customize models with your data.
+* All in one folder. The requirements are installed in a self-contained `installer_files` folder that doesn't interfere with the system's environment.
+* Extensions support, including numerous built-in and user-contributed extensions. See [the wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [the extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details.
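+
+As an illustration of the API feature above, here is a minimal sketch of a chat completion request in Python. It assumes the server was started with the `--api` flag and listens on the default API port 5000; the exact parameters are documented in the wiki examples linked above.
+
+```python
+import requests
+
+# Assumption: server started with --api and reachable on the default port 5000
+url = "http://127.0.0.1:5000/v1/chat/completions"
+payload = {
+    "messages": [{"role": "user", "content": "Hello!"}],
+    "mode": "instruct",      # "chat" and "chat-instruct" are the other chat modes
+    "max_tokens": 200,
+}
+
+response = requests.post(url, json=payload, timeout=120)
+print(response.json()["choices"][0]["message"]["content"])
+```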
## How to install
1) Clone or [download](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) the repository.
2) Run the `start_linux.sh`, `start_windows.bat`, `start_macos.sh`, or `start_wsl.bat` script depending on your OS.
3) Select your GPU vendor when asked.
-4) Once the installation ends, browse to `http://localhost:7860/?__theme=dark`.
+4) Once the installation ends, browse to `http://localhost:7860`.
5) Have fun!
-To restart the web UI in the future, just run the `start_` script again. This script creates an `installer_files` folder where it sets up the project's requirements. In case you need to reinstall the requirements, you can simply delete that folder and start the web UI again.
+To restart the web UI in the future, run the `start_` script again.
-The script accepts command-line flags. Alternatively, you can edit the `CMD_FLAGS.txt` file with a text editor and add your flags there.
+This script creates an `installer_files` folder where it sets up the project's requirements. If you need to reinstall the requirements, just delete that folder and start the web UI again.
+
+The script accepts command-line flags; for example, run `./start_linux.sh --help` to list them all. Alternatively, you can edit the `CMD_FLAGS.txt` file with a text editor and add your flags there, such as `--api` if you need to use the API.
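+
+For illustration, a `CMD_FLAGS.txt` that enables the API and makes the UI reachable from other machines on your network might contain nothing more than the following (both flags appear in the flag list below):
+
+```
+--api --listen
+```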
To get updates in the future, run `update_wizard_linux.sh`, `update_wizard_windows.bat`, `update_wizard_macos.sh`, or `update_wizard_wsl.bat`.
@@ -204,16 +208,16 @@ List of command-line flags
usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS]
[--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--chat-buttons] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices]
[--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code]
- [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn]
- [--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS]
- [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] [--attention-sink-size ATTENTION_SINK_SIZE]
- [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN]
- [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--no_inject_fused_attention]
- [--hqq-backend HQQ_BACKEND] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE]
- [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH]
- [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT]
- [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui] [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]]
- [--checkpoint CHECKPOINT] [--monkey-patch]
+ [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE]
+ [--flash-attn] [--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock]
+ [--n-gpu-layers N_GPU_LAYERS] [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm]
+ [--attention-sink-size ATTENTION_SINK_SIZE] [--tokenizer-dir TOKENIZER_DIR] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn]
+ [--no_xformers] [--no_sdpa] [--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act]
+ [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR]
+ [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT]
+ [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE]
+ [--subpath SUBPATH] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui]
+ [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] [--checkpoint CHECKPOINT] [--monkey-patch] [--no_inject_fused_attention]
Text generation web UI
@@ -237,7 +241,7 @@ Basic settings:
Model loader:
--loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2,
- AutoGPTQ, AutoAWQ.
+ AutoGPTQ.
Transformers/Accelerate:
--cpu Use the CPU to generate text. Warning: Training on CPU is extremely slow.
@@ -254,6 +258,7 @@ Transformers/Accelerate:
--force-safetensors Set use_safetensors=True while loading the model. This prevents arbitrary code execution.
--no_use_fast Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast.
--use_flash_attention_2 Set use_flash_attention_2=True while loading the model.
+  --use_eager_attention Set attn_implementation=eager while loading the model.
bitsandbytes 4-bit:
--load-in-4bit Load the model with 4-bit precision (using bitsandbytes).
@@ -263,7 +268,7 @@ bitsandbytes 4-bit:
llama.cpp:
--flash-attn Use flash-attention.
- --tensorcores Use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards. NVIDIA only.
+ --tensorcores NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.
--n_ctx N_CTX Size of the prompt context.
--threads THREADS Number of threads to use.
--threads-batch THREADS_BATCH Number of threads to use for batches/prompt processing.
@@ -272,7 +277,7 @@ llama.cpp:
--no-mmap Prevent mmap from being used.
--mlock Force the system to keep the model in RAM.
--n-gpu-layers N_GPU_LAYERS Number of layers to offload to the GPU.
- --tensor_split TENSOR_SPLIT Split the model across multiple GPUs. Comma-separated list of proportions. Example: 18,17.
+ --tensor_split TENSOR_SPLIT Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.
--numa Activate NUMA task allocation for llama.cpp.
--logits_all Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower.
--no_offload_kqv Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.
@@ -280,6 +285,7 @@ llama.cpp:
--row_split Split the model by rows across GPUs. This may improve multi-gpu performance.
--streaming-llm Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.
--attention-sink-size ATTENTION_SINK_SIZE StreamingLLM: number of sink tokens. Only used if the trimmed prompt does not share a prefix with the old prompt.
+ --tokenizer-dir TOKENIZER_DIR Load the tokenizer from this folder. Meant to be used with llamacpp_HF through the command-line.
ExLlamaV2:
--gpu-split GPU_SPLIT Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.
@@ -287,6 +293,8 @@ ExLlamaV2:
--max_seq_len MAX_SEQ_LEN Maximum sequence length.
--cfg-cache ExLlamav2_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader.
--no_flash_attn Force flash-attention to not be used.
+ --no_xformers Force xformers to not be used.
+ --no_sdpa Force Torch SDPA to not be used.
--cache_8bit Use 8-bit cache to save VRAM.
--cache_4bit Use Q4 cache to save VRAM.
--num_experts_per_token NUM_EXPERTS_PER_TOKEN Number of experts to use for generation. Applies to MoE models like Mixtral.
@@ -301,12 +309,12 @@ AutoGPTQ:
--wbits WBITS Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.
--groupsize GROUPSIZE Group size.
-AutoAWQ:
- --no_inject_fused_attention Disable the use of fused attention, which will use less VRAM at the cost of slower inference.
-
HQQ:
--hqq-backend HQQ_BACKEND Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.
+TensorRT-LLM:
+ --cpp-runner Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn't support streaming yet.
+
DeepSpeed:
--deepspeed Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.
--nvme-offload-dir NVME_OFFLOAD_DIR DeepSpeed: Directory to use for ZeRO-3 NVME offloading.
@@ -327,6 +335,7 @@ Gradio:
--gradio-auth-path GRADIO_AUTH_PATH Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above.
--ssl-keyfile SSL_KEYFILE The path to the SSL certificate key file.
--ssl-certfile SSL_CERTFILE The path to the SSL certificate cert file.
+  --subpath SUBPATH Customize the subpath for gradio; use with a reverse proxy.
API:
--api Enable the API extension.
@@ -392,18 +401,11 @@ Run `python download-model.py --help` to see all the options.
https://colab.research.google.com/github/oobabooga/text-generation-webui/blob/main/Colab-TextGen-GPU.ipynb
+## Community
+
+* Subreddit: https://www.reddit.com/r/Oobabooga/
+* Discord: https://discord.gg/jwZCF2dPQN
+
## Acknowledgment
In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition.
-
-## Links
-
-#### Community
-
-* Subreddit: https://www.reddit.com/r/oobabooga/
-* Discord: https://discord.gg/jwZCF2dPQN
-
-#### Support
-
-* ko-fi: https://ko-fi.com/oobabooga
-* GitHub Sponsors: https://github.com/sponsors/oobabooga
diff --git a/cmd_linux.sh b/cmd_linux.sh
index 1685050a..576dbf02 100755
--- a/cmd_linux.sh
+++ b/cmd_linux.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
cd "$(dirname "${BASH_SOURCE[0]}")"
diff --git a/css/chat_style-TheEncrypted777.css b/css/chat_style-TheEncrypted777.css
index f823eef0..6404f41d 100644
--- a/css/chat_style-TheEncrypted777.css
+++ b/css/chat_style-TheEncrypted777.css
@@ -90,6 +90,7 @@
line-height: 1.428571429 !important;
color: rgb(243 244 246) !important;
text-shadow: 2px 2px 2px rgb(0 0 0);
+ font-weight: 500;
}
.message-body p em {
diff --git a/css/chat_style-cai-chat.css b/css/chat_style-cai-chat.css
index ba0c8f13..618184cf 100644
--- a/css/chat_style-cai-chat.css
+++ b/css/chat_style-cai-chat.css
@@ -46,6 +46,7 @@
.message-body p {
font-size: 15px !important;
line-height: 22.5px !important;
+ font-weight: 500;
}
.message-body p, .chat .message-body ul, .chat .message-body ol {
@@ -59,4 +60,4 @@
.message-body p em {
color: rgb(110 110 110) !important;
font-weight: 500;
-}
\ No newline at end of file
+}
diff --git a/css/chat_style-messenger.css b/css/chat_style-messenger.css
index 6bb97971..f0fd1578 100644
--- a/css/chat_style-messenger.css
+++ b/css/chat_style-messenger.css
@@ -88,6 +88,7 @@
margin-bottom: 0 !important;
font-size: 15px !important;
line-height: 1.428571429 !important;
+ font-weight: 500;
}
.dark .message-body p em {
diff --git a/css/chat_style-wpp.css b/css/chat_style-wpp.css
index ac4fd39a..30ca61f3 100644
--- a/css/chat_style-wpp.css
+++ b/css/chat_style-wpp.css
@@ -44,6 +44,7 @@
margin-bottom: 0 !important;
font-size: 15px !important;
line-height: 1.428571429 !important;
+ font-weight: 500;
}
.dark .message-body p em {
@@ -52,4 +53,4 @@
.message-body p em {
color: rgb(110 110 110) !important;
-}
\ No newline at end of file
+}
diff --git a/css/highlightjs/github-dark.min.css b/css/highlightjs/github-dark.min.css
index 03b6da8b..469b94a3 100644
--- a/css/highlightjs/github-dark.min.css
+++ b/css/highlightjs/github-dark.min.css
@@ -1,4 +1,14 @@
-pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5px}/*!
+html body gradio-app .gradio-container pre code.hljs {
+ display: block;
+ overflow-x: auto;
+ padding: 1em
+}
+
+html body gradio-app .gradio-container code.hljs {
+ padding: 3px 5px
+}
+
+/*!
Theme: GitHub Dark
Description: Dark theme as seen on github.com
Author: github.com
@@ -7,4 +17,95 @@ pre code.hljs{display:block;overflow-x:auto;padding:1em}code.hljs{padding:3px 5p
Outdated base version: https://github.com/primer/github-syntax-dark
Current colors taken from GitHub's CSS
-*/.hljs{color:#c9d1d9;background:#0d1117}.hljs-doctag,.hljs-keyword,.hljs-meta .hljs-keyword,.hljs-template-tag,.hljs-template-variable,.hljs-type,.hljs-variable.language_{color:#ff7b72}.hljs-title,.hljs-title.class_,.hljs-title.class_.inherited__,.hljs-title.function_{color:#d2a8ff}.hljs-attr,.hljs-attribute,.hljs-literal,.hljs-meta,.hljs-number,.hljs-operator,.hljs-selector-attr,.hljs-selector-class,.hljs-selector-id,.hljs-variable{color:#79c0ff}.hljs-meta .hljs-string,.hljs-regexp,.hljs-string{color:#a5d6ff}.hljs-built_in,.hljs-symbol{color:#ffa657}.hljs-code,.hljs-comment,.hljs-formula{color:#8b949e}.hljs-name,.hljs-quote,.hljs-selector-pseudo,.hljs-selector-tag{color:#7ee787}.hljs-subst{color:#c9d1d9}.hljs-section{color:#1f6feb;font-weight:700}.hljs-bullet{color:#f2cc60}.hljs-emphasis{color:#c9d1d9;font-style:italic}.hljs-strong{color:#c9d1d9;font-weight:700}.hljs-addition{color:#aff5b4;background-color:#033a16}.hljs-deletion{color:#ffdcd7;background-color:#67060c}
\ No newline at end of file
+*/
+html body gradio-app .gradio-container .hljs {
+ color: #c9d1d9;
+ background: #0d1117
+}
+
+html body gradio-app .gradio-container .hljs-doctag,
+html body gradio-app .gradio-container .hljs-keyword,
+html body gradio-app .gradio-container .hljs-meta .hljs-keyword,
+html body gradio-app .gradio-container .hljs-template-tag,
+html body gradio-app .gradio-container .hljs-template-variable,
+html body gradio-app .gradio-container .hljs-type,
+html body gradio-app .gradio-container .hljs-variable.language_ {
+ color: #ff7b72
+}
+
+html body gradio-app .gradio-container .hljs-title,
+html body gradio-app .gradio-container .hljs-title.class_,
+html body gradio-app .gradio-container .hljs-title.class_.inherited__,
+html body gradio-app .gradio-container .hljs-title.function_ {
+ color: #d2a8ff
+}
+
+html body gradio-app .gradio-container .hljs-attr,
+html body gradio-app .gradio-container .hljs-attribute,
+html body gradio-app .gradio-container .hljs-literal,
+html body gradio-app .gradio-container .hljs-meta,
+html body gradio-app .gradio-container .hljs-number,
+html body gradio-app .gradio-container .hljs-operator,
+html body gradio-app .gradio-container .hljs-selector-attr,
+html body gradio-app .gradio-container .hljs-selector-class,
+html body gradio-app .gradio-container .hljs-selector-id,
+html body gradio-app .gradio-container .hljs-variable {
+ color: #79c0ff
+}
+
+html body gradio-app .gradio-container .hljs-meta .hljs-string,
+html body gradio-app .gradio-container .hljs-regexp,
+html body gradio-app .gradio-container .hljs-string {
+ color: #a5d6ff
+}
+
+html body gradio-app .gradio-container .hljs-built_in,
+html body gradio-app .gradio-container .hljs-symbol {
+ color: #ffa657
+}
+
+html body gradio-app .gradio-container .hljs-code,
+html body gradio-app .gradio-container .hljs-comment,
+html body gradio-app .gradio-container .hljs-formula {
+ color: #8b949e
+}
+
+html body gradio-app .gradio-container .hljs-name,
+html body gradio-app .gradio-container .hljs-quote,
+html body gradio-app .gradio-container .hljs-selector-pseudo,
+html body gradio-app .gradio-container .hljs-selector-tag {
+ color: #7ee787
+}
+
+html body gradio-app .gradio-container .hljs-subst {
+ color: #c9d1d9
+}
+
+html body gradio-app .gradio-container .hljs-section {
+ color: #1f6feb;
+ font-weight: 700
+}
+
+html body gradio-app .gradio-container .hljs-bullet {
+ color: #f2cc60
+}
+
+html body gradio-app .gradio-container .hljs-emphasis {
+ color: #c9d1d9;
+ font-style: italic
+}
+
+html body gradio-app .gradio-container .hljs-strong {
+ color: #c9d1d9;
+ font-weight: 700
+}
+
+html body gradio-app .gradio-container .hljs-addition {
+ color: #aff5b4;
+ background-color: #033a16
+}
+
+html body gradio-app .gradio-container .hljs-deletion {
+ color: #ffdcd7;
+ background-color: #67060c
+}
diff --git a/css/highlightjs/github.min.css b/css/highlightjs/github.min.css
new file mode 100644
index 00000000..fbc58cca
--- /dev/null
+++ b/css/highlightjs/github.min.css
@@ -0,0 +1,111 @@
+html body gradio-app .gradio-container pre code.hljs {
+ display: block;
+ overflow-x: auto;
+ padding: 1em
+}
+
+html body gradio-app .gradio-container code.hljs {
+ padding: 3px 5px
+}
+
+/*!
+ Theme: GitHub
+ Description: Light theme as seen on github.com
+ Author: github.com
+ Maintainer: @Hirse
+ Updated: 2021-05-15
+
+ Outdated base version: https://github.com/primer/github-syntax-light
+ Current colors taken from GitHub's CSS
+*/
+html body gradio-app .gradio-container .hljs {
+ color: #24292e;
+ background: #fff
+}
+
+html body gradio-app .gradio-container .hljs-doctag,
+html body gradio-app .gradio-container .hljs-keyword,
+html body gradio-app .gradio-container .hljs-meta .hljs-keyword,
+html body gradio-app .gradio-container .hljs-template-tag,
+html body gradio-app .gradio-container .hljs-template-variable,
+html body gradio-app .gradio-container .hljs-type,
+html body gradio-app .gradio-container .hljs-variable.language_ {
+ color: #d73a49
+}
+
+html body gradio-app .gradio-container .hljs-title,
+html body gradio-app .gradio-container .hljs-title.class_,
+html body gradio-app .gradio-container .hljs-title.class_.inherited__,
+html body gradio-app .gradio-container .hljs-title.function_ {
+ color: #6f42c1
+}
+
+html body gradio-app .gradio-container .hljs-attr,
+html body gradio-app .gradio-container .hljs-attribute,
+html body gradio-app .gradio-container .hljs-literal,
+html body gradio-app .gradio-container .hljs-meta,
+html body gradio-app .gradio-container .hljs-number,
+html body gradio-app .gradio-container .hljs-operator,
+html body gradio-app .gradio-container .hljs-selector-attr,
+html body gradio-app .gradio-container .hljs-selector-class,
+html body gradio-app .gradio-container .hljs-selector-id,
+html body gradio-app .gradio-container .hljs-variable {
+ color: #005cc5
+}
+
+html body gradio-app .gradio-container .hljs-meta .hljs-string,
+html body gradio-app .gradio-container .hljs-regexp,
+html body gradio-app .gradio-container .hljs-string {
+ color: #032f62
+}
+
+html body gradio-app .gradio-container .hljs-built_in,
+html body gradio-app .gradio-container .hljs-symbol {
+ color: #e36209
+}
+
+html body gradio-app .gradio-container .hljs-code,
+html body gradio-app .gradio-container .hljs-comment,
+html body gradio-app .gradio-container .hljs-formula {
+ color: #6a737d
+}
+
+html body gradio-app .gradio-container .hljs-name,
+html body gradio-app .gradio-container .hljs-quote,
+html body gradio-app .gradio-container .hljs-selector-pseudo,
+html body gradio-app .gradio-container .hljs-selector-tag {
+ color: #22863a
+}
+
+html body gradio-app .gradio-container .hljs-subst {
+ color: #24292e
+}
+
+html body gradio-app .gradio-container .hljs-section {
+ color: #005cc5;
+ font-weight: 700
+}
+
+html body gradio-app .gradio-container .hljs-bullet {
+ color: #735c0f
+}
+
+html body gradio-app .gradio-container .hljs-emphasis {
+ color: #24292e;
+ font-style: italic
+}
+
+html body gradio-app .gradio-container .hljs-strong {
+ color: #24292e;
+ font-weight: 700
+}
+
+html body gradio-app .gradio-container .hljs-addition {
+ color: #22863a;
+ background-color: #f0fff4
+}
+
+html body gradio-app .gradio-container .hljs-deletion {
+ color: #b31d28;
+ background-color: #ffeef0
+}
diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index 8a31d6e2..50b9402f 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -39,14 +39,6 @@
margin-bottom: 0 !important;
}
-.dark .message-body p em {
- color: rgb(198 202 214) !important;
-}
-
-.message-body p em {
- color: rgb(110 110 110) !important;
-}
-
.gradio-container .chat .assistant-message {
padding: 20px;
background: #f4f4f4;
diff --git a/css/main.css b/css/main.css
index 5768348e..cf3babdb 100644
--- a/css/main.css
+++ b/css/main.css
@@ -62,10 +62,6 @@ ol li p, ul li p {
border: 0;
}
-.gradio-container-3-18-0 .prose * h1, h2, h3, h4 {
- color: white;
-}
-
.gradio-container {
max-width: 100% !important;
padding-top: 0 !important;
@@ -378,6 +374,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
}
}
+.chat-parent .prose {
+ visibility: visible;
+}
+
.old-ui .chat-parent {
height: calc(100dvh - 192px - var(--header-height) - var(--input-delta));
margin-bottom: var(--input-delta) !important;
@@ -399,6 +399,22 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
padding-bottom: 15px !important;
}
+.message-body h1,
+.message-body h2,
+.message-body h3,
+.message-body h4 {
+ color: var(--body-text-color);
+ margin: 20px 0 10px 0;
+}
+
+.dark .message q {
+ color: #f5b031;
+}
+
+.message-body q::before, .message-body q::after {
+ content: "";
+}
+
.message-body li {
list-style-position: outside;
}
@@ -447,6 +463,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
border-radius: 5px;
font-size: 82%;
padding: 1px 3px;
+ background: white !important;
+ color: #1f2328;
+}
+
+.dark .message-body code {
background: #0d1117 !important;
color: rgb(201 209 217);
}
@@ -796,4 +817,3 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
max-height: 300px;
}
}
-
diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md
index b00a1f34..9b4f89bf 100644
--- a/docs/12 - OpenAI API.md
+++ b/docs/12 - OpenAI API.md
@@ -19,7 +19,7 @@ Add `--api` to your command-line flags.
### Examples
-For the documentation with all the parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/openai/typing.py) file.
+For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/openai/typing.py) file.
The official examples in the [OpenAI documentation](https://platform.openai.com/docs/api-reference) should also work, and the same parameters apply (although the API here has more optional parameters).
@@ -114,6 +114,30 @@ curl -k http://127.0.0.1:5000/v1/internal/logits \
}'
```
+#### List models
+
+```shell
+curl -k http://127.0.0.1:5000/v1/internal/model/list \
+ -H "Content-Type: application/json"
+```
+
+#### Load model
+
+```shell
+curl -k http://127.0.0.1:5000/v1/internal/model/load \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model_name": "model_name",
+ "args": {
+ "load_in_4bit": true,
+ "n_gpu_layers": 12
+ },
+ "settings": {
+ "instruction_template": "Alpaca"
+ }
+ }'
+```
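+
+The same load request from Python, as a minimal sketch (assuming the server was started with `--api` and is reachable on the default port 5000):
+
+```python
+import requests
+
+# Assumption: server started with --api and reachable on the default port 5000
+url = "http://127.0.0.1:5000/v1/internal/model/load"
+payload = {
+    "model_name": "model_name",   # folder or file name inside the models directory
+    "args": {"load_in_4bit": True, "n_gpu_layers": 12},
+    "settings": {"instruction_template": "Alpaca"},
+}
+
+response = requests.post(url, json=payload, timeout=600)
+response.raise_for_status()
+print("Model loaded:", response.status_code)
+```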
+
#### Python chat example
```python
diff --git a/download-model.py b/download-model.py
index 08b23d08..306784a3 100644
--- a/download-model.py
+++ b/download-model.py
@@ -29,6 +29,7 @@ base = os.environ.get("HF_ENDPOINT") or "https://huggingface.co"
class ModelDownloader:
def __init__(self, max_retries=5):
self.max_retries = max_retries
+ self.session = self.get_session()
def get_session(self):
session = requests.Session()
@@ -72,7 +73,7 @@ class ModelDownloader:
return model, branch
def get_download_links_from_huggingface(self, model, branch, text_only=False, specific_file=None):
- session = self.get_session()
+ session = self.session
page = f"/api/models/{model}/tree/{branch}"
cursor = b""
@@ -192,7 +193,7 @@ class ModelDownloader:
attempt = 0
while attempt < max_retries:
attempt += 1
- session = self.get_session()
+ session = self.session
headers = {}
mode = 'wb'
@@ -212,11 +213,15 @@ class ModelDownloader:
total_size = int(r.headers.get('content-length', 0))
block_size = 1024 * 1024 # 1MB
+ filename_str = str(filename) # Convert PosixPath to string if necessary
+
tqdm_kwargs = {
'total': total_size,
- 'unit': 'iB',
+ 'unit': 'B',
'unit_scale': True,
- 'bar_format': '{l_bar}{bar}| {n_fmt}/{total_fmt} {rate_fmt}'
+ 'unit_divisor': 1024,
+ 'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]',
+ 'desc': f"{filename_str}: "
}
if 'COLAB_GPU' in os.environ:
@@ -233,7 +238,7 @@ class ModelDownloader:
t.update(len(data))
if total_size != 0 and self.progress_bar is not None:
count += len(data)
- self.progress_bar(float(count) / float(total_size), f"{filename}")
+ self.progress_bar(float(count) / float(total_size), f"{filename_str}")
break # Exit loop if successful
except (RequestException, ConnectionError, Timeout) as e:
diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py
index 44c1df86..646dee2d 100644
--- a/extensions/openai/completions.py
+++ b/extensions/openai/completions.py
@@ -319,7 +319,6 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
yield {'prompt': prompt}
return
- token_count = len(encode(prompt)[0])
debug_msg({'prompt': prompt, 'generate_params': generate_params})
if stream:
@@ -330,7 +329,6 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
answer = ''
seen_content = ''
- completion_token_count = 0
for a in generator:
answer = a['internal'][-1][1]
@@ -345,6 +343,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
chunk = chat_streaming_chunk(new_content)
yield chunk
+ token_count = len(encode(prompt)[0])
completion_token_count = len(encode(answer)[0])
stop_reason = "stop"
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= generate_params['max_new_tokens']:
@@ -429,8 +428,6 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False):
prompt = decode(prompt)[0]
prefix = prompt if echo else ''
- token_count = len(encode(prompt)[0])
- total_prompt_token_count += token_count
# generate reply #######################################
debug_msg({'prompt': prompt, 'generate_params': generate_params})
@@ -440,6 +437,8 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False):
for a in generator:
answer = a
+ token_count = len(encode(prompt)[0])
+ total_prompt_token_count += token_count
completion_token_count = len(encode(answer)[0])
total_completion_token_count += completion_token_count
stop_reason = "stop"
diff --git a/js/dark_theme.js b/js/dark_theme.js
new file mode 100644
index 00000000..b540fb11
--- /dev/null
+++ b/js/dark_theme.js
@@ -0,0 +1,9 @@
+function toggleDarkMode() {
+ document.body.classList.toggle("dark");
+ var currentCSS = document.getElementById("highlight-css");
+ if (currentCSS.getAttribute("href") === "file/css/highlightjs/github-dark.min.css") {
+ currentCSS.setAttribute("href", "file/css/highlightjs/github.min.css");
+ } else {
+ currentCSS.setAttribute("href", "file/css/highlightjs/github-dark.min.css");
+ }
+}
diff --git a/js/main.js b/js/main.js
index 6b456517..899bd8f0 100644
--- a/js/main.js
+++ b/js/main.js
@@ -218,7 +218,6 @@ function doSyntaxHighlighting() {
{ left: "\\[", right: "\\]", display: true },
],
});
-
});
observer.observe(targetElement, config);
@@ -445,14 +444,12 @@ function updateCssProperties() {
// Check if the chat container is visible
if (chatContainer.clientHeight > 0) {
-
- // Calculate new chat height and adjust CSS properties
var numericHeight = chatContainer.parentNode.clientHeight - chatInputHeight + 40 - 100;
if (document.getElementById("chat-tab").style.paddingBottom != "") {
numericHeight += 20;
}
- const newChatHeight = `${numericHeight}px`;
+ const newChatHeight = `${numericHeight}px`;
document.documentElement.style.setProperty("--chat-height", newChatHeight);
document.documentElement.style.setProperty("--input-delta", `${chatInputHeight - 40}px`);
@@ -463,15 +460,19 @@ function updateCssProperties() {
// Adjust scrollTop based on input height change
if (chatInputHeight !== currentChatInputHeight) {
- chatContainer.scrollTop += chatInputHeight > currentChatInputHeight ? chatInputHeight : -chatInputHeight + 40;
+ if (!isScrolled && chatInputHeight < currentChatInputHeight) {
+ chatContainer.scrollTop = chatContainer.scrollHeight;
+ } else {
+ chatContainer.scrollTop += chatInputHeight - currentChatInputHeight;
+ }
+
currentChatInputHeight = chatInputHeight;
}
}
}
// Observe textarea size changes and call update function
-new ResizeObserver(updateCssProperties)
- .observe(document.querySelector("#chat-input textarea"));
+new ResizeObserver(updateCssProperties).observe(document.querySelector("#chat-input textarea"));
// Handle changes in window size
window.addEventListener("resize", updateCssProperties);
diff --git a/modules/LoRA.py b/modules/LoRA.py
index eda5e406..117022cf 100644
--- a/modules/LoRA.py
+++ b/modules/LoRA.py
@@ -72,8 +72,6 @@ def add_lora_autogptq(lora_names):
else:
if len(lora_names) > 1:
logger.warning('AutoGPTQ can only work with 1 LoRA at the moment. Only the first one in the list will be loaded.')
- if not shared.args.no_inject_fused_attention:
- logger.warning('Fused Attention + AutoGPTQ may break Lora loading. Disable it.')
peft_config = GPTQLoraConfig(
inference_mode=True,
diff --git a/modules/block_requests.py b/modules/block_requests.py
index 778b9f5a..886930f0 100644
--- a/modules/block_requests.py
+++ b/modules/block_requests.py
@@ -3,6 +3,7 @@ import io
import requests
+from modules import shared
from modules.logging_colors import logger
original_open = open
@@ -54,6 +55,7 @@ def my_open(*args, **kwargs):
'\n '
'\n '
'\n '
+ f'\n '
'\n '
'\n '
)
diff --git a/modules/chat.py b/modules/chat.py
index 6640776f..00c4ffa9 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -17,7 +17,11 @@ from PIL import Image
import modules.shared as shared
from modules import utils
from modules.extensions import apply_extensions
-from modules.html_generator import chat_html_wrapper, make_thumbnail
+from modules.html_generator import (
+ chat_html_wrapper,
+ convert_to_markdown,
+ make_thumbnail
+)
from modules.logging_colors import logger
from modules.text_generation import (
generate_reply,
@@ -88,8 +92,16 @@ def generate_chat_prompt(user_input, state, **kwargs):
chat_template_str = replace_character_names(chat_template_str, state['name1'], state['name2'])
instruction_template = jinja_env.from_string(state['instruction_template_str'])
- instruct_renderer = partial(instruction_template.render, add_generation_prompt=False)
chat_template = jinja_env.from_string(chat_template_str)
+
+ instruct_renderer = partial(
+ instruction_template.render,
+ builtin_tools=None,
+ tools=None,
+ tools_in_user_message=False,
+ add_generation_prompt=False
+ )
+
chat_renderer = partial(
chat_template.render,
add_generation_prompt=False,
@@ -367,7 +379,6 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
def impersonate_wrapper(text, state):
-
static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
prompt = generate_chat_prompt('', state, impersonate=True)
@@ -421,9 +432,12 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
send_dummy_message(text, state)
send_dummy_reply(state['start_with'], state)
+ history = state['history']
for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True, for_ui=True)):
yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']), history
+ save_history(history, state['unique_id'], state['character_menu'], state['mode'])
+
def remove_last_message(history):
if len(history['visible']) > 0 and history['internal'][-1][0] != '<|BEGIN-VISIBLE-CHAT|>':
@@ -484,7 +498,7 @@ def start_new_chat(state):
greeting = replace_character_names(state['greeting'], state['name1'], state['name2'])
if greeting != '':
history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]]
- history['visible'] += [['', apply_extensions('output', greeting, state, is_chat=True)]]
+ history['visible'] += [['', apply_extensions('output', html.escape(greeting), state, is_chat=True)]]
unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')
save_history(history, unique_id, state['character_menu'], state['mode'])
@@ -995,3 +1009,207 @@ def my_yaml_output(data):
result += " " + line.rstrip(' ') + "\n"
return result
+
+
+def handle_replace_last_reply_click(text, state):
+ history = replace_last_reply(text, state)
+ save_history(history, state['unique_id'], state['character_menu'], state['mode'])
+ html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
+
+ return [history, html, ""]
+
+
+def handle_send_dummy_message_click(text, state):
+ history = send_dummy_message(text, state)
+ save_history(history, state['unique_id'], state['character_menu'], state['mode'])
+ html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
+
+ return [history, html, ""]
+
+
+def handle_send_dummy_reply_click(text, state):
+ history = send_dummy_reply(text, state)
+ save_history(history, state['unique_id'], state['character_menu'], state['mode'])
+ html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
+
+ return [history, html, ""]
+
+
+def handle_remove_last_click(state):
+ last_input, history = remove_last_message(state['history'])
+ save_history(history, state['unique_id'], state['character_menu'], state['mode'])
+ html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
+
+ return [history, html, last_input]
+
+
+def handle_unique_id_select(state):
+ history = load_history(state['unique_id'], state['character_menu'], state['mode'])
+ html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
+
+ convert_to_markdown.cache_clear()
+
+ return [history, html]
+
+
+def handle_start_new_chat_click(state):
+ history = start_new_chat(state)
+ histories = find_all_histories_with_first_prompts(state)
+ html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
+
+ convert_to_markdown.cache_clear()
+
+ return [history, html, gr.update(choices=histories, value=histories[0][1])]
+
+
+def handle_delete_chat_confirm_click(state):
+ index = str(find_all_histories(state).index(state['unique_id']))
+ delete_history(state['unique_id'], state['character_menu'], state['mode'])
+ history, unique_id = load_history_after_deletion(state, index)
+ html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
+
+ convert_to_markdown.cache_clear()
+
+ return [
+ history,
+ html,
+ unique_id,
+ gr.update(visible=False),
+ gr.update(visible=True),
+ gr.update(visible=False)
+ ]
+
+
+def handle_rename_chat_click():
+ return [
+ gr.update(visible=True, value="My New Chat"),
+ gr.update(visible=True),
+ gr.update(visible=True)
+ ]
+
+
+def handle_rename_chat_confirm(rename_to, state):
+ rename_history(state['unique_id'], rename_to, state['character_menu'], state['mode'])
+ histories = find_all_histories_with_first_prompts(state)
+
+ return [
+ gr.update(choices=histories, value=rename_to),
+ gr.update(visible=False),
+ gr.update(visible=False),
+ gr.update(visible=False)
+ ]
+
+
+def handle_upload_chat_history(load_chat_history, state):
+ history = start_new_chat(state)
+ history = load_history_json(load_chat_history, history)
+ histories = find_all_histories_with_first_prompts(state)
+ save_history(history, state['unique_id'], state['character_menu'], state['mode'])
+
+ html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
+
+ convert_to_markdown.cache_clear()
+
+ return [
+ history,
+ html,
+ gr.update(choices=histories, value=histories[0][1])
+ ]
+
+
+def handle_character_menu_change(state):
+ name1, name2, picture, greeting, context = load_character(state['character_menu'], state['name1'], state['name2'])
+
+ state['name1'] = name1
+ state['name2'] = name2
+ state['character_picture'] = picture
+ state['greeting'] = greeting
+ state['context'] = context
+
+ history = load_latest_history(state)
+ histories = find_all_histories_with_first_prompts(state)
+ html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
+
+ convert_to_markdown.cache_clear()
+
+ return [
+ history,
+ html,
+ name1,
+ name2,
+ picture,
+ greeting,
+ context,
+ gr.update(choices=histories, value=histories[0][1]),
+ ]
+
+
+def handle_mode_change(state):
+ history = load_latest_history(state)
+ histories = find_all_histories_with_first_prompts(state)
+ html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
+
+ convert_to_markdown.cache_clear()
+
+ return [
+ history,
+ html,
+ gr.update(visible=state['mode'] != 'instruct'),
+ gr.update(visible=state['mode'] == 'chat-instruct'),
+ gr.update(choices=histories, value=histories[0][1])
+ ]
+
+
+def handle_save_character_click(name2):
+ return [
+ name2,
+ gr.update(visible=True)
+ ]
+
+
+def handle_load_template_click(instruction_template):
+ output = load_instruction_template(instruction_template)
+ return [
+ output,
+ "Select template to load..."
+ ]
+
+
+def handle_save_template_click(instruction_template_str):
+ contents = generate_instruction_template_yaml(instruction_template_str)
+ return [
+ "My Template.yaml",
+ "instruction-templates/",
+ contents,
+ gr.update(visible=True)
+ ]
+
+
+def handle_delete_template_click(template):
+ return [
+ f"{template}.yaml",
+ "instruction-templates/",
+ gr.update(visible=True)
+ ]
+
+
+def handle_your_picture_change(picture, state):
+ upload_your_profile_picture(picture)
+ html = redraw_html(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'], reset_cache=True)
+
+ return html
+
+
+def handle_send_instruction_click(state):
+ state['mode'] = 'instruct'
+ state['history'] = {'internal': [], 'visible': []}
+
+ output = generate_chat_prompt("Input", state)
+
+ return output
+
+
+def handle_send_chat_click(state):
+ output = generate_chat_prompt("", state, _continue=True)
+
+ return output
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 657133bd..d0afd6b2 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -42,13 +42,47 @@ def fix_newlines(string):
return string
+def replace_quotes(text):
+
+ # Define a list of quote pairs (opening and closing), using HTML entities
+ quote_pairs = [
+        ('&quot;', '&quot;'),  # Double quotes
+        ('&ldquo;', '&rdquo;'),  # Unicode left and right double quotation marks
+        ('&lsquo;', '&rsquo;'),  # Unicode left and right single quotation marks
+        ('&laquo;', '&raquo;'),  # French quotes
+        ('&bdquo;', '&ldquo;'),  # German quotes
+        ('&lsquo;', '&rsquo;'),  # Alternative single quotes
+        ('&#8220;', '&#8221;'),  # Unicode quotes (numeric entities)
+        ('&#x201C;', '&#x201D;'),  # Unicode quotes (hex entities)
+ ]
+
+ # Create a regex pattern that matches any of the quote pairs, including newlines
+ pattern = '|'.join(f'({re.escape(open_q)})(.*?)({re.escape(close_q)})' for open_q, close_q in quote_pairs)
+
+    # Replace matched patterns with <q> tags, keeping original quotes
+    replaced_text = re.sub(pattern, lambda m: f'<q>{m.group(1)}{m.group(2)}{m.group(3)}</q>', text, flags=re.DOTALL)
+
+ return replaced_text
+
+
def replace_blockquote(m):
return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
-@functools.lru_cache(maxsize=4096)
+@functools.lru_cache(maxsize=None)
def convert_to_markdown(string):
+ # Make \[ \] LaTeX equations inline
+ pattern = r'^\s*\\\[\s*\n([\s\S]*?)\n\s*\\\]\s*$'
+ replacement = r'\\[ \1 \\]'
+ string = re.sub(pattern, replacement, string, flags=re.MULTILINE)
+
+ # Escape backslashes
+ string = string.replace('\\', '\\\\')
+
+    # Quote to <q></q>
+ string = replace_quotes(string)
+
# Blockquote
     string = re.sub(r'(^|[\n])&gt;', r'\1>', string)
pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)
@@ -69,12 +103,27 @@ def convert_to_markdown(string):
result = ''
is_code = False
+ is_latex = False
for line in string.split('\n'):
- if line.lstrip(' ').startswith('```'):
+ stripped_line = line.strip()
+
+ if stripped_line.startswith('```'):
is_code = not is_code
+ elif stripped_line.startswith('$$'):
+ is_latex = not is_latex
+ elif stripped_line.endswith('$$'):
+ is_latex = False
+ elif stripped_line.startswith('\\\\['):
+ is_latex = True
+ elif stripped_line.startswith('\\\\]'):
+ is_latex = False
+ elif stripped_line.endswith('\\\\]'):
+ is_latex = False
result += line
- if is_code or line.startswith('|'): # Don't add an extra \n for tables or code
+
+ # Don't add an extra \n for tables, code, or LaTeX
+ if is_code or is_latex or line.startswith('|'):
result += '\n'
else:
result += '\n\n'
@@ -124,6 +173,7 @@ def convert_to_markdown_wrapped(string, use_cache=True):
def generate_basic_html(string):
+ convert_to_markdown.cache_clear()
string = convert_to_markdown(string)
string = f'{string}
'
return string
diff --git a/modules/loaders.py b/modules/loaders.py
index 75ed897b..549de5fb 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -127,15 +127,6 @@ loaders_and_params = OrderedDict({
'no_use_fast',
'autogptq_info',
],
- 'AutoAWQ': [
- 'cpu_memory',
- 'gpu_memory',
- 'auto_devices',
- 'max_seq_len',
- 'no_inject_fused_attention',
- 'trust_remote_code',
- 'no_use_fast',
- ],
'HQQ': [
'hqq_backend',
'trust_remote_code',
@@ -200,7 +191,6 @@ def transformers_samplers():
loaders_samplers = {
'Transformers': transformers_samplers(),
'AutoGPTQ': transformers_samplers(),
- 'AutoAWQ': transformers_samplers(),
'HQQ': transformers_samplers(),
'ExLlamav2': {
'temperature',
diff --git a/modules/logits.py b/modules/logits.py
index 4233c8a5..73cabb41 100644
--- a/modules/logits.py
+++ b/modules/logits.py
@@ -13,8 +13,8 @@ global_scores = None
def get_next_logits(*args, **kwargs):
- if shared.args.idle_timeout > 0 and shared.model is None and shared.previous_model_name not in [None, 'None']:
- shared.model, shared.tokenizer = load_model(shared.previous_model_name)
+ if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:
+ shared.model, shared.tokenizer = load_model(shared.model_name)
needs_lock = not args[2] # use_samplers
if needs_lock:
diff --git a/modules/models.py b/modules/models.py
index 07c14308..b0e2346e 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -75,7 +75,6 @@ def load_model(model_name, loader=None):
'llamacpp_HF': llamacpp_HF_loader,
'ExLlamav2': ExLlamav2_loader,
'ExLlamav2_HF': ExLlamav2_HF_loader,
- 'AutoAWQ': AutoAWQ_loader,
'HQQ': HQQ_loader,
'TensorRT-LLM': TensorRT_LLM_loader,
}
@@ -99,7 +98,7 @@ def load_model(model_name, loader=None):
if model is None:
return None, None
else:
- tokenizer = load_tokenizer(model_name, model)
+ tokenizer = load_tokenizer(model_name)
shared.settings.update({k: v for k, v in metadata.items() if k in shared.settings})
if loader.lower().startswith('exllama') or loader.lower().startswith('tensorrt'):
@@ -114,9 +113,13 @@ def load_model(model_name, loader=None):
return model, tokenizer
-def load_tokenizer(model_name, model):
+def load_tokenizer(model_name, tokenizer_dir=None):
+ if tokenizer_dir:
+ path_to_model = Path(tokenizer_dir)
+ else:
+ path_to_model = Path(f"{shared.args.model_dir}/{model_name}/")
+
tokenizer = None
- path_to_model = Path(f"{shared.args.model_dir}/{model_name}/")
if path_to_model.exists():
if shared.args.no_use_fast:
logger.info('Loading the tokenizer with use_fast=False.')
@@ -279,35 +282,24 @@ def llamacpp_loader(model_name):
def llamacpp_HF_loader(model_name):
from modules.llamacpp_hf import LlamacppHF
- path = Path(f'{shared.args.model_dir}/{model_name}')
-
- # Check if a HF tokenizer is available for the model
- if all((path / file).exists() for file in ['tokenizer_config.json']):
- logger.info(f'Using tokenizer from: \"{path}\"')
+ if shared.args.tokenizer_dir:
+ logger.info(f'Using tokenizer from: \"{shared.args.tokenizer_dir}\"')
else:
- logger.error("Could not load the model because a tokenizer in Transformers format was not found.")
- return None, None
+ path = Path(f'{shared.args.model_dir}/{model_name}')
+ # Check if a HF tokenizer is available for the model
+ if all((path / file).exists() for file in ['tokenizer_config.json']):
+ logger.info(f'Using tokenizer from: \"{path}\"')
+ else:
+ logger.error("Could not load the model because a tokenizer in Transformers format was not found.")
+ return None, None
model = LlamacppHF.from_pretrained(model_name)
- return model
-
-def AutoAWQ_loader(model_name):
- from awq import AutoAWQForCausalLM
-
- model_dir = Path(f'{shared.args.model_dir}/{model_name}')
-
- model = AutoAWQForCausalLM.from_quantized(
- quant_path=model_dir,
- max_new_tokens=shared.args.max_seq_len,
- trust_remote_code=shared.args.trust_remote_code,
- fuse_layers=not shared.args.no_inject_fused_attention,
- max_memory=get_max_memory_dict(),
- batch_size=1,
- safetensors=any(model_dir.glob('*.safetensors')),
- )
-
- return model
+ if shared.args.tokenizer_dir:
+ tokenizer = load_tokenizer(model_name, tokenizer_dir=shared.args.tokenizer_dir)
+ return model, tokenizer
+ else:
+ return model
def AutoGPTQ_loader(model_name):
@@ -387,14 +379,15 @@ def clear_torch_cache():
torch.cuda.empty_cache()
-def unload_model():
+def unload_model(keep_model_name=False):
shared.model = shared.tokenizer = None
- shared.previous_model_name = shared.model_name
- shared.model_name = 'None'
shared.lora_names = []
shared.model_dirty_from_training = False
clear_torch_cache()
+ if not keep_model_name:
+ shared.model_name = 'None'
+
def reload_model():
unload_model()
@@ -412,7 +405,7 @@ def unload_model_if_idle():
if time.time() - last_generation_time > shared.args.idle_timeout * 60:
if shared.model is not None:
logger.info("Unloading the model for inactivity.")
- unload_model()
+ unload_model(keep_model_name=True)
finally:
shared.generation_lock.release()
diff --git a/modules/models_settings.py b/modules/models_settings.py
index 7ae68125..1bb00ceb 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -180,8 +180,6 @@ def infer_loader(model_name, model_settings):
loader = None
elif (path_to_model / 'quantize_config.json').exists() or ('wbits' in model_settings and isinstance(model_settings['wbits'], int) and model_settings['wbits'] > 0):
loader = 'ExLlamav2_HF'
- elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()):
- loader = 'AutoAWQ'
elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists():
loader = 'llamacpp_HF'
elif len(list(path_to_model.glob('*.gguf'))) > 0:
diff --git a/modules/shared.py b/modules/shared.py
index d96e3156..43533a14 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -13,7 +13,6 @@ from modules.logging_colors import logger
model = None
tokenizer = None
model_name = 'None'
-previous_model_name = 'None'
is_seq2seq = False
model_dirty_from_training = False
lora_names = []
@@ -33,7 +32,7 @@ settings = {
'dark_theme': True,
'show_controls': True,
'start_with': '',
- 'mode': 'chat',
+ 'mode': 'chat-instruct',
'chat_style': 'cai-chat',
'prompt-default': 'QA',
'prompt-notebook': 'QA',
@@ -44,8 +43,6 @@ settings = {
'negative_prompt': '',
'seed': -1,
'truncation_length': 2048,
- 'truncation_length_min': 0,
- 'truncation_length_max': 200000,
'max_tokens_second': 0,
'max_updates_second': 0,
'prompt_lookup_num_tokens': 0,
@@ -89,7 +86,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft
# Model loader
group = parser.add_argument_group('Model loader')
-group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, AutoGPTQ, AutoAWQ.')
+group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, AutoGPTQ.')
# Transformers/Accelerate
group = parser.add_argument_group('Transformers/Accelerate')
@@ -118,7 +115,7 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for
# llama.cpp
group = parser.add_argument_group('llama.cpp')
group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.')
-group.add_argument('--tensorcores', action='store_true', help='Use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards. NVIDIA only.')
+group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.')
group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.')
group.add_argument('--threads', type=int, default=0, help='Number of threads to use.')
group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.')
@@ -127,7 +124,7 @@ group.add_argument('--n_batch', type=int, default=512, help='Maximum number of p
group.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.')
group.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')
group.add_argument('--n-gpu-layers', type=int, default=0, help='Number of layers to offload to the GPU.')
-group.add_argument('--tensor_split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 18,17.')
+group.add_argument('--tensor_split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.')
group.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp.')
group.add_argument('--logits_all', action='store_true', help='Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower.')
group.add_argument('--no_offload_kqv', action='store_true', help='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
@@ -135,6 +132,7 @@ group.add_argument('--cache-capacity', type=str, help='Maximum cache capacity (l
group.add_argument('--row_split', action='store_true', help='Split the model by rows across GPUs. This may improve multi-gpu performance.')
group.add_argument('--streaming-llm', action='store_true', help='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
group.add_argument('--attention-sink-size', type=int, default=5, help='StreamingLLM: number of sink tokens. Only used if the trimmed prompt does not share a prefix with the old prompt.')
+group.add_argument('--tokenizer-dir', type=str, help='Load the tokenizer from this folder. Meant to be used with llamacpp_HF from the command line.')
# ExLlamaV2
group = parser.add_argument_group('ExLlamaV2')
@@ -160,10 +158,6 @@ group.add_argument('--disable_exllamav2', action='store_true', help='Disable ExL
group.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.')
group.add_argument('--groupsize', type=int, default=-1, help='Group size.')
-# AutoAWQ
-group = parser.add_argument_group('AutoAWQ')
-group.add_argument('--no_inject_fused_attention', action='store_true', help='Disable the use of fused attention, which will use less VRAM at the cost of slower inference.')
-
# HQQ
group = parser.add_argument_group('HQQ')
group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.')
@@ -195,6 +189,7 @@ group.add_argument('--gradio-auth', type=str, help='Set Gradio authentication pa
group.add_argument('--gradio-auth-path', type=str, help='Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above.', default=None)
group.add_argument('--ssl-keyfile', type=str, help='The path to the SSL certificate key file.', default=None)
group.add_argument('--ssl-certfile', type=str, help='The path to the SSL certificate cert file.', default=None)
+group.add_argument('--subpath', type=str, help='Customize the subpath for Gradio; intended for use behind a reverse proxy.')
# API
group = parser.add_argument_group('API')
@@ -216,6 +211,7 @@ group.add_argument('--model_type', type=str, help='DEPRECATED')
group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED')
group.add_argument('--checkpoint', type=str, help='DEPRECATED')
group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED')
+group.add_argument('--no_inject_fused_attention', action='store_true', help='DEPRECATED')
args = parser.parse_args()
args_defaults = parser.parse_args([])
@@ -266,8 +262,6 @@ def fix_loader_name(name):
return 'ExLlamav2'
elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']:
return 'ExLlamav2_HF'
- elif name in ['autoawq', 'awq', 'auto-awq']:
- return 'AutoAWQ'
elif name in ['hqq']:
return 'HQQ'
elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']:
diff --git a/modules/text_generation.py b/modules/text_generation.py
index d971a30e..75e5ef36 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -32,8 +32,8 @@ from modules.models import clear_torch_cache, load_model
def generate_reply(*args, **kwargs):
- if shared.args.idle_timeout > 0 and shared.model is None and shared.previous_model_name not in [None, 'None']:
- shared.model, shared.tokenizer = load_model(shared.previous_model_name)
+ if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:
+ shared.model, shared.tokenizer = load_model(shared.model_name)
shared.generation_lock.acquire()
try:
diff --git a/modules/training.py b/modules/training.py
index a810fb6e..b003fc8c 100644
--- a/modules/training.py
+++ b/modules/training.py
@@ -165,7 +165,7 @@ def create_ui():
stride_length = gr.Slider(label='Stride', minimum=0, maximum=32768, value=512, step=256, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.')
with gr.Column():
- max_length = gr.Slider(label='max_length', minimum=0, maximum=shared.settings['truncation_length_max'], value=0, step=256, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.')
+ max_length = gr.Number(label='max_length', precision=0, step=256, value=0, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.')
with gr.Row():
start_current_evaluation = gr.Button("Evaluate loaded model", interactive=not mu)
diff --git a/modules/ui.py b/modules/ui.py
index d77266ce..47f92cf0 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -15,8 +15,6 @@ with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f:
css += f.read()
with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r') as f:
css += f.read()
-with open(Path(__file__).resolve().parent / '../css/highlightjs/github-dark.min.css', 'r') as f:
- css += f.read()
with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r') as f:
css += f.read()
with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f:
@@ -29,6 +27,8 @@ with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r') as f:
show_controls_js = f.read()
with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r') as f:
update_big_picture_js = f.read()
+with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r') as f:
+ dark_theme_js = f.read()
refresh_symbol = '🔄'
delete_symbol = '🗑️'
@@ -78,7 +78,6 @@ def list_model_elements():
'groupsize',
'triton',
'desc_act',
- 'no_inject_fused_attention',
'no_inject_fused_mlp',
'no_use_cuda_fp16',
'disable_exllama',
@@ -116,6 +115,7 @@ def list_model_elements():
'hqq_backend',
'cpp_runner',
]
+
if is_torch_xpu_available():
for i in range(torch.xpu.device_count()):
elements.append(f'gpu_memory_{i}')
@@ -184,6 +184,7 @@ def list_interface_input_elements():
'start_with',
'character_menu',
'history',
+ 'unique_id',
'name1',
'user_bio',
'name2',
@@ -213,9 +214,11 @@ def list_interface_input_elements():
def gather_interface_values(*args):
+ interface_elements = list_interface_input_elements()
+
output = {}
- for i, element in enumerate(list_interface_input_elements()):
- output[element] = args[i]
+ for element, value in zip(interface_elements, args):
+ output[element] = value
if not shared.args.multi_user:
shared.persistent_interface_state = output
@@ -226,8 +229,14 @@ def gather_interface_values(*args):
def apply_interface_values(state, use_persistent=False):
if use_persistent:
state = shared.persistent_interface_state
+ if 'textbox-default' in state:
+ state.pop('prompt_menu-default')
+
+ if 'textbox-notebook' in state:
+ state.pop('prompt_menu-notebook')
elements = list_interface_input_elements()
+
if len(state) == 0:
return [gr.update() for k in elements] # Dummy, do nothing
else:
@@ -236,7 +245,7 @@ def apply_interface_values(state, use_persistent=False):
def save_settings(state, preset, extensions_list, show_controls, theme_state):
output = copy.deepcopy(shared.settings)
- exclude = ['name2', 'greeting', 'context', 'turn_template', 'truncation_length']
+ exclude = ['name2', 'greeting', 'context', 'truncation_length', 'instruction_template_str']
for k in state:
if k in shared.settings and k not in exclude:
output[k] = state[k]
@@ -268,7 +277,7 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state):
if key in shared.default_settings and output[key] == shared.default_settings[key]:
output.pop(key)
- return yaml.dump(output, sort_keys=False, width=float("inf"))
+ return yaml.dump(output, sort_keys=False, width=float("inf"), allow_unicode=True)
def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_class, interactive=True):
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index c6f6ddb0..57143cd8 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -85,13 +85,13 @@ def create_ui():
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
with gr.Row():
- shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
+ shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
with gr.Row():
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
with gr.Row():
- shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar'])
+ shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar'])
def create_chat_settings_ui():
@@ -137,7 +137,7 @@ def create_chat_settings_ui():
shared.gradio['tavern_json'] = gr.State()
with gr.Column():
shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False)
- shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False)
+ shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=10, label='Description', interactive=False, elem_classes=['add_scrollbar'])
shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False)
@@ -181,169 +181,112 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['textbox'].submit(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['Regenerate'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['Continue'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['Impersonate'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then(
chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['Replace last reply'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.replace_last_reply, gradio('textbox', 'interface_state'), gradio('history')).then(
- lambda: '', None, gradio('textbox'), show_progress=False).then(
- chat.redraw_html, gradio(reload_arr), gradio('display')).then(
- chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
+ chat.handle_replace_last_reply_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)
shared.gradio['Send dummy message'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.send_dummy_message, gradio('textbox', 'interface_state'), gradio('history')).then(
- lambda: '', None, gradio('textbox'), show_progress=False).then(
- chat.redraw_html, gradio(reload_arr), gradio('display')).then(
- chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
+ chat.handle_send_dummy_message_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)
shared.gradio['Send dummy reply'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.send_dummy_reply, gradio('textbox', 'interface_state'), gradio('history')).then(
- lambda: '', None, gradio('textbox'), show_progress=False).then(
- chat.redraw_html, gradio(reload_arr), gradio('display')).then(
- chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
+ chat.handle_send_dummy_reply_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)
shared.gradio['Remove last'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.remove_last_message, gradio('history'), gradio('textbox', 'history'), show_progress=False).then(
- chat.redraw_html, gradio(reload_arr), gradio('display')).then(
- chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None)
+ chat.handle_remove_last_click, gradio('interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)
shared.gradio['Stop'].click(
stop_everything_event, None, None, queue=False).then(
- chat.redraw_html, gradio(reload_arr), gradio('display'))
+ chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)
if not shared.args.multi_user:
shared.gradio['unique_id'].select(
- chat.load_history, gradio('unique_id', 'character_menu', 'mode'), gradio('history')).then(
- chat.redraw_html, gradio(reload_arr), gradio('display'))
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.handle_unique_id_select, gradio('interface_state'), gradio('history', 'display'), show_progress=False)
shared.gradio['Start new chat'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.start_new_chat, gradio('interface_state'), gradio('history')).then(
- chat.redraw_html, gradio(reload_arr), gradio('display')).then(
- lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False)
+ chat.handle_start_new_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
shared.gradio['delete_chat'].click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, gradio(clear_arr))
shared.gradio['delete_chat-cancel'].click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr))
shared.gradio['delete_chat-confirm'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- lambda x, y: str(chat.find_all_histories(x).index(y)), gradio('interface_state', 'unique_id'), gradio('temporary_text')).then(
- chat.delete_history, gradio('unique_id', 'character_menu', 'mode'), None).then(
- chat.load_history_after_deletion, gradio('interface_state', 'temporary_text'), gradio('history', 'unique_id'), show_progress=False).then(
- chat.redraw_html, gradio(reload_arr), gradio('display')).then(
- lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr))
-
- shared.gradio['rename_chat'].click(
- lambda: "My New Chat", None, gradio('rename_to')).then(
- lambda: [gr.update(visible=True)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
-
- shared.gradio['rename_to-cancel'].click(
- lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
+ chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id') + gradio(clear_arr), show_progress=False)
+ shared.gradio['rename_chat'].click(chat.handle_rename_chat_click, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
+ shared.gradio['rename_to-cancel'].click(lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
shared.gradio['rename_to-confirm'].click(
- chat.rename_history, gradio('unique_id', 'rename_to', 'character_menu', 'mode'), None).then(
- lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False).then(
- lambda x, y: gr.update(choices=chat.find_all_histories_with_first_prompts(x), value=y), gradio('interface_state', 'rename_to'), gradio('unique_id'))
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
shared.gradio['rename_to'].submit(
- chat.rename_history, gradio('unique_id', 'rename_to', 'character_menu', 'mode'), None).then(
- lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False).then(
- lambda x, y: gr.update(choices=chat.find_all_histories_with_first_prompts(x), value=y), gradio('interface_state', 'rename_to'), gradio('unique_id'))
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
shared.gradio['load_chat_history'].upload(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.start_new_chat, gradio('interface_state'), gradio('history')).then(
- chat.load_history_json, gradio('load_chat_history', 'history'), gradio('history')).then(
- chat.redraw_html, gradio(reload_arr), gradio('display')).then(
- lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False).then(
- chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
+ chat.handle_upload_chat_history, gradio('load_chat_history', 'interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}')
shared.gradio['character_menu'].change(
- chat.load_character, gradio('character_menu', 'name1', 'name2'), gradio('name1', 'name2', 'character_picture', 'greeting', 'context')).success(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.load_latest_history, gradio('interface_state'), gradio('history')).then(
- chat.redraw_html, gradio(reload_arr), gradio('display')).then(
- lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False).then(
+ chat.handle_character_menu_change, gradio('interface_state'), gradio('history', 'display', 'name1', 'name2', 'character_picture', 'greeting', 'context', 'unique_id'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
- shared.gradio['mode'].change(None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
-
shared.gradio['mode'].change(
- lambda x: [gr.update(visible=x != 'instruct'), gr.update(visible=x == 'chat-instruct')], gradio('mode'), gradio('chat_style', 'chat-instruct_command'), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.load_latest_history, gradio('interface_state'), gradio('history')).then(
- chat.redraw_html, gradio(reload_arr), gradio('display')).then(
- lambda x: gr.update(choices=(histories := chat.find_all_histories_with_first_prompts(x)), value=histories[0][1]), gradio('interface_state'), gradio('unique_id'), show_progress=False)
+ chat.handle_mode_change, gradio('interface_state'), gradio('history', 'display', 'chat_style', 'chat-instruct_command', 'unique_id'), show_progress=False).then(
+ None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
- shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'))
+ shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)
shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False)
# Save/delete a character
- shared.gradio['save_character'].click(
- lambda x: x, gradio('name2'), gradio('save_character_filename')).then(
- lambda: gr.update(visible=True), None, gradio('character_saver'))
-
- shared.gradio['delete_character'].click(lambda: gr.update(visible=True), None, gradio('character_deleter'))
-
- shared.gradio['load_template'].click(
- chat.load_instruction_template, gradio('instruction_template'), gradio('instruction_template_str')).then(
- lambda: "Select template to load...", None, gradio('instruction_template'))
-
+ shared.gradio['save_character'].click(chat.handle_save_character_click, gradio('name2'), gradio('save_character_filename', 'character_saver'), show_progress=False)
+ shared.gradio['delete_character'].click(lambda: gr.update(visible=True), None, gradio('character_deleter'), show_progress=False)
+ shared.gradio['load_template'].click(chat.handle_load_template_click, gradio('instruction_template'), gradio('instruction_template_str', 'instruction_template'), show_progress=False)
shared.gradio['save_template'].click(
- lambda: 'My Template.yaml', None, gradio('save_filename')).then(
- lambda: 'instruction-templates/', None, gradio('save_root')).then(
- chat.generate_instruction_template_yaml, gradio('instruction_template_str'), gradio('save_contents')).then(
- lambda: gr.update(visible=True), None, gradio('file_saver'))
-
- shared.gradio['delete_template'].click(
- lambda x: f'{x}.yaml', gradio('instruction_template'), gradio('delete_filename')).then(
- lambda: 'instruction-templates/', None, gradio('delete_root')).then(
- lambda: gr.update(visible=True), None, gradio('file_deleter'))
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.handle_save_template_click, gradio('instruction_template_str'), gradio('save_filename', 'save_root', 'save_contents', 'file_saver'), show_progress=False)
+ shared.gradio['delete_template'].click(chat.handle_delete_template_click, gradio('instruction_template'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
shared.gradio['save_chat_history'].click(
lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then(
None, gradio('temporary_text', 'character_menu', 'mode'), None, js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}')
shared.gradio['Submit character'].click(
- chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then(
+ chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}')
shared.gradio['Submit tavern character'].click(
- chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then(
+ chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}')
shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character'))
@@ -351,35 +294,32 @@ def create_event_handlers():
shared.gradio['upload_img_tavern'].upload(chat.check_tavern_character, gradio('upload_img_tavern'), gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False)
shared.gradio['upload_img_tavern'].clear(lambda: (None, None, None, gr.update(interactive=False)), None, gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False)
shared.gradio['your_picture'].change(
- chat.upload_your_profile_picture, gradio('your_picture'), None).then(
- partial(chat.redraw_html, reset_cache=True), gradio(reload_arr), gradio('display'))
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.handle_your_picture_change, gradio('your_picture', 'interface_state'), gradio('display'), show_progress=False)
shared.gradio['send_instruction_to_default'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then(
- partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-default')).then(
+ chat.handle_send_instruction_click, gradio('interface_state'), gradio('textbox-default'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}')
shared.gradio['send_instruction_to_notebook'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then(
- partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('textbox-notebook')).then(
+ chat.handle_send_instruction_click, gradio('interface_state'), gradio('textbox-notebook'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}')
shared.gradio['send_instruction_to_negative_prompt'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- lambda x: x.update({'mode': 'instruct', 'history': {'internal': [], 'visible': []}}), gradio('interface_state'), None).then(
- partial(chat.generate_chat_prompt, 'Input'), gradio('interface_state'), gradio('negative_prompt')).then(
+ chat.handle_send_instruction_click, gradio('interface_state'), gradio('negative_prompt'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}')
shared.gradio['send-chat-to-default'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-default')).then(
+ chat.handle_send_chat_click, gradio('interface_state'), gradio('textbox-default'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}')
shared.gradio['send-chat-to-notebook'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-notebook')).then(
+ chat.handle_send_chat_click, gradio('interface_state'), gradio('textbox-notebook'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}')
shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}')
diff --git a/modules/ui_default.py b/modules/ui_default.py
index e3bfe784..112acd23 100644
--- a/modules/ui_default.py
+++ b/modules/ui_default.py
@@ -64,38 +64,46 @@ def create_event_handlers():
shared.gradio['Generate-default'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['textbox-default'].submit(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
- shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False)
shared.gradio['Continue-default'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False)
+ shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False)
shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False)
- shared.gradio['save_prompt-default'].click(
- lambda x: x, gradio('textbox-default'), gradio('save_contents')).then(
- lambda: 'prompts/', None, gradio('save_root')).then(
- lambda: utils.current_time() + '.txt', None, gradio('save_filename')).then(
- lambda: gr.update(visible=True), None, gradio('file_saver'))
-
- shared.gradio['delete_prompt-default'].click(
- lambda: 'prompts/', None, gradio('delete_root')).then(
- lambda x: x + '.txt', gradio('prompt_menu-default'), gradio('delete_filename')).then(
- lambda: gr.update(visible=True), None, gradio('file_deleter'))
-
+ shared.gradio['save_prompt-default'].click(handle_save_prompt, gradio('textbox-default'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
+ shared.gradio['delete_prompt-default'].click(handle_delete_prompt, gradio('prompt_menu-default'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
shared.gradio['textbox-default'].change(lambda x: f"{count_tokens(x)}", gradio('textbox-default'), gradio('token-counter-default'), show_progress=False)
shared.gradio['get_logits-default'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
logits.get_next_logits, gradio('textbox-default', 'interface_state', 'use_samplers-default', 'logits-default'), gradio('logits-default', 'logits-default-previous'), show_progress=False)
shared.gradio['get_tokens-default'].click(get_token_ids, gradio('textbox-default'), gradio('tokens-default'), show_progress=False)
+
+
+def handle_save_prompt(text):
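+ # Pre-fill the file saver dialog: prompt contents, a timestamped filename, the prompts/ folder, and make the dialog visible.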
+ return [
+ text,
+ utils.current_time() + ".txt",
+ "prompts/",
+ gr.update(visible=True)
+ ]
+
+
+def handle_delete_prompt(prompt):
+ return [
+ prompt + ".txt",
+ "prompts/",
+ gr.update(visible=True)
+ ]
diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py
index 71471217..ac72c623 100644
--- a/modules/ui_file_saving.py
+++ b/modules/ui_file_saving.py
@@ -1,3 +1,5 @@
+import traceback
+
import gradio as gr
from modules import chat, presets, shared, ui, utils
@@ -47,57 +49,119 @@ def create_ui():
def create_event_handlers():
- shared.gradio['save_confirm'].click(
- lambda x, y, z: utils.save_file(x + y, z), gradio('save_root', 'save_filename', 'save_contents'), None).then(
- lambda: gr.update(visible=False), None, gradio('file_saver'))
-
- shared.gradio['delete_confirm'].click(
- lambda x, y: utils.delete_file(x + y), gradio('delete_root', 'delete_filename'), None).then(
- lambda: gr.update(visible=False), None, gradio('file_deleter'))
-
- shared.gradio['delete_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_deleter'))
- shared.gradio['save_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_saver'))
-
- shared.gradio['save_character_confirm'].click(
- chat.save_character, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), None).then(
- lambda: gr.update(visible=False), None, gradio('character_saver')).then(
- lambda x: gr.update(choices=utils.get_available_characters(), value=x), gradio('save_character_filename'), gradio('character_menu'))
-
- shared.gradio['delete_character_confirm'].click(
- lambda x: str(utils.get_available_characters().index(x)), gradio('character_menu'), gradio('temporary_text')).then(
- chat.delete_character, gradio('character_menu'), None).then(
- chat.update_character_menu_after_deletion, gradio('temporary_text'), gradio('character_menu')).then(
- lambda: gr.update(visible=False), None, gradio('character_deleter'))
-
- shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver'))
- shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter'))
-
shared.gradio['save_preset'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- presets.generate_preset_yaml, gradio('interface_state'), gradio('save_preset_contents')).then(
- lambda: 'My Preset', None, gradio('save_preset_filename')).then(
- lambda: gr.update(visible=True), None, gradio('preset_saver'))
+ handle_save_preset_click, gradio('interface_state'), gradio('save_preset_contents', 'save_preset_filename', 'preset_saver'), show_progress=False)
- shared.gradio['save_preset_confirm'].click(
- lambda x, y: utils.save_file(f'presets/{x}.yaml', y), gradio('save_preset_filename', 'save_preset_contents'), None).then(
- lambda: gr.update(visible=False), None, gradio('preset_saver')).then(
- lambda x: gr.update(choices=utils.get_available_presets(), value=x), gradio('save_preset_filename'), gradio('preset_menu'))
+ shared.gradio['delete_preset'].click(handle_delete_preset_click, gradio('preset_menu'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
+ shared.gradio['save_grammar'].click(handle_save_grammar_click, gradio('grammar_string'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
+ shared.gradio['delete_grammar'].click(handle_delete_grammar_click, gradio('grammar_file'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
- shared.gradio['save_preset_cancel'].click(lambda: gr.update(visible=False), None, gradio('preset_saver'))
+ shared.gradio['save_preset_confirm'].click(handle_save_preset_confirm_click, gradio('save_preset_filename', 'save_preset_contents'), gradio('preset_menu', 'preset_saver'), show_progress=False)
+ shared.gradio['save_confirm'].click(handle_save_confirm_click, gradio('save_root', 'save_filename', 'save_contents'), gradio('file_saver'), show_progress=False)
+ shared.gradio['delete_confirm'].click(handle_delete_confirm_click, gradio('delete_root', 'delete_filename'), gradio('file_deleter'), show_progress=False)
+ shared.gradio['save_character_confirm'].click(handle_save_character_confirm_click, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), gradio('character_menu', 'character_saver'), show_progress=False)
+ shared.gradio['delete_character_confirm'].click(handle_delete_character_confirm_click, gradio('character_menu'), gradio('character_menu', 'character_deleter'), show_progress=False)
- shared.gradio['delete_preset'].click(
- lambda x: f'{x}.yaml', gradio('preset_menu'), gradio('delete_filename')).then(
- lambda: 'presets/', None, gradio('delete_root')).then(
- lambda: gr.update(visible=True), None, gradio('file_deleter'))
+ shared.gradio['save_preset_cancel'].click(lambda: gr.update(visible=False), None, gradio('preset_saver'), show_progress=False)
+ shared.gradio['save_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_saver'))
+ shared.gradio['delete_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_deleter'))
+ shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver'), show_progress=False)
+ shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter'), show_progress=False)
- shared.gradio['save_grammar'].click(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- lambda x: x, gradio('grammar_string'), gradio('save_contents')).then(
- lambda: 'grammars/', None, gradio('save_root')).then(
- lambda: 'My Fancy Grammar.gbnf', None, gradio('save_filename')).then(
- lambda: gr.update(visible=True), None, gradio('file_saver'))
- shared.gradio['delete_grammar'].click(
- lambda x: x, gradio('grammar_file'), gradio('delete_filename')).then(
- lambda: 'grammars/', None, gradio('delete_root')).then(
- lambda: gr.update(visible=True), None, gradio('file_deleter'))
+def handle_save_preset_confirm_click(filename, contents):
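+ # Write the preset to presets/<filename>.yaml and refresh the preset menu; on failure, log the traceback and leave the menu unchanged.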
+ try:
+ utils.save_file(f"presets/{filename}.yaml", contents)
+ available_presets = utils.get_available_presets()
+ output = gr.update(choices=available_presets, value=filename)
+ except Exception:
+ output = gr.update()
+ traceback.print_exc()
+
+ return [
+ output,
+ gr.update(visible=False)
+ ]
+
+
+def handle_save_confirm_click(root, filename, contents):
+ try:
+ utils.save_file(root + filename, contents)
+ except Exception:
+ traceback.print_exc()
+
+ return gr.update(visible=False)
+
+
+def handle_delete_confirm_click(root, filename):
+ try:
+ utils.delete_file(root + filename)
+ except Exception:
+ traceback.print_exc()
+
+ return gr.update(visible=False)
+
+
+def handle_save_character_confirm_click(name2, greeting, context, character_picture, filename):
+ try:
+ chat.save_character(name2, greeting, context, character_picture, filename)
+ available_characters = utils.get_available_characters()
+ output = gr.update(choices=available_characters, value=filename)
+ except Exception:
+ output = gr.update()
+ traceback.print_exc()
+
+ return [
+ output,
+ gr.update(visible=False)
+ ]
+
+
+def handle_delete_character_confirm_click(character):
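+ # Remember the character's position in the list so a neighboring character can be selected after deletion.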
+ try:
+ index = str(utils.get_available_characters().index(character))
+ chat.delete_character(character)
+ output = chat.update_character_menu_after_deletion(index)
+ except Exception:
+ output = gr.update()
+ traceback.print_exc()
+
+ return [
+ output,
+ gr.update(visible=False)
+ ]
+
+
+def handle_save_preset_click(state):
+ contents = presets.generate_preset_yaml(state)
+ return [
+ contents,
+ "My Preset",
+ gr.update(visible=True)
+ ]
+
+
+def handle_delete_preset_click(preset):
+ return [
+ f"{preset}.yaml",
+ "presets/",
+ gr.update(visible=True)
+ ]
+
+
+def handle_save_grammar_click(grammar_string):
+ return [
+ grammar_string,
+ "My Fancy Grammar.gbnf",
+ "grammars/",
+ gr.update(visible=True)
+ ]
+
+
+def handle_delete_grammar_click(grammar_file):
+ return [
+ grammar_file,
+ "grammars/",
+ gr.update(visible=True)
+ ]
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 7a85020f..1883fdca 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -66,7 +66,6 @@ def create_ui():
ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['load_model'] = gr.Button("Load", visible=not shared.settings['autoload_model'], elem_classes='refresh-button', interactive=not mu)
shared.gradio['unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu)
- shared.gradio['reload_model'] = gr.Button("Reload", elem_classes='refresh-button', interactive=not mu)
shared.gradio['save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu)
with gr.Column():
@@ -94,19 +93,19 @@ def create_ui():
shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers, info='Must be set to more than 0 for your GPU to be used.')
- shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=shared.settings['truncation_length_max'], step=256, label="n_ctx", value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.')
- shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 18,17')
+ shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.')
+ shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch)
shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads)
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=shared.args.wbits if shared.args.wbits > 0 else "None")
shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=shared.args.groupsize if shared.args.groupsize > 0 else "None")
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
- shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=shared.settings['truncation_length_max'], step=256, info='Context length. Try lowering this if you run out of memory while loading the model.', value=shared.args.max_seq_len)
+ shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. Try lowering this if you run out of memory while loading the model.')
with gr.Blocks():
- shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=8, step=0.05, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value)
- shared.gradio['rope_freq_base'] = gr.Slider(label='rope_freq_base', minimum=0, maximum=20000000, step=1000, info='If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63)', value=shared.args.rope_freq_base)
- shared.gradio['compress_pos_emb'] = gr.Slider(label='compress_pos_emb', minimum=1, maximum=8, step=0.1, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.', value=shared.args.compress_pos_emb)
+ shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.')
+ shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.')
+ shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=2, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.')
shared.gradio['autogptq_info'] = gr.Markdown('ExLlamav2_HF is recommended over AutoGPTQ for models derived from Llama.')
@@ -118,7 +117,7 @@ def create_ui():
shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.')
shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.')
shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
- shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.')
+ shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.')
shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.')
shared.gradio['cache_4bit'] = gr.Checkbox(label="cache_4bit", value=shared.args.cache_4bit, info='Use Q4 cache to save VRAM.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
@@ -128,7 +127,6 @@ def create_ui():
shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
shared.gradio['no_mul_mat_q'] = gr.Checkbox(label="no_mul_mat_q", value=shared.args.no_mul_mat_q, info='Disable the mulmat kernels.')
shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton)
- shared.gradio['no_inject_fused_attention'] = gr.Checkbox(label="no_inject_fused_attention", value=shared.args.no_inject_fused_attention, info='Disable fused attention. Fused attention improves inference performance but uses more VRAM. Fuses layers for AutoAWQ. Disable if running low on VRAM.')
shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM. Disable if running low on VRAM.')
shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.')
shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.')
@@ -188,39 +186,24 @@ def create_ui():
def create_event_handlers():
- shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params()))
+ shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params()), show_progress=False)
# In this event handler, the interface state is read and updated
# with the model defaults (if any), and then the model is loaded
# unless "autoload_model" is unchecked
shared.gradio['model_menu'].change(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- apply_model_settings_to_state, gradio('model_menu', 'interface_state'), gradio('interface_state')).then(
- ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then(
- update_model_parameters, gradio('interface_state'), None).then(
+ handle_load_model_event_initial, gradio('model_menu', 'interface_state'), gradio(ui.list_interface_input_elements()) + gradio('interface_state'), show_progress=False).then(
load_model_wrapper, gradio('model_menu', 'loader', 'autoload_model'), gradio('model_status'), show_progress=False).success(
- update_truncation_length, gradio('truncation_length', 'interface_state'), gradio('truncation_length')).then(
- lambda x: x, gradio('loader'), gradio('filter_by_loader'))
+ handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False)
shared.gradio['load_model'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
update_model_parameters, gradio('interface_state'), None).then(
partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False).success(
- update_truncation_length, gradio('truncation_length', 'interface_state'), gradio('truncation_length')).then(
- lambda x: x, gradio('loader'), gradio('filter_by_loader'))
-
- shared.gradio['reload_model'].click(
- unload_model, None, None).then(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- update_model_parameters, gradio('interface_state'), None).then(
- partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False).success(
- update_truncation_length, gradio('truncation_length', 'interface_state'), gradio('truncation_length')).then(
- lambda x: x, gradio('loader'), gradio('filter_by_loader'))
-
- shared.gradio['unload_model'].click(
- unload_model, None, None).then(
- lambda: "Model unloaded", None, gradio('model_status'))
+ handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False)
+ shared.gradio['unload_model'].click(handle_unload_model_click, None, gradio('model_status'), show_progress=False)
shared.gradio['save_model_settings'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
save_model_settings, gradio('model_menu', 'interface_state'), gradio('model_status'), show_progress=False)
@@ -353,3 +336,20 @@ def update_truncation_length(current_length, state):
return state['n_ctx']
return current_length
+
+
+def handle_load_model_event_initial(model, state):
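+ # Apply the selected model's saved settings to the interface state, refresh the UI elements, and update the command-line parameters before loading.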
+ state = apply_model_settings_to_state(model, state)
+ output = ui.apply_interface_values(state)
+ update_model_parameters(state)
+ return output + [state]
+
+
+def handle_load_model_event_final(truncation_length, loader, state):
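+ # After a successful load, sync the truncation length with the loaded model's context length and mirror the loader into filter_by_loader.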
+ truncation_length = update_truncation_length(truncation_length, state)
+ return [truncation_length, loader]
+
+
+def handle_unload_model_click():
+ unload_model()
+ return "Model unloaded"
diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py
index 307bc0f3..79932844 100644
--- a/modules/ui_notebook.py
+++ b/modules/ui_notebook.py
@@ -7,6 +7,7 @@ from modules.text_generation import (
get_token_ids,
stop_everything_event
)
+from modules.ui_default import handle_delete_prompt, handle_save_prompt
from modules.utils import gradio
inputs = ('textbox-notebook', 'interface_state')
@@ -66,38 +67,32 @@ def create_event_handlers():
lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
shared.gradio['textbox-notebook'].submit(
lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
- shared.gradio['Undo'].click(lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False)
- shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False)
shared.gradio['Regenerate-notebook'].click(
lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
+ shared.gradio['Undo'].click(
+ lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then(
+ lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None)
+
+ shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False)
shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False)
shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False)
- shared.gradio['save_prompt-notebook'].click(
- lambda x: x, gradio('textbox-notebook'), gradio('save_contents')).then(
- lambda: 'prompts/', None, gradio('save_root')).then(
- lambda: utils.current_time() + '.txt', None, gradio('save_filename')).then(
- lambda: gr.update(visible=True), None, gradio('file_saver'))
-
- shared.gradio['delete_prompt-notebook'].click(
- lambda: 'prompts/', None, gradio('delete_root')).then(
- lambda x: x + '.txt', gradio('prompt_menu-notebook'), gradio('delete_filename')).then(
- lambda: gr.update(visible=True), None, gradio('file_deleter'))
-
+ shared.gradio['save_prompt-notebook'].click(handle_save_prompt, gradio('textbox-notebook'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
+ shared.gradio['delete_prompt-notebook'].click(handle_delete_prompt, gradio('prompt_menu-notebook'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
shared.gradio['textbox-notebook'].input(lambda x: f"{count_tokens(x)}", gradio('textbox-notebook'), gradio('token-counter-notebook'), show_progress=False)
shared.gradio['get_logits-notebook'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
index 68512c7e..eff62c20 100644
--- a/modules/ui_parameters.py
+++ b/modules/ui_parameters.py
@@ -40,9 +40,9 @@ def create_ui(default_preset):
shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
with gr.Blocks():
- shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to value > 0 to enable DRY. Controls the magnitude of the penalty for the shortest penalized sequences.')
- shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
+ shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=generate_params['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
+ shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
gr.Markdown("[Learn more](https://github.com/oobabooga/text-generation-webui/wiki/03-%E2%80%90-Parameters-Tab)")
@@ -89,7 +89,7 @@ def create_ui(default_preset):
shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.')
with gr.Column():
- shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.')
+ shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.')
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
@@ -102,10 +102,16 @@ def create_ui(default_preset):
def create_event_handlers():
shared.gradio['filter_by_loader'].change(loaders.blacklist_samplers, gradio('filter_by_loader', 'dynamic_temperature'), gradio(loaders.list_all_samplers()), show_progress=False)
- shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()))
- shared.gradio['random_preset'].click(presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()))
- shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'))
- shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'))
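+ # Gather the current UI values into interface_state before loading or randomizing a preset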
+ shared.gradio['preset_menu'].change(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
+
+ shared.gradio['random_preset'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
+
+ shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'), show_progress=False)
+ shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'), show_progress=False)
def get_truncation_length():
diff --git a/modules/ui_session.py b/modules/ui_session.py
index 087091ce..dfb95b83 100644
--- a/modules/ui_session.py
+++ b/modules/ui_session.py
@@ -35,15 +35,22 @@ def create_ui():
None, None, None, js='() => {document.body.innerHTML=\'Reloading...\'; setTimeout(function(){location.reload()},2500); return []}')
shared.gradio['toggle_dark_mode'].click(
- None, None, None, js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}').then(
- lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state'))
+ lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then(
+ None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode()}}')
shared.gradio['save_settings'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- ui.save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents')).then(
- lambda: './', None, gradio('save_root')).then(
- lambda: 'settings.yaml', None, gradio('save_filename')).then(
- lambda: gr.update(visible=True), None, gradio('file_saver'))
+ handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
+
+
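+# Serialize the current settings to YAML and pre-fill the file-saver dialog
+# (contents, filename, root path) before making it visible.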
+def handle_save_settings(state, preset, extensions, show_controls, theme):
+ contents = ui.save_settings(state, preset, extensions, show_controls, theme)
+ return [
+ contents,
+ "settings.yaml",
+ "./",
+ gr.update(visible=True)
+ ]
def set_interface_arguments(extensions, bool_active):
diff --git a/modules/utils.py b/modules/utils.py
index 4b65736b..f4333031 100644
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -95,11 +95,10 @@ def get_available_presets():
def get_available_prompts():
- prompts = []
- files = set((k.stem for k in Path('prompts').glob('*.txt')))
- prompts += sorted([k for k in files if re.match('^[0-9]', k)], key=natural_keys, reverse=True)
- prompts += sorted([k for k in files if re.match('^[^0-9]', k)], key=natural_keys)
- prompts += ['None']
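+ # Sort saved prompts by file modification time, newest first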
+ prompt_files = list(Path('prompts').glob('*.txt'))
+ sorted_files = sorted(prompt_files, key=lambda x: x.stat().st_mtime, reverse=True)
+ prompts = [file.stem for file in sorted_files]
+ prompts.append('None')
return prompts
diff --git a/one_click.py b/one_click.py
index e94b6d44..0a0412ba 100644
--- a/one_click.py
+++ b/one_click.py
@@ -388,7 +388,12 @@ def update_requirements(initial_installation=False, pull=True):
# Prepare the requirements file
textgen_requirements = open(requirements_file).read().splitlines()
if is_cuda118:
- textgen_requirements = [req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "auto-gptq" not in req]
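+ # On CUDA 11.8, repoint +cu121/+cu122 wheels to +cu118 and drop the
+ # auto-gptq / autoawq entries from the list.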
+ textgen_requirements = [
+ req.replace('+cu121', '+cu118').replace('+cu122', '+cu118')
+ for req in textgen_requirements
+ if "auto-gptq" not in req.lower() and "autoawq" not in req.lower()
+ ]
+
if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11
textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req]
diff --git a/requirements.txt b/requirements.txt
index 14a8b8df..9c8ffd94 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-accelerate==0.32.*
+accelerate==0.33.*
aqlm[gpu,cpu]==1.1.6; platform_system == "Linux"
auto-gptq==0.7.1
bitsandbytes==0.43.*
@@ -14,7 +14,7 @@ numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
-peft==0.8.*
+peft==0.12.*
Pillow>=9.5.0
psutil
pyyaml
@@ -24,7 +24,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.42.*
+transformers==4.44.*
tqdm
wandb
@@ -37,31 +37,38 @@ soundfile
openai-whisper
# llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows"
+https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
\ No newline at end of file
diff --git a/requirements_amd.txt b/requirements_amd.txt
index 43412538..5ff86863 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -1,4 +1,4 @@
-accelerate==0.32.*
+accelerate==0.33.*
colorama
datasets
einops
@@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
-peft==0.8.*
+peft==0.12.*
Pillow>=9.5.0
psutil
pyyaml
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.42.*
+transformers==4.44.*
tqdm
wandb
@@ -32,16 +32,18 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.82+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
-https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.89+rocm5.6.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.89+rocm5.6.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
\ No newline at end of file
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index 34cb4599..c0d094e8 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -1,4 +1,4 @@
-accelerate==0.32.*
+accelerate==0.33.*
colorama
datasets
einops
@@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
-peft==0.8.*
+peft==0.12.*
Pillow>=9.5.0
psutil
pyyaml
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.42.*
+transformers==4.44.*
tqdm
wandb
@@ -32,14 +32,16 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# AMD wheels
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
-https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.5/autoawq-0.2.5+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm5.6.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+rocm5.6.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
\ No newline at end of file
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index 1b170c27..cbcf1175 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -1,4 +1,4 @@
-accelerate==0.32.*
+accelerate==0.33.*
colorama
datasets
einops
@@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
-peft==0.8.*
+peft==0.12.*
Pillow>=9.5.0
psutil
pyyaml
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.42.*
+transformers==4.44.*
tqdm
wandb
@@ -32,8 +32,8 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index d557eae1..d5cb3675 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -1,4 +1,4 @@
-accelerate==0.32.*
+accelerate==0.33.*
colorama
datasets
einops
@@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
-peft==0.8.*
+peft==0.12.*
Pillow>=9.5.0
psutil
pyyaml
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.42.*
+transformers==4.44.*
tqdm
wandb
@@ -32,10 +32,10 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.82-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.89-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index 5aed6167..807c182a 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -1,4 +1,4 @@
-accelerate==0.32.*
+accelerate==0.33.*
colorama
datasets
einops
@@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
-peft==0.8.*
+peft==0.12.*
Pillow>=9.5.0
psutil
pyyaml
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.42.*
+transformers==4.44.*
tqdm
wandb
@@ -32,7 +32,7 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index 3937b002..e2a89936 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -1,4 +1,4 @@
-accelerate==0.32.*
+accelerate==0.33.*
colorama
datasets
einops
@@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
-peft==0.8.*
+peft==0.12.*
Pillow>=9.5.0
psutil
pyyaml
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.42.*
+transformers==4.44.*
tqdm
wandb
@@ -32,7 +32,7 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 15b45a05..bfe57329 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -1,4 +1,4 @@
-accelerate==0.32.*
+accelerate==0.33.*
aqlm[gpu,cpu]==1.1.6; platform_system == "Linux"
auto-gptq==0.7.1
bitsandbytes==0.43.*
@@ -14,7 +14,7 @@ numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
-peft==0.8.*
+peft==0.12.*
Pillow>=9.5.0
psutil
pyyaml
@@ -24,7 +24,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.42.*
+transformers==4.44.*
tqdm
wandb
@@ -35,31 +35,38 @@ sse-starlette==1.6.5
tiktoken
# llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.82+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.89+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.82+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.89+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.82+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.89+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.1.9/exllamav2-0.1.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows"
+https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/AutoAWQ/releases/download/0.2.6/autoawq-0.2.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/AutoAWQ_kernels/releases/download/0.0.7/autoawq_kernels-0.0.7-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
\ No newline at end of file
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
index 14e3aa88..ffb45fe3 100644
--- a/requirements_nowheels.txt
+++ b/requirements_nowheels.txt
@@ -1,4 +1,4 @@
-accelerate==0.32.*
+accelerate==0.33.*
colorama
datasets
einops
@@ -11,7 +11,7 @@ numba==0.59.*
numpy==1.26.*
optimum==1.17.*
pandas
-peft==0.8.*
+peft==0.12.*
Pillow>=9.5.0
psutil
pyyaml
@@ -21,7 +21,7 @@ safetensors==0.4.*
scipy
sentencepiece
tensorboard
-transformers==4.42.*
+transformers==4.44.*
tqdm
wandb
diff --git a/server.py b/server.py
index 7afa954e..d6069d5e 100644
--- a/server.py
+++ b/server.py
@@ -90,7 +90,7 @@ def create_interface():
# Force some events to be triggered on page load
shared.persistent_interface_state.update({
'loader': shared.args.loader or 'Transformers',
- 'mode': shared.settings['mode'],
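+ # Force the mode on page load only when it is 'instruct'; gr.update() leaves the component unchanged otherwise.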
+ 'mode': shared.settings['mode'] if shared.settings['mode'] == 'instruct' else gr.update(),
'character_menu': shared.args.character or shared.settings['character'],
'instruction_template_str': shared.settings['instruction_template_str'],
'prompt_menu-default': shared.settings['prompt-default'],
@@ -146,11 +146,21 @@ def create_interface():
ui_model_menu.create_event_handlers()
# Interface launch events
- shared.gradio['interface'].load(None, None, None, js=f"() => {{if ({str(shared.settings['dark_theme']).lower()}) {{ document.getElementsByTagName('body')[0].classList.add('dark'); }} }}")
- shared.gradio['interface'].load(None, None, None, js=f"() => {{{js}}}")
- shared.gradio['interface'].load(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}')
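+ # Merge the dark-theme toggle, the main UI script, and the show-controls toggle into a single load event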
+ shared.gradio['interface'].load(
+ None,
+ gradio('show_controls'),
+ None,
+ js=f"""(x) => {{
+ if ({str(shared.settings['dark_theme']).lower()}) {{
+ document.getElementsByTagName('body')[0].classList.add('dark');
+ }}
+ {js}
+ {ui.show_controls_js}
+ toggle_controls(x);
+ }}"""
+ )
+
shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False)
- shared.gradio['interface'].load(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
extensions_module.create_extensions_tabs() # Extensions tabs
extensions_module.create_extensions_block() # Extensions block
@@ -169,6 +179,7 @@ def create_interface():
ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True,
ssl_keyfile=shared.args.ssl_keyfile,
ssl_certfile=shared.args.ssl_certfile,
+ root_path=shared.args.subpath,
allowed_paths=["cache", "css", "extensions", "js"]
)
diff --git a/settings-template.yaml b/settings-template.yaml
index f09c845e..59c76c35 100644
--- a/settings-template.yaml
+++ b/settings-template.yaml
@@ -1,7 +1,7 @@
dark_theme: true
show_controls: true
start_with: ''
-mode: chat
+mode: chat-instruct
chat_style: cai-chat
prompt-default: QA
prompt-notebook: QA
@@ -12,8 +12,6 @@ max_new_tokens_max: 4096
negative_prompt: ''
seed: -1
truncation_length: 2048
-truncation_length_min: 0
-truncation_length_max: 200000
max_tokens_second: 0
max_updates_second: 0
prompt_lookup_num_tokens: 0
diff --git a/start_linux.sh b/start_linux.sh
index 5620c831..792daca8 100755
--- a/start_linux.sh
+++ b/start_linux.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
cd "$(dirname "${BASH_SOURCE[0]}")"
diff --git a/training/formats/ChatML-format.json b/training/formats/ChatML-format.json
new file mode 100644
index 00000000..a9f8a09a
--- /dev/null
+++ b/training/formats/ChatML-format.json
@@ -0,0 +1,4 @@
+{
+ "instruction,output": "<|im_start|>system\n<|im_end|>\n<|im_start|>user\n%instruction%<|im_end|>\n<|im_start|>assistant\n%output%<|im_end|>",
+ "instruction,input,output": "<|im_start|>system\n<|im_end|>\n<|im_start|>user\n%instruction%: %input%<|im_end|>\n<|im_start|>assistant\n%output%<|im_end|>"
+}
diff --git a/update_wizard_linux.sh b/update_wizard_linux.sh
index c5add61e..3ada9a1e 100755
--- a/update_wizard_linux.sh
+++ b/update_wizard_linux.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
cd "$(dirname "${BASH_SOURCE[0]}")"