Merge pull request #6585 from oobabooga/dev

Merge dev branch

commit 4d466d5c80 by oobabooga, 2024-12-18 23:24:55 -03:00 (committed by GitHub)
No known key found for this signature in database; GPG Key ID: B5690EEEBB952194
34 changed files with 1207 additions and 439 deletions


@@ -4,9 +4,9 @@ A Gradio web UI for Large Language Models.

 Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation.

-|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_instruct.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_chat.png) |
+|![Image1](https://github.com/oobabooga/screenshots/raw/main/AFTER-INSTRUCT.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/AFTER-CHAT.png) |
 |:---:|:---:|
-|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_default.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_parameters.png) |
+|![Image1](https://github.com/oobabooga/screenshots/raw/main/AFTER-DEFAULT.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/AFTER-PARAMETERS.png) |

 ## Features

@@ -202,18 +202,19 @@ List of command-line flags

 ```txt
 usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS]
-                 [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--chat-buttons] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices]
-                 [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code]
-                 [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE]
-                 [--flash-attn] [--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock]
-                 [--n-gpu-layers N_GPU_LAYERS] [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm]
-                 [--attention-sink-size ATTENTION_SINK_SIZE] [--tokenizer-dir TOKENIZER_DIR] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn]
-                 [--no_xformers] [--no_sdpa] [--cache_8bit] [--cache_4bit] [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act]
-                 [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR]
+                 [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices] [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]]
+                 [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast]
+                 [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--tensorcores]
+                 [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS]
+                 [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] [--attention-sink-size ATTENTION_SINK_SIZE]
+                 [--tokenizer-dir TOKENIZER_DIR] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa]
+                 [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--enable_tp] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama] [--disable_exllamav2]
+                 [--wbits WBITS] [--groupsize GROUPSIZE] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR]
                  [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT]
                  [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE]
-                 [--subpath SUBPATH] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui]
+                 [--subpath SUBPATH] [--old-colors] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui]
                  [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] [--checkpoint CHECKPOINT] [--monkey-patch] [--no_inject_fused_attention]
+                 [--cache_4bit] [--cache_8bit] [--chat-buttons]

 Text generation web UI

@@ -232,7 +233,6 @@ Basic settings:
                                                  file will be loaded by default without the need to use the --settings flag.
   --extensions EXTENSIONS [EXTENSIONS ...]       The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.
   --verbose                                      Print the prompts to the terminal.
-  --chat-buttons                                 Show buttons on the chat tab instead of a hover menu.
   --idle-timeout IDLE_TIMEOUT                    Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.

 Model loader:

@@ -291,9 +291,8 @@ ExLlamaV2:
   --no_flash_attn                                Force flash-attention to not be used.
   --no_xformers                                  Force xformers to not be used.
   --no_sdpa                                      Force Torch SDPA to not be used.
-  --cache_8bit                                   Use 8-bit cache to save VRAM.
-  --cache_4bit                                   Use Q4 cache to save VRAM.
   --num_experts_per_token NUM_EXPERTS_PER_TOKEN  Number of experts to use for generation. Applies to MoE models like Mixtral.
+  --enable_tp                                    Enable Tensor Parallelism (TP) in ExLlamaV2.

 AutoGPTQ:
   --triton                                       Use triton.

@@ -311,6 +310,9 @@ HQQ:
 TensorRT-LLM:
   --cpp-runner                                   Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn't support streaming yet.

+Cache:
+  --cache_type CACHE_TYPE                        KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.
+
 DeepSpeed:
   --deepspeed                                    Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.
   --nvme-offload-dir NVME_OFFLOAD_DIR            DeepSpeed: Directory to use for ZeRO-3 NVME offloading.

@@ -332,6 +334,7 @@ Gradio:
   --ssl-keyfile SSL_KEYFILE                      The path to the SSL certificate key file.
   --ssl-certfile SSL_CERTFILE                    The path to the SSL certificate cert file.
   --subpath SUBPATH                              Customize the subpath for gradio, use with reverse proxy
+  --old-colors                                   Use the legacy Gradio colors, before the December/2024 update.

 API:
   --api                                          Enable the API extension.
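Note on migrating launch scripts: `--cache_8bit`, `--cache_4bit`, and `--chat-buttons` have moved to the end of the usage string above, which suggests they are kept only as deprecated aliases, while the new unified `--cache_type` flag takes over cache selection. A plausible migration (model name illustrative):

```txt
# Before (deprecated flags):
python server.py --model MyModel-exl2 --cache_4bit

# After (unified flag; valid values depend on the loader):
python server.py --model MyModel-exl2 --cache_type q4
```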

Binary file not shown.

Binary file not shown.


@@ -16,6 +16,6 @@
 }

 .message {
-    padding-bottom: 30px;
+    padding-bottom: 2em;
     grid-template-columns: 70px minmax(0, 1fr);
 }


@@ -1,7 +1,7 @@
 .message {
     display: grid;
     grid-template-columns: 60px minmax(0, 1fr);
-    padding-bottom: 15px;
+    padding-bottom: 2em;
     font-size: 15px;
     font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
     line-height: 22.5px !important;


@@ -1,21 +1,54 @@
 .chat {
     background: transparent;
-    padding: 24px 19px;
-    padding-right: 19px !important;
+    padding: 0;
     padding-top: 0;
 }

-.chat > .messages {
-    padding-top: 18px !important;
+.chat > .messages:first-child {
+    padding-top: 0 !important;
 }

-.message {
-    display: grid;
-    grid-template-columns: 60px 1fr;
-    padding-bottom: 25px;
-    font-size: 15px;
-    font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
-    line-height: 24px;
+.chat > .messages > :last-child {
+    margin-bottom: 1.7rem !important;
+}
+
+.chat .message-body p, .chat .message-body li {
+    font-size: 1rem !important;
+    line-height: 28px !important;
+}
+
+.dark .chat .message-body p,
+.dark .chat .message-body li,
+.dark .chat .message-body q {
+    color: #d1d5db !important;
+}
+
+.chat .message-body p,
+.chat .message-body ul,
+.chat .message-body ol {
+    margin-top: 1.25em !important;
+    margin-bottom: 1.25em !important;
+}
+
+.chat .message-body p:first-child,
+.chat .message-body ul:first-child,
+.chat .message-body ol:first-child {
+    margin-top: 0 !important;
+}
+
+.chat .message-body p:last-child,
+.chat .message-body ul:last-child,
+.chat .message-body ol:last-child {
+    margin-bottom: 0 !important;
+}
+
+.chat .message-body li {
+    margin-top: 1.25em !important;
+    margin-bottom: 1.25em !important;
+}
+
+.user-message, .assistant-message {
+    font-family: Inter, Helvetica, Arial, sans-serif;
 }

 .message:first-child {
@@ -26,49 +59,43 @@
     display: none;
 }

-.message-body p, .message-body li {
-    font-size: 15px !important;
-    line-height: 24px !important;
-}
-
-.message-body p, .chat .message-body ul, .chat .message-body ol {
-    margin-bottom: 16px !important;
-}
-
-.message-body p:last-child, .chat .message-body ul:last-child, .chat .message-body ol:last-child {
-    margin-bottom: 0 !important;
-}
-
-.gradio-container .chat .assistant-message {
-    padding: 20px;
-    background: #f4f4f4;
-    margin-top: 9px !important;
-    margin-bottom: 12px !important;
-    border-radius: 7px;
-    border: 1px solid var(--border-color-primary);
-}
-
-.dark .chat .assistant-message {
-    background: var(--color-grey-800);
-}
-
-.gradio-container .chat .user-message {
-    padding: 20px;
-    padding-left: 0;
-    padding-right: 0;
-    background-color: transparent;
-    border-radius: 8px;
+.chat .user-message {
+    padding: 1.5rem 1rem;
+    border-radius: 0;
     border-bottom-right-radius: 0;
 }

-.gradio-container .chat .assistant-message:last-child, .gradio-container .chat .user-message:last-child {
-    margin-bottom: 0 !important;
+.chat .assistant-message {
+    background: #f4f4f4;
+    padding: 1.5rem 1rem;
+    border-radius: 0;
+    border: 0;
 }

-code {
+.dark .chat .user-message {
+    background: transparent;
+}
+
+.dark .chat .assistant-message {
+    background: var(--light-gray);
+}
+
+.chat .user-message .text,
+.chat .assistant-message .text {
+    max-width: 40.25rem;
+    margin-left: auto;
+    margin-right: auto;
+}
+
+/* Create space between two assistant messages in a row */
+.assistant-message + .assistant-message {
+    margin-top: 1.5rem;
+}
+
+pre > code {
     background-color: #f3f4f6 !important;
 }

-.dark code {
+.dark pre > code {
     background-color: #1f2937 !important;
 }


@@ -1,7 +1,46 @@
+:root {
+    --darker-gray: #202123;
+    --dark-gray: #343541;
+    --light-gray: #444654;
+    --light-theme-gray: #f4f4f4;
+    --border-color-dark: #525252;
+    --header-width: 112px;
+    --selected-item-color-dark: #32333e;
+}
+
+@font-face {
+    font-family: Inter;
+    src: url('file/css/Inter/Inter-VariableFont_opsz,wght.ttf') format('truetype');
+    font-weight: 100 900;
+    font-style: normal;
+}
+
+@font-face {
+    font-family: Inter;
+    src: url('file/css/Inter/Inter-Italic-VariableFont_opsz,wght.ttf') format('truetype');
+    font-weight: 100 900;
+    font-style: italic;
+}
+
 .tabs.svelte-710i53 {
     margin-top: 0
 }

+.padded.svelte-12cmxck {
+    padding: 3px 0;
+}
+
+div.svelte-sfqy0y,
+div.svelte-iyf88w {
+    background: transparent;
+    border: 0;
+}
+
+/* "info" messages without a title above */
+.block > .svelte-e8n7p6:not(:only-of-type, #chat-mode *) {
+    margin-bottom: 2px;
+}
+
 .py-6 {
     padding-top: 2.5rem
 }
@@ -19,7 +58,7 @@
     height: 39.594px;
     align-self: end;
     line-height: 1em;
-    border-radius: 0.5em;
+    border-radius: 0.375rem;
     flex: none;
 }
@@ -46,10 +85,6 @@
     min-height: 0
 }

-.dark svg {
-    fill: white;
-}
-
 .dark a {
     color: white !important;
 }
@@ -62,14 +97,20 @@ ol li p, ul li p {
     border: 0;
 }

+#default-tab, #notebook-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab {
+    padding: 1rem;
+}
+
 .gradio-container {
     max-width: 100% !important;
     padding-top: 0 !important;
 }

 #extensions {
-    margin-top: 5px;
-    margin-bottom: 35px;
+    margin: 5px auto 35px;
+    max-width: 880px;
+    padding: 1em;
+    padding-left: calc(var(--header-width) + 1em);
 }

 .extension-tab {
@@ -86,20 +127,29 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * {
 }

 gradio-app > :first-child {
-    padding-left: var(--size-4) !important;
-    padding-right: var(--size-4) !important;
+    padding: 0 !important;
 }

 .header_bar {
-    background-color: #f4f4f4;
     box-shadow: 0 0 3px rgba(22 22 22 / 35%);
     margin-bottom: 0;
     overflow-x: scroll;
-    margin-left: calc(-1 * var(--size-4));
-    margin-right: calc(-1 * var(--size-4));
-    display: block !important;
     text-wrap: nowrap;
     z-index: 90;
+    position: fixed;
+    display: flex !important;
+    flex-direction: column;
+    height: 100dvh;
+    width: var(--header-width);
+}
+
+.header_bar button {
+    margin: 0;
+    padding: 0.75rem;
+}
+
+.header_bar button.selected {
+    border: 0;
 }

 .dark .header_bar {
@@ -113,23 +163,23 @@ gradio-app > :first-child {
 }

 .textbox_default textarea {
-    height: calc(100dvh - 271px);
+    height: calc(100dvh - 201px);
 }

 .textbox_default_output textarea {
-    height: calc(100dvh - 185px);
+    height: calc(100dvh - 117px);
 }

 .textbox textarea {
-    height: calc(100dvh - 241px);
+    height: calc(100dvh - 172px);
 }

 .textbox_logits textarea {
-    height: calc(100dvh - 236px);
+    height: calc(100dvh - 205px);
 }

 .textbox_logits_notebook textarea {
-    height: calc(100dvh - 292px);
+    height: calc(100dvh - 221px);
 }

 .monospace textarea {
@@ -149,24 +199,6 @@ gradio-app > :first-child {
     color: #efefef !important;
 }

-@media screen and (width <= 711px) {
-    .textbox_default textarea {
-        height: calc(100dvh - 259px);
-    }
-
-    div .default-token-counter {
-        top: calc( 0.5 * (100dvh - 236px) ) !important;
-    }
-
-    .transparent-substring {
-        display: none;
-    }
-
-    .hover-menu {
-        min-width: 250px !important;
-    }
-}
-
 /* Hide the gradio footer */
 footer {
     display: none !important;
@@ -227,11 +259,13 @@ button {
 .pretty_scrollbar::-webkit-scrollbar-thumb,
 .pretty_scrollbar::-webkit-scrollbar-thumb:hover {
     background: var(--neutral-300);
+    border-radius: 30px;
 }

 .dark .pretty_scrollbar::-webkit-scrollbar-thumb,
 .dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover {
-    background: var(--neutral-700);
+    background: #ccc;
+    border-radius: 10px;
 }

 .pretty_scrollbar::-webkit-resizer {
@@ -239,7 +273,8 @@ button {
 }

 .dark .pretty_scrollbar::-webkit-resizer {
-    background: #374151;
+    background: #ccc;
+    border-radius: 10px;
 }

 .pretty_scrollbar::-webkit-scrollbar-corner {
@@ -251,20 +286,26 @@ audio {
 }

 /* Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui */
-.token-counter {
+#default-token-counter, #notebook-token-counter {
     position: absolute !important;
-    top: calc( 0.5 * (100dvh - 218px) ) !important;
-    right: 2px;
     z-index: 100;
     background: var(--input-background-fill) !important;
     min-height: 0 !important;
+    width: 0;
+    text-align: left;
+    direction: rtl;
+    right: 5px;
 }

-.default-token-counter {
-    top: calc( 0.5 * (100dvh - 248px) ) !important;
+#default-token-counter {
+    top: calc(100dvh - 200px) !important;
 }

-.token-counter span {
+#notebook-token-counter {
+    top: calc(100dvh - 171px) !important;
+}
+
+#default-token-counter span, #notebook-token-counter span {
     padding: 1px;
     box-shadow: 0 0 0 0.3em rgb(192 192 192 / 15%), inset 0 0 0.6em rgb(192 192 192 / 7.5%);
     border: 2px solid rgb(192 192 192 / 40%) !important;
@@ -272,15 +313,15 @@ audio {
 }

 .no-background {
-    background: var(--background-fill-primary) !important;
+    background: transparent;
     padding: 0 !important;
 }

 /* ----------------------------------------------
     Chat tab
 ---------------------------------------------- */
-.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx {
-    height: 66.67vh
+.h-\[40dvh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx {
+    height: 66.67dvh
 }

 .gradio-container {
@@ -310,7 +351,13 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 }

 #chat-tab {
-    padding-top: 0;
+    padding: 0;
+}
+
+#chat-tab > :nth-child(1) {
+    display: flex;
+    flex-direction: row;
+    gap: 0;
 }

 #chat-tab button#Generate, #chat-tab button#stop {
@@ -322,7 +369,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 }

 #chat-tab > :first-child, #extensions {
-    max-width: 880px;
     margin-left: auto;
     margin-right: auto;
 }
@@ -342,61 +388,49 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 .chat {
     margin-left: auto;
     margin-right: auto;
-    max-width: 880px;
     min-height: var(--chat-height);
     overflow-y: auto;
-    padding-right: 15px;
     display: flex;
     flex-direction: column;
     word-break: break-word;
     overflow-wrap: anywhere;
     border-top: none;
-    border-radius: 0 0 0 8px;
+    border-radius: 0;
     visibility: visible;
 }

 .chat-parent {
-    height: calc(100dvh - 98px - var(--header-height) - var(--input-delta));
+    height: calc(100dvh - 98px - var(--input-delta));
     overflow: auto !important;
     border-radius: 0 !important;
     margin-bottom: var(--input-delta) !important;
 }

-/* On desktop, automatically hide the chat scroll bar
- * when not hovered. */
-@media (hover: hover) and (pointer: fine) {
-    .chat-parent {
-        visibility: hidden;
-    }
-
-    .chat-parent:focus, .chat-parent:hover {
-        visibility: visible;
-    }
-}
-
 .chat-parent .prose {
     visibility: visible;
 }

-.old-ui .chat-parent {
-    height: calc(100dvh - 192px - var(--header-height) - var(--input-delta));
-    margin-bottom: var(--input-delta) !important;
+.chat .message {
+    width: min(100%, 48rem);
+    margin-left: auto;
+    margin-right: auto;
+    text-align: start;
+    padding-left: 1rem;
+    padding-right: 1rem;
 }

 .chat-parent.bigchat {
-    height: calc(100dvh - 98px - var(--header-height) - var(--input-delta)) !important;
+    height: calc(100dvh - 98px - var(--input-delta)) !important;
     margin-bottom: var(--input-delta) !important;
 }

 .chat > .messages {
     display: flex;
     flex-direction: column;
-    padding-top: 25px;
 }

-.chat .message:last-child {
-    margin-bottom: 0 !important;
-    padding-bottom: 15px !important;
+.chat > .messages > :first-child {
+    padding-top: 20px;
 }

 .message-body h1,
@@ -404,7 +438,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 .message-body h3,
 .message-body h4 {
     color: var(--body-text-color);
-    margin: 20px 0 10px 0;
+    margin: 20px 0 10px;
 }

 .dark .message q {
@@ -423,12 +457,12 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     padding-inline-start: 2em;
 }

-.message-body li:not(:last-child) {
-    margin-top: 0 !important;
-    margin-bottom: 2px !important;
+.chat .message-body li:not(:last-child) {
+    margin-top: 0;
+    margin-bottom: 2px;
 }

-.message-body li:last-child {
+.chat .message-body li:last-child {
     margin-bottom: 0 !important;
 }
@@ -456,7 +490,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     overflow: scroll;
 }

-.message-body code {
+.prose ul ul {
+    margin: 0;
+}
+
+.message-body pre > code {
     white-space: pre-wrap !important;
     word-wrap: break-word !important;
     border: 1px solid #666;
@@ -467,7 +505,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     color: #1f2328;
 }

-.dark .message-body code {
+.dark .message-body pre > code {
     background: #0d1117 !important;
     color: rgb(201 209 217);
 }
@@ -477,8 +515,18 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     padding: 15px;
 }

+.message-body :not(pre) > code::before {
+    content: "`";
+}
+
+.message-body :not(pre) > code::after {
+    content: "`";
+}
+
 .message-body :not(pre) > code {
     white-space: normal !important;
+    font-weight: bold;
+    font-family: unset;
 }

 #chat-input {
@@ -488,6 +536,15 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     border: none;
 }

+#chat-input textarea {
+    padding: 0.65rem 2.5rem;
+}
+
+#chat-input textarea::placeholder {
+    white-space: nowrap;
+    overflow: hidden;
+}
+
 #chat-input textarea:focus {
     box-shadow: none !important;
 }
@@ -500,6 +557,14 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     display: none;
 }

+.chat-input-positioned {
+    position: absolute;
+    bottom: 0;
+    max-width: 54rem;
+    left: 50%;
+    transform: translateX(-50%);
+}
+
 @media print {
     body {
         visibility: hidden;
@@ -535,7 +600,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 #show-controls {
     position: absolute;
-    height: 100%;
     background-color: transparent;
     border: 0 !important;
     border-radius: 0;
@@ -544,7 +608,8 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 #show-controls label {
     z-index: 1000;
     position: absolute;
-    right: 0;
+    right: 30px;
+    top: 10px;
     white-space: nowrap;
     overflow: hidden;
     text-overflow: ellipsis;
@@ -626,7 +691,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     position: absolute;
     bottom: 80%;
     left: 0;
-    background-color: var(--background-fill-primary);
     box-shadow: 0 0 5px rgb(0 0 0 / 25%);
     z-index: 10000;
     min-width: 330px;
@@ -637,7 +701,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     width: 100%;
     background: transparent !important;
     border-radius: 0 !important;
-    border-color: var(--border-color-primary);
     justify-content: space-between;
     margin: 0 !important;
     height: 36px;
@@ -659,7 +722,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     opacity: 0.333;
 }

-#chat-tab:not(.old-ui) #chat-buttons {
+#chat-tab #chat-buttons {
     display: none !important;
 }
@@ -690,23 +753,37 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 }

 #chat-input-row {
-    padding-bottom: 20px;
+    padding-bottom: 1.5em;
+    padding-left: 1rem;
+    padding-right: 1rem;
 }

-.old-ui #chat-input-row, #chat-input-row.bigchat {
-    padding-bottom: 0 !important;
+#chat-input-row.bigchat {
+    padding-bottom: 1px !important;
 }

 #chat-col {
     padding-bottom: 100px;
 }

-.old-ui #chat-col, #chat-col.bigchat {
-    padding-bottom: 80px !important;
+@media screen and (width <= 924px) {
+    #chat-col {
+        padding-bottom: 100px;
+        margin-top: 32px;
+        position: relative; /* Ensure positioning for the pseudo-element */
+    }
+
+    .chat-parent {
+        height: calc(100dvh - 98px - var(--input-delta) - 32px);
+    }
+
+    .chat-parent.bigchat {
+        height: calc(100dvh - 98px - var(--input-delta) - 32px) !important;
+    }
 }

-.old-ui #chat-buttons #clear-history-confirm {
-    order: -1;
+#chat-col.bigchat {
+    padding-bottom: 80px !important;
 }

 .chat ol, .chat ul {
@@ -721,26 +798,37 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 }

 /* ----------------------------------------------
-    Past chat histories in a side bar on desktop
+    Create the sidebars
 ---------------------------------------------- */
-@media screen and (width >= 1327px) {
-    #past-chats-row {
-        position: absolute;
-        top: 36px;
-        left: 0;
-        width: calc(0.5*(var(--document-width) - 880px - 120px - 16px*2));
-        max-width: 300px;
-        margin-left: calc(-0.5*(var(--document-width) - 880px - 14px - 16px * 2));
-    }
-
-    #chat-controls {
-        position: absolute;
-        top: 16px;
-        right: 0;
-        width: calc(0.5*(var(--document-width) - 880px - 120px - 16px*2));
-        max-width: 400px;
-        margin-right: calc(-0.5*(var(--document-width) - 880px - 14px - 16px * 2));
-    }
+#chat-controls,
+#past-chats-row {
+    width: 260px;
+    max-width: 80vw;
+    padding: 0.5rem;
+    height: 100dvh;
+    flex-shrink: 0;
+    box-sizing: content-box;
+    z-index: 10;
+}
+
+#past-chats-row:not(.negative-header) {
+    max-width: calc(85vw - var(--header-width));
+}
+
+#chat-controls {
+    padding: 1rem;
+    padding-bottom: 0;
+    overflow-y: scroll;
+}
+
+#chat-controls > :nth-child(1) {
+    padding: 0.5rem;
+}
+
+#past-chats-row + * {
+    width: unset;
+    flex-grow: 1;
+    flex-shrink: 1;
 }

 /* ----------------------------------------------
@@ -748,6 +836,8 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 ---------------------------------------------- */
 .options {
     z-index: 100 !important;
+    border: 1px solid var(--input-border-color);
+    border-radius: 0;
 }

 /* ----------------------------------------------
@@ -757,12 +847,12 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     position: fixed;
     bottom: 0;
     left: 0;
-    width: calc((100vw - 880px - 120px) /2);
+    width: calc(100vw / 2 - 600px);
+    z-index: 10000;
 }

 .pfp_character {
     position: relative;
-    z-index: 100;
 }

 .pfp_character:hover {
@@ -776,10 +866,14 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 }

 #past-chats {
-    max-height: calc(100vh - 195px);
+    max-height: calc(100dvh - 90px);
     overflow-y: scroll !important;
     border-radius: 0;
-    scrollbar-width: none; /* Hide scrollbar in Firefox by default */
+    scrollbar-width: auto;
+}
+
+#past-chats::-webkit-scrollbar {
+    display: block;
 }

 #past-chats label {
@@ -790,6 +884,24 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     border-radius: 0;
     padding-top: 8px;
     padding-bottom: 8px;
+    position: relative;
+    min-height: 42px !important;
+}
+
+#past-chats label::before {
+    content: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M8 9h8"></path><path d="M8 13h6"></path><path d="M18 4a3 3 0 0 1 3 3v8a3 3 0 0 1 -3 3h-5l-5 3v-3h-2a3 3 0 0 1 -3 -3v-8a3 3 0 0 1 3 -3h12z"></path></svg>');
+    position: absolute;
+    top: 12px;
+    left: 12px;
+    margin-right: 8px;
+}
+
+.dark #past-chats label::before {
+    content: url('data:image/svg+xml;utf8,<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M8 9h8"></path><path d="M8 13h6"></path><path d="M18 4a3 3 0 0 1 3 3v8a3 3 0 0 1 -3 3h-5l-5 3v-3h-2a3 3 0 0 1 -3 -3v-8a3 3 0 0 1 3 -3h12z"></path></svg>');
+}
+
+#past-chats label span {
+    margin-left: 29px;
 }

 #past-chats > :nth-child(2) {
@@ -797,23 +909,260 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 }

 #past-chats > :nth-child(3) {
-    gap: 0;
+    gap: 0.25rem;
 }

-#past-chats::-webkit-scrollbar {
-    display: none;
-}
-
-#past-chats:hover {
-    scrollbar-width: auto;
-}
-
-#past-chats:hover::-webkit-scrollbar {
-    display: block;
-}
-
-@media screen and (width < 1327px) {
-    #past-chats {
-        max-height: 300px;
-    }
-}
+#past-chats input {
+    display: none;
+}
+
+#past-chats label {
+    padding: 0.75rem;
+    font-size: 12.5px;
+    font-weight: 400;
+}
+
+#past-chats .selected,
+#past-chats label:hover {
+    border-radius: 0.5rem;
+}
+
+#past-chats label:hover {
+    cursor: pointer;
+}
+
+#past-chats-buttons,
+#delete-chat-row,
+#rename-row {
+    width: 100%;
+    justify-content: center;
+}
+
+#past-chats-row,
+#chat-controls {
+    width: 260px;
+    padding: 0.5rem;
+    height: calc(100dvh - 16px);
+    flex-shrink: 0;
+    box-sizing: content-box;
+}
+
+.sidebar-hidden {
+    width: 0 !important;
+    padding: 0 !important;
+    overflow: hidden;
+}
+
+#past-chats-toggle,
+#chat-controls-toggle,
+#navigation-toggle {
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    cursor: pointer;
+    user-select: none;
+    border-radius: 3px;
+    z-index: 1000;
+    position: fixed;
+    width: 2rem;
+    height: 2rem;
+    top: 0;
+}
+
+#past-chats-toggle svg,
+#chat-controls-toggle svg,
+#navigation-toggle svg {
+    pointer-events: none;
+}
+
+@media screen and (width <= 408px) {
+    #past-chats-toggle.past-chats-open {
+        top: 28px;
+    }
+
+    #chat-controls-toggle.chat-controls-open {
+        top: 28px;
+        right: calc(16px + min(260px, 80vw)) !important;
+    }
+}
+
+#past-chats-toggle.past-chats-open.negative-header {
+    left: calc(min(260px, 85vw) + 16px);
+}
+
+#past-chats-toggle.past-chats-open:not(.negative-header) {
+    left: calc(112px + min(260px, calc(85vw - var(--header-width))) + 16px);
+}
+
+#past-chats-toggle.past-chats-closed:not(.negative-header) {
+    left: 112px;
+}
+
+#past-chats-toggle.past-chats-closed.negative-header {
+    left: 0;
+    top: 28px;
+}
+
+@media screen and (width <= 924px) {
+    #past-chats-toggle.past-chats-closed.negative-header {
+        left: 28px;
+        top: 0;
+    }
+}
+
+.header_bar ~ * {
+    margin-left: var(--header-width);
+}
+
+/* Positions for chat-controls-toggle */
+#chat-controls-toggle.chat-controls-open {
+    right: calc(min(260px, 80vw) + 23px);
+}
+
+#chat-controls-toggle.chat-controls-closed {
+    right: 7px;
+}
+
+@media screen and (width <= 924px) {
+    #chat-controls.sidebar-shown {
+        position: fixed;
+        right: 0;
+    }
+
+    #past-chats-row.sidebar-shown {
+        position: fixed;
+    }
+}
+
+/* ----------------------------------------------
+    Dark theme
+---------------------------------------------- */
+.dark .header_bar {
+    background-color: var(--darker-gray) !important;
+}
+
+.dark .header_bar button.selected {
+    background: var(--selected-item-color-dark);
+}
+
+.dark #chat-input textarea {
+    background: var(--light-gray);
+    color: white !important;
+    border-color: #292c3b;
+}
+
+.dark #chat-input textarea::placeholder {
+    color: #9ca3af;
+}
+
+.dark .hover-menu {
+    background-color: var(--darker-gray);
+}
+
+.dark .hover-menu button {
+    border-color: var(--border-color-primary);
+}
+
+.dark #chat-controls,
+.dark #past-chats-row {
+    background-color: var(--darker-gray);
+    border: 0 !important;
+}
+
+.dark #past-chats .selected,
+.dark #past-chats label:hover {
+    background-color: var(--selected-item-color-dark) !important;
+}
+
+.dark #past-chats-row,
+.dark #chat-controls {
+    background-color: var(--darker-gray);
+}
+
+.dark #past-chats-toggle,
+.dark #chat-controls-toggle,
+.dark #navigation-toggle {
+    color: white;
+}
+
+.dark svg {
+    fill: white;
+    color: white;
+}
+
+@media screen and (width <= 408px) {
+    .dark #past-chats-toggle.past-chats-open {
+        background: var(--darker-gray);
+    }
+
+    .dark #chat-controls-toggle.chat-controls-open {
+        background: var(--darker-gray);
+    }
+}
+
+/* ----------------------------------------------
+    Light theme
+---------------------------------------------- */
+.header_bar {
+    background-color: var(--light-theme-gray) !important;
+}
+
+.header_bar button.selected {
+    background: white;
+}
+
+#chat-controls,
+#past-chats-row {
+    background-color: var(--light-theme-gray);
+}
+
+#chat-controls {
+    border-left: 1px solid #d9d9d0;
+}
+
+#past-chats-row {
+    border-right: 1px solid #d9d9d0;
+}
+
+#past-chats-toggle,
+#chat-controls-toggle,
+#navigation-toggle {
+    color: gray !important;
+}
+
+.mobile-top-bar {
+    position: fixed;
+    top: 0;
+    left: 0;
+    width: 100%;
+    height: 32px;
+    z-index: 2;
+    opacity: 0;
+    pointer-events: none;
+}
+
+@media screen and (width <= 924px) {
+    .mobile-top-bar {
+        opacity: 1;
+        pointer-events: auto;
+    }
+
+    .dark .mobile-top-bar {
+        background-color: var(--darker-gray);
+    }
+
+    .mobile-top-bar {
+        background-color: var(--light-theme-gray);
+    }
+}
+
+@media screen and (width <= 408px) {
+    #past-chats-toggle.past-chats-open {
+        background: var(--light-theme-gray);
+    }
+
+    #chat-controls-toggle.chat-controls-open {
+        background: var(--light-theme-gray);
+    }
+}


@@ -1 +1 @@
-TTS==0.21.*
+coqui-tts==0.25.1
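For existing installs of the coqui_tts extension, this swaps the unmaintained `TTS` package for the `coqui-tts` fork; a plausible manual upgrade inside the project's environment:

```txt
pip uninstall TTS
pip install "coqui-tts==0.25.1"
```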


@@ -143,17 +143,16 @@ def convert_history(history):
     new_history = []
     for entry in history:
         if isinstance(entry['content'], list):
-            image_url = None
-            content = None
             for item in entry['content']:
                 if not isinstance(item, dict):
                     continue

+                image_url = None
+                content = None
                 if item['type'] == 'image_url' and isinstance(item['image_url'], dict):
                     image_url = item['image_url']['url']
                 elif item['type'] == 'text' and isinstance(item['text'], str):
                     content = item['text']

                 if image_url:
                     new_history.append({"image_url": image_url, "role": "user"})

                 if content:
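The hunk above moves the `image_url`/`content` reset inside the item loop. As far as the shown context indicates, the old placement let a value set by an earlier list item leak into later iterations, so a message carrying both an image and a text part could append the image entry more than once. For reference, a list-valued `content` entry looks roughly like this OpenAI-style payload (values illustrative):

```python
# Illustrative multimodal message: with the pre-fix code, the iteration that
# handles the "text" item still saw image_url left over from the first item.
entry = {
    "role": "user",
    "content": [
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
        {"type": "text", "text": "What is in this image?"},
    ],
}
```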


@@ -18,16 +18,18 @@ document.querySelector(".header_bar").addEventListener("click", function(event)
     if (extensionsVisible) {
         if (extensions) {
             extensions.style.display = "flex";
-            extensions.style.maxWidth = chatVisible ? "880px" : "none";
-            extensions.style.padding = chatVisible ? "0px" : "15px";
         }

         this.style.marginBottom = chatVisible ? "0px" : "19px";

         if (chatVisible && !showControlsChecked) {
-            document.querySelectorAll("#chat-tab > div > :nth-child(n+2), #extensions").forEach(element => {
+            document.querySelectorAll(
+                "#chat-tab > div > :nth-child(1), #chat-tab > div > :nth-child(3), #chat-tab > div > :nth-child(4), #extensions"
+            ).forEach(element => {
                 element.style.display = "none";
             });
         }
     } else {
         this.style.marginBottom = "19px";
         if (extensions) extensions.style.display = "none";
@@ -132,8 +134,7 @@ targetElement.addEventListener("scroll", function() {
 const observer = new MutationObserver(function(mutations) {
     updateCssProperties();

-    const firstChild = targetElement.children[0];
-    if (firstChild.classList.contains("generating")) {
+    if (targetElement.classList.contains("_generating")) {
         typing.parentNode.classList.add("visible-dots");
         document.getElementById("stop").style.display = "flex";
         document.getElementById("Generate").style.display = "none";
@@ -255,7 +256,7 @@ for (i = 0; i < slimDropdownElements.length; i++) {
 // The show/hide events were adapted from:
 // https://github.com/SillyTavern/SillyTavern/blob/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/script.js
 //------------------------------------------------
-var buttonsInChat = document.querySelectorAll("#chat-tab:not(.old-ui) #chat-buttons button");
+var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button");
 var button = document.getElementById("hover-element-button");
 var menu = document.getElementById("hover-menu");
 var istouchscreen = (navigator.maxTouchPoints > 0) || "ontouchstart" in document.documentElement;
@@ -290,12 +291,6 @@ if (buttonsInChat.length > 0) {
             thisButton.innerHTML = newText;
         }
     }
-} else {
-    buttonsInChat = document.querySelectorAll("#chat-tab.old-ui #chat-buttons button");
-    for (let i = 0; i < buttonsInChat.length; i++) {
-        buttonsInChat[i].textContent = buttonsInChat[i].textContent.replace(/ \(.*?\)/, "");
-    }
-
-    document.getElementById("gr-hover-container").style.display = "none";
 }

 function isMouseOverButtonOrMenu() {
@@ -339,6 +334,8 @@ menu.addEventListener("mouseleave", function () {
 // Add event listener for click anywhere in the document
 document.addEventListener("click", function (event) {
+    const target = event.target;
+
     // Check if the click is outside the button/menu and the menu is visible
     if (!isMouseOverButtonOrMenu() && menu.style.display === "flex") {
         hideMenu();
@@ -347,6 +344,21 @@ document.addEventListener("click", function (event) {
     if (event.target.classList.contains("pfp_character")) {
         toggleBigPicture();
     }
+
+    // Handle sidebar clicks on mobile
+    if (isMobile()) {
+        // Check if the click did NOT originate from any of the specified toggle buttons or elements
+        if (
+            target.closest("#navigation-toggle") !== navigationToggle &&
+            target.closest("#past-chats-toggle") !== pastChatsToggle &&
+            target.closest("#chat-controls-toggle") !== chatControlsToggle &&
+            target.closest(".header_bar") !== headerBar &&
+            target.closest("#past-chats-row") !== pastChatsRow &&
+            target.closest("#chat-controls") !== chatControlsRow
+        ) {
+            handleIndividualSidebarClose(event);
+        }
+    }
 });

 //------------------------------------------------
@@ -361,10 +373,9 @@ for (var i = 0; i < 2; i++) {
     parent.insertBefore(elementToMove, parent.firstChild);

 //------------------------------------------------
-// Make the chat input grow upwards instead of downwards
+// Position the chat input
 //------------------------------------------------
-document.getElementById("show-controls").parentNode.style.position = "absolute";
-document.getElementById("show-controls").parentNode.style.bottom = "0px";
+document.getElementById("show-controls").parentNode.classList.add("chat-input-positioned");

 //------------------------------------------------
 // Focus on the chat input
@@ -444,20 +455,10 @@ function updateCssProperties() {
     // Check if the chat container is visible
     if (chatContainer.clientHeight > 0) {
-        var numericHeight = chatContainer.parentNode.clientHeight - chatInputHeight + 40 - 100;
-        if (document.getElementById("chat-tab").style.paddingBottom != "") {
-            numericHeight += 20;
-        }
-
-        const newChatHeight = `${numericHeight}px`;
+        const newChatHeight = `${chatContainer.parentNode.clientHeight - chatInputHeight + 40 - 100 - 20}px`;

         document.documentElement.style.setProperty("--chat-height", newChatHeight);
         document.documentElement.style.setProperty("--input-delta", `${chatInputHeight - 40}px`);

-        // Get and set header height
-        const header = document.querySelector(".header_bar");
-        const headerHeight = `${header.clientHeight}px`;
-        document.documentElement.style.setProperty("--header-height", headerHeight);
-
         // Adjust scrollTop based on input height change
         if (chatInputHeight !== currentChatInputHeight) {
             if (!isScrolled && chatInputHeight < currentChatInputHeight) {
@@ -477,18 +478,6 @@ new ResizeObserver(updateCssProperties).observe(document.querySelector("#chat-in
 // Handle changes in window size
 window.addEventListener("resize", updateCssProperties);

-//------------------------------------------------
-// Keep track of the display width to position the past
-// chats dropdown on desktop
-//------------------------------------------------
-function updateDocumentWidth() {
-    var updatedWidth = window.innerWidth || document.documentElement.clientWidth || document.body.clientWidth;
-    document.documentElement.style.setProperty("--document-width", updatedWidth + "px");
-}
-
-updateDocumentWidth();
-window.addEventListener("resize", updateDocumentWidth);
-
 //------------------------------------------------
 // Focus on the rename text area when it becomes visible
 //------------------------------------------------
@@ -568,6 +557,8 @@ function moveToChatTab() {
         grandParent.style.display = "none";
     }

+    grandParent.children[0].style.minWidth = "100%";
+
     const chatControlsFirstChild = document.querySelector("#chat-controls").firstElementChild;
     const newParent = chatControlsFirstChild;
     let newPosition = newParent.children.length - 2;
@@ -586,6 +577,7 @@ function restoreOriginalPosition() {
         document.getElementById("save-character").style.display = "";
         movedElement.style.display = "";
+        movedElement.children[0].style.minWidth = "";
     }
 }
@@ -612,3 +604,222 @@ window.addEventListener("beforeunload", function (event) {
 });

 moveToChatTab();
+
+//------------------------------------------------
+// Buttons to toggle the sidebars
+//------------------------------------------------
+const leftArrowSVG = `
+<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="tabler-icon tabler-icon-arrow-bar-left">
+  <path d="M4 12l10 0"></path>
+  <path d="M4 12l4 4"></path>
+  <path d="M4 12l4 -4"></path>
+  <path d="M20 4l0 16"></path>
+</svg>`;
+
+const rightArrowSVG = `
+<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="tabler-icon tabler-icon-arrow-bar-right">
+  <path d="M20 12l-10 0"></path>
+  <path d="M20 12l-4 4"></path>
+  <path d="M20 12l-4 -4"></path>
+  <path d="M4 4l0 16"></path>
+</svg>`;
+
+const hamburgerMenuSVG = `
+<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-hamburger-menu">
+  <line x1="3" y1="12" x2="21" y2="12"></line>
+  <line x1="3" y1="6" x2="21" y2="6"></line>
+  <line x1="3" y1="18" x2="21" y2="18"></line>
+</svg>`;
+
+const closeMenuSVG = `
+<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-close-menu">
+  <line x1="18" y1="6" x2="6" y2="18"></line>
+  <line x1="6" y1="6" x2="18" y2="18"></line>
+</svg>`;
+
+const chatTab = document.getElementById("chat-tab");
+const pastChatsRow = document.getElementById("past-chats-row");
+const chatControlsRow = document.getElementById("chat-controls");
+
+if (chatTab) {
+    // Create past-chats-toggle div
+    const pastChatsToggle = document.createElement("div");
+    pastChatsToggle.id = "past-chats-toggle";
+    pastChatsToggle.innerHTML = leftArrowSVG; // Set initial icon to left arrow
+    pastChatsToggle.classList.add("past-chats-open"); // Set initial position
+
+    // Create chat-controls-toggle div
+    const chatControlsToggle = document.createElement("div");
+    chatControlsToggle.id = "chat-controls-toggle";
+    chatControlsToggle.innerHTML = rightArrowSVG; // Set initial icon to right arrow
+    chatControlsToggle.classList.add("chat-controls-open"); // Set initial position
+
+    // Append both elements to the chat-tab
+    chatTab.appendChild(pastChatsToggle);
+    chatTab.appendChild(chatControlsToggle);
+}
+
+// Create navigation toggle div
+const navigationToggle = document.createElement("div");
+navigationToggle.id = "navigation-toggle";
+navigationToggle.innerHTML = leftArrowSVG; // Set initial icon to right arrow
+navigationToggle.classList.add("navigation-left"); // Set initial position
+headerBar.appendChild(navigationToggle);
+
+// Retrieve the dynamically created toggle buttons
+const pastChatsToggle = document.getElementById("past-chats-toggle");
+const chatControlsToggle = document.getElementById("chat-controls-toggle");
+
+function handleIndividualSidebarClose(event) {
+    const target = event.target;
+
+    // Close navigation bar if click is outside and it is open
+    if (!headerBar.contains(target) && !headerBar.classList.contains("sidebar-hidden")) {
+        toggleSidebar(headerBar, navigationToggle, true);
+    }
+
+    // Close past chats row if click is outside and it is open
+    if (!pastChatsRow.contains(target) && !pastChatsRow.classList.contains("sidebar-hidden")) {
+        toggleSidebar(pastChatsRow, pastChatsToggle, true);
+    }
+
+    // Close chat controls row if click is outside and it is open
+    if (!chatControlsRow.contains(target) && !chatControlsRow.classList.contains("sidebar-hidden")) {
+        toggleSidebar(chatControlsRow, chatControlsToggle, true);
+    }
+}
+
+function toggleSidebar(sidebar, toggle, forceClose = false) {
+    const isCurrentlyHidden = sidebar.classList.contains("sidebar-hidden");
+    const shouldClose = !isCurrentlyHidden;
+
+    // Apply visibility classes
+    sidebar.classList.toggle("sidebar-hidden", shouldClose);
+    sidebar.classList.toggle("sidebar-shown", !shouldClose);
+
+    if (sidebar === headerBar) {
+        // Special handling for header bar
+        document.documentElement.style.setProperty("--header-width", shouldClose ? "0px" : "112px");
+        pastChatsRow.classList.toggle("negative-header", shouldClose);
+        pastChatsToggle.classList.toggle("negative-header", shouldClose);
+        toggle.innerHTML = shouldClose ? hamburgerMenuSVG : closeMenuSVG;
+    } else if (sidebar === pastChatsRow) {
+        // Past chats sidebar
+        toggle.classList.toggle("past-chats-closed", shouldClose);
+        toggle.classList.toggle("past-chats-open", !shouldClose);
+        toggle.innerHTML = shouldClose ? rightArrowSVG : leftArrowSVG;
+    } else if (sidebar === chatControlsRow) {
+        // Chat controls sidebar
+        toggle.classList.toggle("chat-controls-closed", shouldClose);
+        toggle.classList.toggle("chat-controls-open", !shouldClose);
+        toggle.innerHTML = shouldClose ? leftArrowSVG : rightArrowSVG;
+    }
+
+    // Mobile handling
+    if (isMobile()) {
+        sidebar.classList.toggle("sidebar-shown", !shouldClose);
+    }
+}
+
+// Function to check if the device is mobile
+function isMobile() {
+    return window.innerWidth <= 924;
+}
+
+// Function to initialize sidebars
+function initializeSidebars() {
+    const isOnMobile = isMobile();
+
+    if (isOnMobile) {
+        // Mobile state: Hide sidebars and set closed states
+        [pastChatsRow, chatControlsRow, headerBar].forEach(el => {
+            el.classList.add("sidebar-hidden");
+            el.classList.remove("sidebar-shown");
+        });
+
+        document.documentElement.style.setProperty("--header-width", "0px");
+        pastChatsRow.classList.add("negative-header");
+        pastChatsToggle.classList.add("negative-header", "past-chats-closed");
+        pastChatsToggle.classList.remove("past-chats-open");
+
+        [chatControlsToggle, navigationToggle].forEach(el => {
+            el.classList.add("chat-controls-closed");
+            el.classList.remove("chat-controls-open");
+        });
+
+        pastChatsToggle.innerHTML = rightArrowSVG;
+        chatControlsToggle.innerHTML = leftArrowSVG;
+        navigationToggle.innerHTML = hamburgerMenuSVG;
+    } else {
+        // Desktop state: Show sidebars and set open states
+        [pastChatsRow, chatControlsRow].forEach(el => {
+            el.classList.remove("sidebar-hidden", "sidebar-shown");
+        });
+
+        pastChatsToggle.classList.add("past-chats-open");
+        pastChatsToggle.classList.remove("past-chats-closed");
+
+        [chatControlsToggle, navigationToggle].forEach(el => {
+            el.classList.add("chat-controls-open");
+            el.classList.remove("chat-controls-closed");
+        });
+
+        pastChatsToggle.innerHTML = leftArrowSVG;
+        chatControlsToggle.innerHTML = rightArrowSVG;
+        navigationToggle.innerHTML = closeMenuSVG;
+    }
+}
+
+// Run the initializer when the page loads
+initializeSidebars();
+
+// Add click event listeners to toggle buttons
+pastChatsToggle.addEventListener("click", () => {
+    toggleSidebar(pastChatsRow, pastChatsToggle);
+});
+
+chatControlsToggle.addEventListener("click", () => {
+    toggleSidebar(chatControlsRow, chatControlsToggle);
+});
+
+navigationToggle.addEventListener("click", () => {
+    toggleSidebar(headerBar, navigationToggle);
+});
+
+//------------------------------------------------
+// Fixes #chat-input textarea height issue
+// for devices with width <= 924px
+//------------------------------------------------
+if (isMobile()) {
+    // Target the textarea
+    const textarea = document.querySelector("#chat-input textarea");
+
+    if (textarea) {
+        // Simulate adding and removing a newline
+        textarea.value += "\n";
+        textarea.dispatchEvent(new Event("input", { bubbles: true }));
+        textarea.value = textarea.value.slice(0, -1);
+        textarea.dispatchEvent(new Event("input", { bubbles: true }));
+    }
+}
+
+//------------------------------------------------
+// Create a top navigation bar on mobile
+//------------------------------------------------
+function createMobileTopBar() {
+    const chatTab = document.getElementById("chat-tab");
+
+    // Only create the top bar if it doesn't already exist
+    if (chatTab && !chatTab.querySelector(".mobile-top-bar")) {
+        const topBar = document.createElement("div");
+        topBar.classList.add("mobile-top-bar");
+
+        // Insert the top bar as the first child of chat-tab
+        chatTab.appendChild(topBar);
+    }
+}
+
+createMobileTopBar();


@@ -1,4 +1,6 @@
-const belowChatInput = document.querySelectorAll("#chat-tab > div > :nth-child(n+2), #extensions");
+const belowChatInput = document.querySelectorAll(
+    "#chat-tab > div > :nth-child(1), #chat-tab > div > :nth-child(3), #chat-tab > div > :nth-child(4), #extensions"
+);
 const chatParent = document.querySelector(".chat-parent");

 function toggle_controls(value) {


@@ -47,7 +47,7 @@ def my_open(*args, **kwargs):
     if len(args) > 1 and args[1] == 'rb':
        file_contents = file_contents.decode('utf-8')

-    file_contents = file_contents.replace('\t\t<script\n\t\t\tsrc="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.9/iframeResizer.contentWindow.min.js"\n\t\t\tasync\n\t\t></script>', '')
+    file_contents = file_contents.replace('\t\t<script\n\t\t\tsrc="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"\n\t\t\tasync\n\t\t></script>', '')
     file_contents = file_contents.replace('cdnjs.cloudflare.com', '127.0.0.1')
     file_contents = file_contents.replace(
         '</head>',


@@ -352,13 +352,17 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
     for j, reply in enumerate(generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True, for_ui=for_ui)):

         # Extract the reply
-        visible_reply = reply
         if state['mode'] in ['chat', 'chat-instruct']:
-            visible_reply = re.sub("(<USER>|<user>|{{user}})", state['name1'], reply)
+            visible_reply = re.sub("(<USER>|<user>|{{user}})", state['name1'], reply + '▍')
+        else:
+            visible_reply = reply + '▍'

         visible_reply = html.escape(visible_reply)

         if shared.stop_everything:
+            if output['visible'][-1][1].endswith('▍'):
+                output['visible'][-1][1] = output['visible'][-1][1][:-1]
+
             output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)
             yield output
             return
@@ -374,6 +378,9 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
             if is_stream:
                 yield output
 
+    if output['visible'][-1][1].endswith('▍'):
+        output['visible'][-1][1] = output['visible'][-1][1][:-1]
+
     output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)
     yield output
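The two hunks above share one mechanism: a cursor glyph ('▍') is appended to the visible reply while tokens are still streaming, then stripped (the endswith()/[:-1] pair) before the final text is handed to output extensions. A minimal standalone sketch of that pattern, with illustrative names not taken from the codebase:

```python
CURSOR = '▍'  # glyph shown at the end of the reply while generation is running

def stream_with_cursor(token_chunks):
    text = ''
    for chunk in token_chunks:
        text += chunk
        yield text + CURSOR  # intermediate updates carry the cursor

    yield text  # final update: cursor removed, as in the endswith()/[:-1] logic above

for partial in stream_with_cursor(['Hello', ', ', 'world']):
    print(repr(partial))
# 'Hello▍' -> 'Hello, ▍' -> 'Hello, world▍' -> 'Hello, world'
```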
@@ -606,9 +613,9 @@ def find_all_histories_with_first_prompts(state):
 
             first_prompt = first_prompt.strip()
 
-            # Truncate the first prompt if it's longer than 32 characters
-            if len(first_prompt) > 32:
-                first_prompt = first_prompt[:29] + '...'
+            # Truncate the first prompt if it's longer than 30 characters
+            if len(first_prompt) > 30:
+                first_prompt = first_prompt[:30-3] + '...'
 
         result.append((first_prompt, filename))
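Worked out, the new slice keeps the ellipsis inside the 30-character budget (the sample prompt is illustrative):

```python
first_prompt = "Write a story about a dragon who learns to paint"
if len(first_prompt) > 30:
    first_prompt = first_prompt[:30-3] + '...'  # 27 characters + '...'

print(first_prompt)       # 'Write a story about a drago...'
print(len(first_prompt))  # 30
```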
@@ -1087,9 +1094,8 @@ def handle_delete_chat_confirm_click(state):
 
 def handle_rename_chat_click():
     return [
-        gr.update(visible=True, value="My New Chat"),
+        gr.update(value="My New Chat"),
         gr.update(visible=True),
-        gr.update(visible=True)
     ]
 
@@ -1100,16 +1106,14 @@ def handle_rename_chat_confirm(rename_to, state):
     return [
         gr.update(choices=histories, value=rename_to),
         gr.update(visible=False),
-        gr.update(visible=False),
-        gr.update(visible=False)
     ]
 
 
 def handle_upload_chat_history(load_chat_history, state):
     history = start_new_chat(state)
     history = load_history_json(load_chat_history, history)
-    histories = find_all_histories_with_first_prompts(state)
     save_history(history, state['unique_id'], state['character_menu'], state['mode'])
+    histories = find_all_histories_with_first_prompts(state)
 
     html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
 
@@ -1209,7 +1213,7 @@ def handle_delete_template_click(template):
     return [
         f"{template}.yaml",
         "instruction-templates/",
-        gr.update(visible=True)
+        gr.update(visible=False)
     ]


@@ -2,17 +2,19 @@ import traceback
 from pathlib import Path
 
 import torch
 from exllamav2 import (
     ExLlamaV2,
     ExLlamaV2Cache,
     ExLlamaV2Cache_8bit,
     ExLlamaV2Cache_Q4,
+    ExLlamaV2Cache_Q6,
+    ExLlamaV2Cache_Q8,
     ExLlamaV2Cache_TP,
     ExLlamaV2Config,
     ExLlamaV2Tokenizer
 )
 from exllamav2.generator import ExLlamaV2Sampler, ExLlamaV2StreamingGenerator
 
 from modules import shared
 from modules.logging_colors import logger
 from modules.text_generation import get_max_prompt_length
 
@@ -57,12 +59,20 @@ class Exllamav2Model:
             model.load(split)
 
         # Determine the correct cache type
-        if shared.args.cache_8bit:
+        kv_cache_type = shared.args.cache_type.lower()
+        if kv_cache_type == 'fp16':
+            cache_type = ExLlamaV2Cache
+        elif kv_cache_type == 'fp8':
             cache_type = ExLlamaV2Cache_8bit
-        elif shared.args.cache_4bit:
+        elif kv_cache_type == 'q8':
+            cache_type = ExLlamaV2Cache_Q8
+        elif kv_cache_type == 'q6':
+            cache_type = ExLlamaV2Cache_Q6
+        elif kv_cache_type == 'q4':
             cache_type = ExLlamaV2Cache_Q4
         else:
-            cache_type = ExLlamaV2Cache
+            raise ValueError(f"Invalid cache type for ExLlamaV2: {kv_cache_type}. Valid options are: fp16, fp8, q8, q6, q4.")
 
         # Use TP if specified
         if shared.args.enable_tp:
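Since the same if/elif ladder is duplicated here and in Exllamav2HF below, it can equally be read as a lookup table from the --cache_type string to an ExLlamaV2 cache class. A sketch of that equivalent phrasing, for illustration only (resolve_exllamav2_cache is not a function from the diff):

```python
from exllamav2 import (
    ExLlamaV2Cache,       # fp16: full-precision KV cache (default)
    ExLlamaV2Cache_8bit,  # fp8
    ExLlamaV2Cache_Q4,    # q4
    ExLlamaV2Cache_Q6,    # q6
    ExLlamaV2Cache_Q8,    # q8
)

CACHE_CLASSES = {
    'fp16': ExLlamaV2Cache,
    'fp8': ExLlamaV2Cache_8bit,
    'q8': ExLlamaV2Cache_Q8,
    'q6': ExLlamaV2Cache_Q6,
    'q4': ExLlamaV2Cache_Q4,
}

def resolve_exllamav2_cache(kv_cache_type: str):
    # Equivalent to the if/elif chain above, including the error path
    try:
        return CACHE_CLASSES[kv_cache_type.lower()]
    except KeyError:
        raise ValueError(f"Invalid cache type for ExLlamaV2: {kv_cache_type}. Valid options are: fp16, fp8, q8, q6, q4.")
```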


@@ -4,18 +4,20 @@ from pathlib import Path
 from typing import Any, Dict, Optional, Union
 
 import torch
-from torch.nn import CrossEntropyLoss
-from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
-from transformers.modeling_outputs import CausalLMOutputWithPast
 from exllamav2 import (
     ExLlamaV2,
     ExLlamaV2Cache,
     ExLlamaV2Cache_8bit,
     ExLlamaV2Cache_Q4,
+    ExLlamaV2Cache_Q6,
+    ExLlamaV2Cache_Q8,
     ExLlamaV2Cache_TP,
     ExLlamaV2Config
 )
+from torch.nn import CrossEntropyLoss
+from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
+from transformers.modeling_outputs import CausalLMOutputWithPast
 
 from modules import shared
 from modules.logging_colors import logger
 
@@ -45,12 +47,20 @@ class Exllamav2HF(PreTrainedModel):
             self.ex_model.load(split)
 
         # Determine the correct cache type
-        if shared.args.cache_8bit:
+        kv_cache_type = shared.args.cache_type.lower()
+        if kv_cache_type == 'fp16':
+            cache_type = ExLlamaV2Cache
+        elif kv_cache_type == 'fp8':
             cache_type = ExLlamaV2Cache_8bit
-        elif shared.args.cache_4bit:
+        elif kv_cache_type == 'q8':
+            cache_type = ExLlamaV2Cache_Q8
+        elif kv_cache_type == 'q6':
+            cache_type = ExLlamaV2Cache_Q6
+        elif kv_cache_type == 'q4':
             cache_type = ExLlamaV2Cache_Q4
         else:
-            cache_type = ExLlamaV2Cache
+            raise ValueError(f"Invalid cache type for ExLlamaV2: {kv_cache_type}. Valid options are: fp16, fp8, q8, q6, q4.")
 
         # Use TP if specified
         if shared.args.enable_tp:


@@ -104,6 +104,8 @@ def convert_to_markdown(string):
     result = ''
     is_code = False
     is_latex = False
+    previous_line_empty = True
+
     for line in string.split('\n'):
         stripped_line = line.strip()
 
@@ -120,13 +122,20 @@ def convert_to_markdown(string):
         elif stripped_line.endswith('\\\\]'):
             is_latex = False
 
-        result += line
-        # Don't add an extra \n for tables, code, or LaTeX
-        if is_code or is_latex or line.startswith('|'):
-            result += '\n'
+        # Preserve indentation for lists and code blocks
+        if stripped_line.startswith('-') or stripped_line.startswith('*') or stripped_line.startswith('+') or stripped_line.startswith('>') or re.match(r'\d+\.', stripped_line):
+            result += line + '\n'
+            previous_line_empty = False
+        elif is_code or is_latex or line.startswith('|'):
+            result += line + '\n'
+            previous_line_empty = False
         else:
-            result += '\n\n'
+            if previous_line_empty:
+                result += line.strip() + '\n'
+            else:
+                result += line.strip() + '\n\n'
+
+            previous_line_empty = stripped_line == ''
 
     result = result.strip()
     if is_code:
@@ -145,14 +154,15 @@ def convert_to_markdown(string):
         result = re.sub(list_item_pattern, r'\g<1> ' + delete_str, result)
 
         # Convert to HTML using markdown
-        html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'])
+        html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'], tab_length=2)
 
         # Remove the delete string from the HTML output
         pos = html_output.rfind(delete_str)
         if pos > -1:
             html_output = html_output[:pos] + html_output[pos + len(delete_str):]
     else:
-        html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'])
+        # Convert to HTML using markdown
+        html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'], tab_length=2)
 
     # Unescape code blocks
     pattern = re.compile(r'<code[^>]*>(.*?)</code>', re.DOTALL)
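The new tab_length=2 matters because models often indent nested list items by two spaces; python-markdown's default tab_length of 4 treats such lines as continuations of the parent item rather than a nested list. A quick way to see the difference (assumes the markdown package is installed):

```python
import markdown

text = "- parent\n  - child indented by two spaces"

# Default tab_length (4): the child is folded into the parent item
print(markdown.markdown(text, extensions=['fenced_code', 'tables']))

# tab_length=2, as in the hunk above: the child becomes a nested <ul>
print(markdown.markdown(text, extensions=['fenced_code', 'tables'], tab_length=2))
```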


@@ -9,6 +9,7 @@ from transformers.modeling_outputs import CausalLMOutputWithPast
 
 from modules import shared
 from modules.llama_cpp_python_hijack import llama_cpp_lib
+from modules.llamacpp_model import get_llamacpp_cache_type_for_string
 from modules.logging_colors import logger
 
@@ -196,12 +197,9 @@ class LlamacppHF(PreTrainedModel):
             'flash_attn': shared.args.flash_attn
         }
 
-        if shared.args.cache_4bit:
-            params["type_k"] = 2
-            params["type_v"] = 2
-        elif shared.args.cache_8bit:
-            params["type_k"] = 8
-            params["type_v"] = 8
+        if shared.args.cache_type != 'fp16':
+            params["type_k"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
+            params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
 
         Llama = llama_cpp_lib().Llama
         model = Llama(**params)


@@ -10,6 +10,35 @@ from modules.llama_cpp_python_hijack import llama_cpp_lib
 from modules.logging_colors import logger
 from modules.text_generation import get_max_prompt_length
 
+llamacpp_quant_mapping = {
+    'f32': 0,
+    'fp16': 1,
+    'q4_0': 2,
+    'q4_1': 3,
+    'q5_0': 6,
+    'q5_1': 7,
+    'q8_0': 8,
+    'q8_1': 9,
+    'q2_k': 10,
+    'q3_k': 11,
+    'q4_k': 12,
+    'q5_k': 13,
+    'q6_k': 14,
+    'q8_k': 15,
+    'iq4_nl': 20,
+    'bf16': 30,
+}
+
+llamacpp_valid_cache_types = {'fp16', 'q8_0', 'q4_0'}
+
+
+def get_llamacpp_cache_type_for_string(quant_type: str):
+    quant_type = quant_type.lower()
+    if quant_type in llamacpp_valid_cache_types:
+        return llamacpp_quant_mapping[quant_type]
+    else:
+        raise ValueError(f"Invalid cache type for llama.cpp: {quant_type}. Valid options are: fp16, q8_0, q4_0.")
+
+
 def ban_eos_logits_processor(eos_token, input_ids, logits):
     logits[eos_token] = -float('inf')
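The integers in llamacpp_quant_mapping correspond to llama.cpp's ggml_type enum values, which is the form llama-cpp-python expects for type_k/type_v; only the three entries in llamacpp_valid_cache_types are accepted as KV cache types. A short usage sketch of the helper above:

```python
print(get_llamacpp_cache_type_for_string('q8_0'))  # 8
print(get_llamacpp_cache_type_for_string('Q4_0'))  # 2 (lookup is case-insensitive)
print(get_llamacpp_cache_type_for_string('fp16'))  # 1

try:
    get_llamacpp_cache_type_for_string('q5_k')  # in the mapping, but not a valid cache type
except ValueError as e:
    print(e)
```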
@@ -75,12 +104,9 @@ class LlamaCppModel:
             'flash_attn': shared.args.flash_attn
         }
 
-        if shared.args.cache_4bit:
-            params["type_k"] = 2
-            params["type_v"] = 2
-        elif shared.args.cache_8bit:
-            params["type_k"] = 8
-            params["type_v"] = 8
+        if shared.args.cache_type != 'fp16':
+            params["type_k"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
+            params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
 
         result.model = Llama(**params)
 
         if cache_capacity > 0:


@@ -31,8 +31,7 @@ loaders_and_params = OrderedDict({
     'llama.cpp': [
         'n_ctx',
         'n_gpu_layers',
-        'cache_8bit',
-        'cache_4bit',
+        'cache_type',
         'tensor_split',
         'n_batch',
         'threads',
@@ -54,8 +53,7 @@ loaders_and_params = OrderedDict({
     'llamacpp_HF': [
         'n_ctx',
         'n_gpu_layers',
-        'cache_8bit',
-        'cache_4bit',
+        'cache_type',
         'tensor_split',
         'n_batch',
         'threads',
@@ -87,8 +85,7 @@ loaders_and_params = OrderedDict({
         'no_xformers',
         'no_sdpa',
         'num_experts_per_token',
-        'cache_8bit',
-        'cache_4bit',
+        'cache_type',
         'autosplit',
         'enable_tp',
         'alpha_value',
@@ -103,8 +100,7 @@ loaders_and_params = OrderedDict({
         'no_xformers',
         'no_sdpa',
         'num_experts_per_token',
-        'cache_8bit',
-        'cache_4bit',
+        'cache_type',
         'autosplit',
         'enable_tp',
         'alpha_value',


@@ -81,7 +81,6 @@ group.add_argument('--model-menu', action='store_true', help='Show a model menu
 group.add_argument('--settings', type=str, help='Load the default interface settings from this yaml file. See settings-template.yaml for an example. If you create a file called settings.yaml, this file will be loaded by default without the need to use the --settings flag.')
 group.add_argument('--extensions', type=str, nargs='+', help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.')
 group.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')
-group.add_argument('--chat-buttons', action='store_true', help='Show buttons on the chat tab instead of a hover menu.')
 group.add_argument('--idle-timeout', type=int, default=0, help='Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.')
 
 # Model loader
@@ -143,8 +142,6 @@ group.add_argument('--cfg-cache', action='store_true', help='ExLlamav2_HF: Creat
 group.add_argument('--no_flash_attn', action='store_true', help='Force flash-attention to not be used.')
 group.add_argument('--no_xformers', action='store_true', help='Force xformers to not be used.')
 group.add_argument('--no_sdpa', action='store_true', help='Force Torch SDPA to not be used.')
-group.add_argument('--cache_8bit', action='store_true', help='Use 8-bit cache to save VRAM.')
-group.add_argument('--cache_4bit', action='store_true', help='Use Q4 cache to save VRAM.')
 group.add_argument('--num_experts_per_token', type=int, default=2, help='Number of experts to use for generation. Applies to MoE models like Mixtral.')
 group.add_argument('--enable_tp', action='store_true', help='Enable Tensor Parallelism (TP) in ExLlamaV2.')
@@ -167,6 +164,10 @@ group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='B
 group = parser.add_argument_group('TensorRT-LLM')
 group.add_argument('--cpp-runner', action='store_true', help='Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn\'t support streaming yet.')
 
+# Cache
+group = parser.add_argument_group('Cache')
+group.add_argument('--cache_type', type=str, default='fp16', help='KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.')
+
 # DeepSpeed
 group = parser.add_argument_group('DeepSpeed')
 group.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
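With the additions above, the separate --cache_8bit/--cache_4bit switches collapse into the single loader-aware --cache_type flag. For example (the model names are placeholders):

```txt
# llama.cpp loader with an 8-bit quantized KV cache
python server.py --model mymodel.gguf --loader llama.cpp --cache_type q8_0

# ExLlamaV2 loader with a 4-bit quantized KV cache
python server.py --model mymodel-exl2 --loader exllamav2 --cache_type q4
```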
@@ -191,6 +192,7 @@ group.add_argument('--gradio-auth-path', type=str, help='Set the Gradio authenti
 group.add_argument('--ssl-keyfile', type=str, help='The path to the SSL certificate key file.', default=None)
 group.add_argument('--ssl-certfile', type=str, help='The path to the SSL certificate cert file.', default=None)
 group.add_argument('--subpath', type=str, help='Customize the subpath for gradio, use with reverse proxy')
+group.add_argument('--old-colors', action='store_true', help='Use the legacy Gradio colors, before the December/2024 update.')
 
 # API
 group = parser.add_argument_group('API')
@@ -213,6 +215,9 @@ group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED')
 group.add_argument('--checkpoint', type=str, help='DEPRECATED')
 group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED')
 group.add_argument('--no_inject_fused_attention', action='store_true', help='DEPRECATED')
+group.add_argument('--cache_4bit', action='store_true', help='DEPRECATED')
+group.add_argument('--cache_8bit', action='store_true', help='DEPRECATED')
+group.add_argument('--chat-buttons', action='store_true', help='DEPRECATED')
 
 args = parser.parse_args()
 args_defaults = parser.parse_args([])
@@ -269,6 +274,58 @@ def fix_loader_name(name):
         return 'TensorRT-LLM'
 
 
+def transform_legacy_kv_cache_options(opts):
+    # Handle both argparse.Namespace and dict here
+    def get(key):
+        return opts.get(key) if isinstance(opts, dict) else getattr(opts, key, None)
+
+    def set(key, value):
+        if isinstance(opts, dict):
+            opts[key] = value
+        else:
+            setattr(opts, key, value)
+
+    def del_key(key, fallback_set):
+        # only remove from user dict, can't delete from argparse.Namespace
+        if type(opts) is dict:
+            if key in opts:
+                del opts[key]
+        else:
+            setattr(opts, key, fallback_set)
+
+    # Retrieve values
+    loader = get('loader')
+    cache_8bit = get('cache_8bit')
+    cache_4bit = get('cache_4bit')
+
+    # Determine cache type based on loader or legacy flags
+    if cache_8bit or cache_4bit:
+        if not loader:
+            # Legacy behavior: prefer 8-bit over 4-bit to minimize breakage
+            if cache_8bit:
+                set('cache_type', 'fp8')
+            elif cache_4bit:
+                set('cache_type', 'q4')
+        elif loader.lower() in ['exllamav2', 'exllamav2_hf']:
+            # ExLlamaV2 loader-specific cache type
+            if cache_8bit:
+                set('cache_type', 'fp8')
+            elif cache_4bit:
+                set('cache_type', 'q4')
+        elif loader.lower() in ['llama.cpp', 'llamacpp_hf']:
+            # Llama.cpp loader-specific cache type
+            if cache_4bit:
+                set('cache_type', 'q4_0')
+            elif cache_8bit:
+                set('cache_type', 'q8_0')
+
+    # Clean up legacy keys
+    del_key('cache_4bit', False)
+    del_key('cache_8bit', False)
+
+    return opts
+
+
 def add_extension(name, last=False):
     if args.extensions is None:
         args.extensions = [name]
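A quick illustrative check of how the shim above maps the deprecated flags, for both of the container types it accepts:

```python
from argparse import Namespace

ns = Namespace(loader='llama.cpp', cache_8bit=True, cache_4bit=False)
transform_legacy_kv_cache_options(ns)
print(ns.cache_type)  # 'q8_0'
print(ns.cache_8bit)  # False: reset via the del_key() fallback for Namespace objects

cfg = {'loader': 'exllamav2', 'cache_4bit': True}
transform_legacy_kv_cache_options(cfg)
print(cfg)  # {'loader': 'exllamav2', 'cache_type': 'q4'}
```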
@@ -297,10 +354,14 @@ def load_user_config():
     else:
         user_config = {}
 
+    for model_name in user_config:
+        user_config[model_name] = transform_legacy_kv_cache_options(user_config[model_name])
+
     return user_config
 
 
 args.loader = fix_loader_name(args.loader)
+args = transform_legacy_kv_cache_options(args)
 
 # Activate the multimodal extension
 if args.multimodal_pipeline is not None:


@@ -50,6 +50,50 @@ theme = gr.themes.Default(
     button_secondary_border_color="var(--border-color-primary)"
 )
 
+if not shared.args.old_colors:
+    theme = theme.set(
+        # General Colors
+        border_color_primary='#c5c5d2',
+        body_text_color_subdued='#484848',
+        background_fill_secondary='#eaeaea',
+        background_fill_secondary_dark='var(--selected-item-color-dark)',
+        background_fill_primary='var(--neutral-50)',
+        background_fill_primary_dark='var(--darker-gray)',
+        body_background_fill="white",
+        block_background_fill="transparent",
+        body_text_color="#333",
+        button_secondary_background_fill="#f4f4f4",
+        button_secondary_border_color="var(--border-color-primary)",
+
+        # Dark Mode Colors
+        input_background_fill_dark='var(--darker-gray)',
+        checkbox_background_color_dark='var(--darker-gray)',
+        block_background_fill_dark='transparent',
+        block_border_color_dark='transparent',
+        input_border_color_dark='var(--border-color-dark)',
+        checkbox_border_color_dark='var(--border-color-dark)',
+        border_color_primary_dark='var(--border-color-dark)',
+        button_secondary_border_color_dark='var(--border-color-dark)',
+        body_background_fill_dark='var(--dark-gray)',
+        button_primary_background_fill_dark='transparent',
+        button_secondary_background_fill_dark='transparent',
+        checkbox_label_background_fill_dark='transparent',
+        button_cancel_background_fill_dark='transparent',
+        button_secondary_background_fill_hover_dark='var(--selected-item-color-dark)',
+        checkbox_label_background_fill_hover_dark='var(--selected-item-color-dark)',
+        table_even_background_fill_dark='var(--darker-gray)',
+        table_odd_background_fill_dark='var(--selected-item-color-dark)',
+        code_background_fill_dark='var(--darker-gray)',
+
+        # Shadows and Radius
+        checkbox_label_shadow='none',
+        block_shadow='none',
+        block_shadow_dark='none',
+        button_large_radius='0.375rem',
+        button_large_padding='6px 12px',
+        input_radius='0.375rem',
+    )
+
 if Path("notification.mp3").exists():
     audio_notification_js = "document.querySelector('#audio_notification audio')?.play();"
 else:
@@ -87,8 +131,7 @@ def list_model_elements():
         'no_xformers',
         'no_sdpa',
         'num_experts_per_token',
-        'cache_8bit',
-        'cache_4bit',
+        'cache_type',
         'autosplit',
         'enable_tp',
         'threads',
@@ -232,10 +275,10 @@ def gather_interface_values(*args):
 def apply_interface_values(state, use_persistent=False):
     if use_persistent:
         state = shared.persistent_interface_state
 
-    if 'textbox-default' in state:
+    if 'textbox-default' in state and 'prompt_menu-default' in state:
         state.pop('prompt_menu-default')
-    if 'textbox-notebook' in state:
+    if 'textbox-notebook' in state and 'prompt_menu-notebook' in state:
         state.pop('prompt_menu-notebook')
 
     elements = list_interface_input_elements()


@@ -12,7 +12,6 @@ from modules.utils import gradio
 
 inputs = ('Chat input', 'interface_state')
 reload_arr = ('history', 'name1', 'name2', 'mode', 'chat_style', 'character_menu')
-clear_arr = ('delete_chat-confirm', 'delete_chat', 'delete_chat-cancel')
 
 
 def create_ui():
@@ -21,7 +20,27 @@ def create_ui():
     shared.gradio['Chat input'] = gr.State()
     shared.gradio['history'] = gr.JSON({'internal': [], 'visible': []}, visible=False)
 
-    with gr.Tab('Chat', elem_id='chat-tab', elem_classes=("old-ui" if shared.args.chat_buttons else None)):
+    with gr.Tab('Chat', elem_id='chat-tab'):
+        with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
+            with gr.Column():
+                with gr.Row(elem_id='past-chats-buttons'):
+                    shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu)
+                    shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+                    shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input'])
+
+                with gr.Row(elem_id='delete-chat-row', visible=False) as shared.gradio['delete-chat-row']:
+                    shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'])
+                    shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'])
+
+                with gr.Row(elem_id='rename-row', visible=False) as shared.gradio['rename-row']:
+                    shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', elem_classes=['no-background'])
+                    with gr.Row():
+                        shared.gradio['rename_to-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'])
+                        shared.gradio['rename_to-confirm'] = gr.Button('Confirm', elem_classes=['refresh-button', 'focus-on-chat-input'], variant='primary')
+
+                with gr.Row():
+                    shared.gradio['unique_id'] = gr.Radio(label="", elem_classes=['slim-dropdown', 'pretty_scrollbar'], interactive=not mu, elem_id='past-chats')
+
         with gr.Row():
             with gr.Column(elem_id='chat-col'):
                 shared.gradio['display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, '', '', 'chat', 'cai-chat', ''))
@@ -60,25 +79,6 @@ def create_ui():
                 shared.gradio['send-chat-to-default'] = gr.Button('Send to default')
                 shared.gradio['send-chat-to-notebook'] = gr.Button('Send to notebook')
 
-        with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
-            with gr.Column():
-                with gr.Row():
-                    shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu)
-                    shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
-                    shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', visible=False, elem_classes=['refresh-button', 'focus-on-chat-input'])
-                    shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', visible=False, elem_classes=['refresh-button', 'focus-on-chat-input'])
-                    shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input'])
-
-                with gr.Row(elem_id='rename-row'):
-                    shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', visible=False, elem_classes=['no-background'])
-                    with gr.Row():
-                        shared.gradio['rename_to-confirm'] = gr.Button('Confirm', visible=False, elem_classes=['refresh-button', 'focus-on-chat-input'])
-                        shared.gradio['rename_to-cancel'] = gr.Button('Cancel', visible=False, elem_classes=['refresh-button', 'focus-on-chat-input'])
-
-                gr.Markdown("Past chats")
-                with gr.Row():
-                    shared.gradio['unique_id'] = gr.Radio(label="", elem_classes=['slim-dropdown', 'pretty_scrollbar'], interactive=not mu, elem_id='past-chats')
-
         with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']):
             with gr.Column():
                 with gr.Row():
@@ -180,29 +180,39 @@ def create_event_handlers():
 
     shared.gradio['Generate'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
+        lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then(
         chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(
+        None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['textbox'].submit(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
+        lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then(
         chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(
+        None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['Regenerate'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+        lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then(
         partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then(
+        None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['Continue'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+        lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then(
         partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then(
+        None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['Impersonate'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then(
+        lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then(
         chat.impersonate_wrapper, gradio(inputs), gradio('textbox', 'display'), show_progress=False).then(
+        None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['Replace last reply'].click(
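Each handler above now brackets generation with two js= steps that toggle a _generating CSS class on the chat container, so stylesheets can restyle the UI while a reply streams without an extra Python round trip. A stripped-down sketch of the pattern (the demo component names are illustrative, not from the diff):

```python
import gradio as gr

ADD = '() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")'
REMOVE = '() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")'

def generate(text):
    return f"Echo: {text}"

with gr.Blocks() as demo:
    box = gr.Textbox(elem_id="chat")
    out = gr.Textbox()
    btn = gr.Button("Generate")
    # Add the class before generation starts, remove it once the reply is done
    btn.click(lambda: None, None, None, js=ADD).then(
        generate, box, out).then(
        None, None, None, js=REMOVE)
```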
@@ -234,21 +244,21 @@ def create_event_handlers():
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         chat.handle_start_new_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
 
-    shared.gradio['delete_chat'].click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, gradio(clear_arr))
-    shared.gradio['delete_chat-cancel'].click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr))
+    shared.gradio['delete_chat'].click(lambda: gr.update(visible=True), None, gradio('delete-chat-row'))
+    shared.gradio['delete_chat-cancel'].click(lambda: gr.update(visible=False), None, gradio('delete-chat-row'))
     shared.gradio['delete_chat-confirm'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
-        chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id') + gradio(clear_arr), show_progress=False)
+        chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id', 'delete-chat-row'), show_progress=False)
 
-    shared.gradio['rename_chat'].click(chat.handle_rename_chat_click, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
-    shared.gradio['rename_to-cancel'].click(lambda: [gr.update(visible=False)] * 3, None, gradio('rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
+    shared.gradio['rename_chat'].click(chat.handle_rename_chat_click, None, gradio('rename_to', 'rename-row'), show_progress=False)
+    shared.gradio['rename_to-cancel'].click(lambda: gr.update(visible=False), None, gradio('rename-row'), show_progress=False)
     shared.gradio['rename_to-confirm'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
-        chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
+        chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename-row'))
 
     shared.gradio['rename_to'].submit(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
-        chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename_to', 'rename_to-confirm', 'rename_to-cancel'), show_progress=False)
+        chat.handle_rename_chat_confirm, gradio('rename_to', 'interface_state'), gradio('unique_id', 'rename-row'), show_progress=False)
 
     shared.gradio['load_chat_history'].upload(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(


@@ -20,12 +20,12 @@ def create_ui():
         with gr.Column():
             with gr.Row():
                 shared.gradio['textbox-default'] = gr.Textbox(value='', lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar'])
-                shared.gradio['token-counter-default'] = gr.HTML(value="<span>0</span>", elem_classes=["token-counter", "default-token-counter"])
+                shared.gradio['token-counter-default'] = gr.HTML(value="<span>0</span>", elem_id="default-token-counter")
 
             with gr.Row():
-                shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary')
-                shared.gradio['Stop-default'] = gr.Button('Stop', elem_id='stop')
                 shared.gradio['Continue-default'] = gr.Button('Continue')
+                shared.gradio['Stop-default'] = gr.Button('Stop', elem_id='stop', visible=False)
+                shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary')
 
             with gr.Row():
                 shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown')
@@ -63,20 +63,26 @@
 
 def create_event_handlers():
     shared.gradio['Generate-default'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-default', 'Generate-default')).then(
         generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
         lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then(
+        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-default', 'Generate-default')).then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['textbox-default'].submit(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-default', 'Generate-default')).then(
         generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
         lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then(
+        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-default', 'Generate-default')).then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['Continue-default'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-default', 'Generate-default')).then(
         generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then(
         lambda state, left, right: state.update({'textbox-default': left, 'output_textbox': right}), gradio('interface_state', 'textbox-default', 'output_textbox'), None).then(
+        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-default', 'Generate-default')).then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False)
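The default tab (and the notebook tab below) takes a different route to the same goal: the Stop button starts hidden (visible=False) and each chain swaps the two buttons' visibility around the generation step. Reduced to its core, the swap is a pair of gr.update calls wired to gradio('Stop-default', 'Generate-default'):

```python
import gradio as gr

def begin_generation():
    # Outputs are wired in the order (Stop, Generate): reveal Stop, hide Generate
    return [gr.update(visible=True), gr.update(visible=False)]

def end_generation():
    # Restore the idle state: hide Stop, show Generate again
    return [gr.update(visible=False), gr.update(visible=True)]
```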


@@ -118,8 +118,7 @@ def create_ui():
             shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.')
             shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
             shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.')
-            shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.')
-            shared.gradio['cache_4bit'] = gr.Checkbox(label="cache_4bit", value=shared.args.cache_4bit, info='Use Q4 cache to save VRAM.')
+            shared.gradio['cache_type'] = gr.Dropdown(label="cache_type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q6', 'q4'], value=shared.args.cache_type, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.')
             shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
             shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, precision=0, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.')
             shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
@@ -195,13 +194,13 @@ def create_event_handlers():
     shared.gradio['model_menu'].change(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         handle_load_model_event_initial, gradio('model_menu', 'interface_state'), gradio(ui.list_interface_input_elements()) + gradio('interface_state'), show_progress=False).then(
-        load_model_wrapper, gradio('model_menu', 'loader', 'autoload_model'), gradio('model_status'), show_progress=False).success(
+        load_model_wrapper, gradio('model_menu', 'loader', 'autoload_model'), gradio('model_status'), show_progress=True).success(
         handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False)
 
     shared.gradio['load_model'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         update_model_parameters, gradio('interface_state'), None).then(
-        partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False).success(
+        partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=True).success(
         handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False)
 
     shared.gradio['unload_model'].click(handle_unload_model_click, None, gradio('model_status'), show_progress=False)
@@ -260,6 +259,8 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
         yield ("Please enter a model path")
         return
 
+    repo_id = repo_id.strip()
+    specific_file = specific_file.strip()
     downloader = importlib.import_module("download-model").ModelDownloader()
 
     progress(0.0)
@@ -297,7 +298,7 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
             downloader.check_model_files(model, branch, links, sha256, output_folder)
             progress(1.0)
         else:
-            yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}`")
+            yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
             downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
 
             yield (f"Model successfully saved to `{output_folder}/`.")
@@ -317,7 +318,7 @@ def create_llamacpp_hf(gguf_name, unquantized_url, progress=gr.Progress()):
         links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=True)
         output_folder = Path(shared.args.model_dir) / (re.sub(r'(?i)\.gguf$', '', gguf_name) + "-HF")
 
-        yield (f"Downloading tokenizer to `{output_folder}`")
+        yield (f"Downloading tokenizer to `{output_folder}/`")
         downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=False)
 
         # Move the GGUF


@@ -23,7 +23,7 @@ def create_ui():
                 with gr.Tab('Raw'):
                     with gr.Row():
                         shared.gradio['textbox-notebook'] = gr.Textbox(value='', lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar'])
-                        shared.gradio['token-counter-notebook'] = gr.HTML(value="<span>0</span>", elem_classes=["token-counter"])
+                        shared.gradio['token-counter-notebook'] = gr.HTML(value="<span>0</span>", elem_id="notebook-token-counter")
 
                 with gr.Tab('Markdown'):
                     shared.gradio['markdown_render-notebook'] = gr.Button('Render')
@@ -48,10 +48,10 @@ def create_ui():
                     shared.gradio['tokens-notebook'] = gr.Textbox(lines=23, label='Tokens', elem_classes=['textbox_logits_notebook', 'add_scrollbar', 'monospace'])
 
                 with gr.Row():
-                    shared.gradio['Generate-notebook'] = gr.Button('Generate', variant='primary', elem_classes='small-button')
-                    shared.gradio['Stop-notebook'] = gr.Button('Stop', elem_classes='small-button', elem_id='stop')
                     shared.gradio['Undo'] = gr.Button('Undo', elem_classes='small-button')
                     shared.gradio['Regenerate-notebook'] = gr.Button('Regenerate', elem_classes='small-button')
+                    shared.gradio['Stop-notebook'] = gr.Button('Stop', visible=False, elem_classes='small-button', elem_id='stop')
+                    shared.gradio['Generate-notebook'] = gr.Button('Generate', variant='primary', elem_classes='small-button')
 
             with gr.Column(scale=1):
                 gr.HTML('<div style="padding-bottom: 13px"></div>')
@@ -66,22 +66,28 @@ def create_event_handlers():
     shared.gradio['Generate-notebook'].click(
         lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-notebook', 'Generate-notebook')).then(
         generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
         lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then(
+        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-notebook', 'Generate-notebook')).then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['textbox-notebook'].submit(
         lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-notebook', 'Generate-notebook')).then(
         generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
         lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then(
+        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-notebook', 'Generate-notebook')).then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['Regenerate-notebook'].click(
         lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+        lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('Stop-notebook', 'Generate-notebook')).then(
         generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then(
         lambda state, text: state.update({'textbox-notebook': text}), gradio('interface_state', 'textbox-notebook'), None).then(
+        lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('Stop-notebook', 'Generate-notebook')).then(
         None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
 
     shared.gradio['Undo'].click(


@@ -1,10 +1,10 @@
-accelerate==1.0.*
-bitsandbytes==0.44.*
+accelerate==1.2.*
+bitsandbytes==0.45.*
 colorama
 datasets
 einops
 fastapi==0.112.4
-gradio==4.26.*
+gradio==4.37.*
 jinja2==3.1.4
 markdown
 numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.46.*
+transformers==4.47.*
 tqdm
 wandb
@@ -32,30 +32,30 @@ sse-starlette==1.6.5
 tiktoken
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 # llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 # llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 # CUDA wheels
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
-https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
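
An aside on the `; platform_system == ...` suffixes used throughout these requirements diffs: they are standard PEP 508 environment markers, which pip evaluates against the running interpreter so that only the wheel lines whose markers are true get installed. A small sketch using the `packaging` library (the same one pip vendors) to evaluate one of the markers above:

```python
# Hedged sketch: evaluating a PEP 508 environment marker with the
# `packaging` library. pip does the equivalent of this for every
# "; ..." suffix in the requirements files.
from packaging.markers import Marker

marker = Marker(
    'platform_system == "Linux" and platform_machine == "x86_64" '
    'and python_version == "3.11"'
)

# True only on 64-bit Linux under a Python 3.11 interpreter, so pip
# skips this wheel line everywhere else.
print(marker.evaluate())
```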

@@ -1,9 +1,9 @@
-accelerate==1.0.*
+accelerate==1.2.*
 colorama
 datasets
 einops
 fastapi==0.112.4
-gradio==4.26.*
+gradio==4.37.*
 jinja2==3.1.4
 markdown
 numba==0.59.*
@@ -20,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.46.*
+transformers==4.47.*
 tqdm
 wandb
@@ -31,14 +31,14 @@ sse-starlette==1.6.5
 tiktoken
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.1+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.1+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.5+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.5+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

@@ -1,9 +1,9 @@
-accelerate==1.0.*
+accelerate==1.2.*
 colorama
 datasets
 einops
 fastapi==0.112.4
-gradio==4.26.*
+gradio==4.37.*
 jinja2==3.1.4
 markdown
 numba==0.59.*
@@ -20,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.46.*
+transformers==4.47.*
 tqdm
 wandb
@@ -31,12 +31,12 @@ sse-starlette==1.6.5
 tiktoken
 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 # AMD wheels
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

@@ -1,9 +1,9 @@
-accelerate==1.0.*
+accelerate==1.2.*
 colorama
 datasets
 einops
 fastapi==0.112.4
-gradio==4.26.*
+gradio==4.37.*
 jinja2==3.1.4
 markdown
 numba==0.59.*
@@ -20,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.46.*
+transformers==4.47.*
 tqdm
 wandb
@@ -31,8 +31,6 @@ sse-starlette==1.6.5
 tiktoken
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl
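
One detail worth spelling out for the Mac lines: `platform_release` compares the Darwin kernel version, not the macOS marketing version. Darwin 21.x is macOS 12 (Monterey), 22.x is macOS 13 (Ventura), and 23.x is macOS 14 (Sonoma), which is why the updated `macosx_14_0` wheels are gated on releases in the 23.x range. A quick check of the relevant values on a given machine:

```python
# Prints the values that the platform_system / platform_release markers
# above are matched against. On macOS 14 (Sonoma) this shows something
# like: Darwin / 23.1.0 / 14.1
import platform

print(platform.system())      # "Darwin" on macOS
print(platform.release())     # kernel version, e.g. "23.1.0"
print(platform.mac_ver()[0])  # marketing version, e.g. "14.1" (empty elsewhere)
```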

@@ -1,9 +1,9 @@
-accelerate==1.0.*
+accelerate==1.2.*
 colorama
 datasets
 einops
 fastapi==0.112.4
-gradio==4.26.*
+gradio==4.37.*
 jinja2==3.1.4
 markdown
 numba==0.59.*
@@ -20,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.46.*
+transformers==4.47.*
 tqdm
 wandb
@@ -31,10 +31,8 @@ sse-starlette==1.6.5
 tiktoken
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.1-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.5-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl

@@ -1,9 +1,9 @@
-accelerate==1.0.*
+accelerate==1.2.*
 colorama
 datasets
 einops
 fastapi==0.112.4
-gradio==4.26.*
+gradio==4.37.*
 jinja2==3.1.4
 markdown
 numba==0.59.*
@@ -20,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.46.*
+transformers==4.47.*
 tqdm
 wandb
@@ -31,7 +31,7 @@ sse-starlette==1.6.5
 tiktoken
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

@@ -1,9 +1,9 @@
-accelerate==1.0.*
+accelerate==1.2.*
 colorama
 datasets
 einops
 fastapi==0.112.4
-gradio==4.26.*
+gradio==4.37.*
 jinja2==3.1.4
 markdown
 numba==0.59.*
@@ -20,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.46.*
+transformers==4.47.*
 tqdm
 wandb
@@ -31,7 +31,7 @@ sse-starlette==1.6.5
 tiktoken
 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

@@ -1,10 +1,10 @@
-accelerate==1.0.*
-bitsandbytes==0.44.*
+accelerate==1.2.*
+bitsandbytes==0.45.*
 colorama
 datasets
 einops
 fastapi==0.112.4
-gradio==4.26.*
+gradio==4.37.*
 jinja2==3.1.4
 markdown
 numba==0.59.*
@@ -21,7 +21,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.46.*
+transformers==4.47.*
 tqdm
 wandb
@@ -32,30 +32,30 @@ sse-starlette==1.6.5
 tiktoken
 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.1+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.5+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 # llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.1+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.5+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 # llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.1+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.5+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 # CUDA wheels
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.3/exllamav2-0.2.3-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
-https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.6/exllamav2-0.2.6-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
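
A final note on the wheel filenames themselves: they follow PEP 427, encoding the distribution name, a PEP 440 version whose local label (e.g. `+cu121avx`) identifies the CUDA/AVX build variant, and the Python/ABI/platform tags that pip matches against the environment. A small sketch that takes one apart with the `packaging` library:

```python
# Hedged sketch: splitting one of the wheel filenames above into its
# PEP 427 components with packaging.utils.parse_wheel_filename.
from packaging.utils import parse_wheel_filename

name, version, build, tags = parse_wheel_filename(
    "llama_cpp_python_cuda-0.3.5+cu121avx-cp311-cp311-win_amd64.whl"
)

print(name)     # "llama-cpp-python-cuda" (normalized project name)
print(version)  # "0.3.5+cu121avx" -- the local label marks the build variant
print(sorted(str(t) for t in tags))  # ["cp311-cp311-win_amd64"]
```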

@@ -1,9 +1,9 @@
-accelerate==1.0.*
+accelerate==1.2.*
 colorama
 datasets
 einops
 fastapi==0.112.4
-gradio==4.26.*
+gradio==4.37.*
 jinja2==3.1.4
 markdown
 numba==0.59.*
@@ -20,7 +20,7 @@ safetensors==0.4.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.46.*
+transformers==4.47.*
 tqdm
 wandb