Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2025-01-13 22:09:19 +01:00)

Commit d1bba48a83
@@ -169,7 +169,7 @@ cp docker/.env.example .env
 docker compose up --build
 ```
 
-* You need to have docker compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions.
+* You need to have Docker Compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions.
 * For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker).
 
 ### Updating the requirements
@@ -325,7 +325,6 @@ Optionally, you can use the following command-line flags:
 | `--mlock` | Force the system to keep the model in RAM. |
 | `--n-gpu-layers N_GPU_LAYERS` | Number of layers to offload to the GPU. |
 | `--tensor_split TENSOR_SPLIT` | Split the model across multiple GPUs. Comma-separated list of proportions. Example: 18,17. |
-| `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default is 0 (random). |
 | `--numa` | Activate NUMA task allocation for llama.cpp. |
 | `--logits_all`| Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower. |
 | `--cache-capacity CACHE_CAPACITY` | Maximum cache capacity (llama-cpp-python). Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed. |
@@ -414,6 +413,8 @@ Optionally, you can use the following command-line flags:
 | `--public-api-id PUBLIC_API_ID` | Tunnel ID for named Cloudflare Tunnel. Use together with public-api option. |
 | `--api-port API_PORT` | The listening port for the API. |
 | `--api-key API_KEY` | API authentication key. |
+| `--admin-key ADMIN_KEY` | API authentication key for admin tasks like loading and unloading models. If not set, will be the same as --api-key. |
+| `--nowebui` | Do not launch the Gradio UI. Useful for launching the API in standalone mode. |
 
 #### Multimodal
 
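As an illustration only (not part of this commit's diff), the new `--api-key`, `--admin-key`, and `--nowebui` flags above might be exercised as sketched below. The Bearer header format and the `/v1/internal/model/load` payload follow the changes to extensions/openai/script.py and typing.py later in this commit; the key values and model name are placeholders.

```
# Hypothetical example: run the API standalone with separate user and admin keys
python server.py --api --nowebui --api-key user-secret --admin-key admin-secret

# Regular endpoints accept the API key as a Bearer token
curl http://127.0.0.1:5000/v1/models \
  -H "Authorization: Bearer user-secret"

# Admin-only endpoints (loading/unloading models and LoRAs) expect the admin key
curl http://127.0.0.1:5000/v1/internal/model/load \
  -H "Authorization: Bearer admin-secret" \
  -H "Content-Type: application/json" \
  -d '{ "model_name": "my-model" }'
```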
css/main.css (20 changed lines)
@@ -648,3 +648,23 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 .options {
   z-index: 100 !important;
 }
+
+/* ----------------------------------------------
+   Big profile picture for characters
+---------------------------------------------- */
+.bigProfilePicture {
+  position: fixed;
+  bottom: 0;
+  left: 0;
+  width: calc((100vw - 880px - 120px) /2);
+}
+
+.pfp_character:hover {
+  cursor: pointer;
+}
+
+@media screen and (width <= 1300px) {
+  .bigProfilePicture {
+    display: none;
+  }
+}
@@ -5,5 +5,4 @@ Dockerfile
 /models
 /presets
 /prompts
-/softprompts
 /training
@@ -3,13 +3,8 @@
 # https://developer.nvidia.com/cuda-gpus you can find the version for your card here
 TORCH_CUDA_ARCH_LIST=7.5
 
-# these commands worked for me with roughly 4.5GB of vram
-CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices
+# your command-line flags go here:
+CLI_ARGS=
 
-# the following examples have been tested with the files linked in docs/README_docker.md:
-# example running 13b with 4bit/128 groupsize : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25
-# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share
-# example running 7b with 8bit groupsize : CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices
-
 # the port the webui binds to on the host
 HOST_PORT=7860
@@ -21,10 +16,5 @@ HOST_API_PORT=5000
 # the port the api binds to inside the container
 CONTAINER_API_PORT=5000
 
-# the port the api stream endpoint binds to on the host
-HOST_API_STREAM_PORT=5005
-# the port the api stream endpoint binds to inside the container
-CONTAINER_API_STREAM_PORT=5005
-
 # the version used to install text-generation-webui from
 WEBUI_VERSION=HEAD
@@ -73,5 +73,5 @@ RUN --mount=type=cache,target=/root/.cache/pip,rw \
 
 ENV CLI_ARGS=""
 
-EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
+EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000}
 CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS}
@@ -11,7 +11,6 @@ services:
     ports:
       - "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
       - "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
-      - "${HOST_API_STREAM_PORT:-5005}:${CONTAINER_API_STREAM_PORT:-5005}"
     stdin_open: true
     tty: true
     volumes:
@@ -11,9 +11,13 @@ LLMs work by generating one token at a time. Given your prompt, the model calcul
 
 ### Preset menu
 
-Can be used to save combinations of parameters for reuse.
+Can be used to save and load combinations of parameters for reuse.
 
-The built-in presets were not manually chosen. They were obtained after a blind contest called "Preset Arena" where hundreds of people voted. The full results can be found [here](https://github.com/oobabooga/oobabooga.github.io/blob/main/arena/results.md).
+* **🎲 button**: creates a random yet interpretable preset. Only 1 parameter of each category is included for the categories: removing tail tokens, avoiding repetition, and flattening the distribution. That is, top_p and top_k are not mixed, and neither are repetition_penalty and frequency_penalty. You can use this button to break out of a loop of bad generations after multiple "Regenerate" attempts.
+
+#### Built-in presets
+
+These were obtained after a blind contest called "Preset Arena" where hundreds of people voted. The full results can be found [here](https://github.com/oobabooga/oobabooga.github.io/blob/main/arena/results.md).
 
 A key takeaway is that the best presets are:
 
@@ -1,13 +1,21 @@
 Docker Compose is a way of installing and launching the web UI in an isolated Ubuntu image using only a few commands.
 
-In order to create the image as described in the main README, you must have docker compose 2.17 or higher:
+## Installing Docker Compose
 
+In order to create the image as described in the main README, you must have Docker Compose installed (2.17 or higher is recommended):
+
 ```
 ~$ docker compose version
-Docker Compose version v2.17.2
+Docker Compose version v2.21.0
 ```
 
-Make sure to also create the necessary symbolic links:
+The installation instructions for various Linux distributions can be found here:
 
+https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository
+
+## Launching the image
+
+Use these commands to launch the image:
+
 ```
 cd text-generation-webui
@@ -17,13 +25,11 @@ cp docker/.env.example .env
 docker compose up --build
 ```
 
-## Table of contents
+## More detailed installation instructions
 
 * [Docker Compose installation instructions](#docker-compose-installation-instructions)
 * [Repository with additional Docker files](#dedicated-docker-repository)
 
-## Docker Compose installation instructions
-
 By [@loeken](https://github.com/loeken).
 
 - [Ubuntu 22.04](#ubuntu-2204)
@@ -97,6 +97,29 @@ curl http://127.0.0.1:5000/v1/chat/completions \
 }'
 ```
 
+#### Logits
+
+```
+curl -k http://127.0.0.1:5000/v1/internal/logits \
+  -H "Content-Type: application/json" \
+  -d '{
+    "prompt": "Who is best, Asuka or Rei? Answer:",
+    "use_samplers": false
+  }'
+```
+
+#### Logits after sampling parameters
+
+```
+curl -k http://127.0.0.1:5000/v1/internal/logits \
+  -H "Content-Type: application/json" \
+  -d '{
+    "prompt": "Who is best, Asuka or Rei? Answer:",
+    "use_samplers": true,
+    "top_k": 3
+  }'
+```
+
 #### Python chat example
 
 ```python
@@ -2,13 +2,13 @@
 
 | Loader | Loading 1 LoRA | Loading 2 or more LoRAs | Training LoRAs | Multimodal extension | Perplexity evaluation |
 |----------------|----------------|-------------------------|----------------|----------------------|-----------------------|
-| Transformers | ✅ | ✅ | ✅* | ✅ | ✅ |
+| Transformers | ✅ | ✅*** | ✅* | ✅ | ✅ |
 | ExLlama_HF | ✅ | ❌ | ❌ | ❌ | ✅ |
 | ExLlamav2_HF | ✅ | ✅ | ❌ | ❌ | ✅ |
 | ExLlama | ✅ | ❌ | ❌ | ❌ | use ExLlama_HF |
 | ExLlamav2 | ✅ | ✅ | ❌ | ❌ | use ExLlamav2_HF |
 | AutoGPTQ | ✅ | ❌ | ❌ | ✅ | ✅ |
-| GPTQ-for-LLaMa | ✅** | ✅ | ✅ | ✅ | ✅ |
+| GPTQ-for-LLaMa | ✅** | ✅*** | ✅ | ✅ | ✅ |
 | llama.cpp | ❌ | ❌ | ❌ | ❌ | use llamacpp_HF |
 | llamacpp_HF | ❌ | ❌ | ❌ | ❌ | ✅ |
 | ctransformers | ❌ | ❌ | ❌ | ❌ | ❌ |
@@ -21,3 +21,5 @@
 \* Training LoRAs with GPTQ models also works with the Transformers loader. Make sure to check "auto-devices" and "disable_exllama" before loading the model.
 
 \*\* Requires the monkey-patch. The instructions can be found [here](https://github.com/oobabooga/text-generation-webui/wiki/08-%E2%80%90-Additional-Tips#using-loras-with-gptq-for-llama).
+
+\*\*\* Multi-LoRA in PEFT is tricky and the current implementation does not work reliably in all cases.
@@ -91,11 +91,13 @@ def ui():
     with gr.Accordion("Character gallery", open=False, elem_id='gallery-extension'):
         update = gr.Button("Refresh")
         gr.HTML(value="<style>" + generate_css() + "</style>")
-        gallery = gr.Dataset(components=[gr.HTML(visible=False)],
-                             label="",
-                             samples=generate_html(),
-                             elem_classes=["character-gallery"],
-                             samples_per_page=50
-                             )
+        gallery = gr.Dataset(
+            components=[gr.HTML(visible=False)],
+            label="",
+            samples=generate_html(),
+            elem_classes=["character-gallery"],
+            samples_per_page=50
+        )
+
     update.click(generate_html, [], gallery)
     gallery.select(select_character, None, gradio['character_menu'])
@@ -203,6 +203,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False) -
     turn_template = body['turn_template'] or turn_template
     context_instruct = body['context_instruct'] or context_instruct
     system_message = body['system_message'] or system_message
+    chat_instruct_command = body['chat_instruct_command'] or shared.settings['chat-instruct_command']
 
     # Chat character
     character = body['character'] or shared.settings['character']
@@ -228,7 +229,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False) -
         'system_message': system_message,
         'custom_system_message': custom_system_message,
         'turn_template': turn_template,
-        'chat-instruct_command': body['chat_instruct_command'],
+        'chat-instruct_command': chat_instruct_command,
         'history': history,
         'stream': stream
     })
@@ -1,6 +1,7 @@
 import os
 
 import numpy as np
+from transformers import AutoModel
 
 from extensions.openai.errors import ServiceUnavailableError
 from extensions.openai.utils import debug_msg, float_list_to_base64
@@ -41,7 +42,12 @@ def load_embedding_model(model: str):
     global embeddings_device, embeddings_model
     try:
         print(f"Try embedding model: {model} on {embeddings_device}")
-        embeddings_model = SentenceTransformer(model, device=embeddings_device)
+        if 'jina-embeddings' in model:
+            embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True)  # trust_remote_code is needed to use the encode method
+            embeddings_model = embeddings_model.to(embeddings_device)
+        else:
+            embeddings_model = SentenceTransformer(model, device=embeddings_device)
+
         print(f"Loaded embedding model: {model}")
     except Exception as e:
         embeddings_model = None
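A usage sketch, not part of the diff: per the EmbeddingsRequest field description in extensions/openai/typing.py, the embedding model is chosen through the OPENEDAI_EMBEDDING_MODEL environment variable, so any model id containing "jina-embeddings" would go through the new AutoModel branch above. The model id below is an assumed example.

```
# Hypothetical example: select a jina-embeddings model so the AutoModel branch is used
OPENEDAI_EMBEDDING_MODEL=jinaai/jina-embeddings-v2-base-en python server.py --api

# Then request embeddings through the OpenAI-compatible endpoint
curl http://127.0.0.1:5000/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{ "input": ["Hello world"] }'
```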
@@ -1,8 +1,9 @@
 from modules import shared
 from modules.logging_colors import logger
+from modules.LoRA import add_lora_to_model
 from modules.models import load_model, unload_model
 from modules.models_settings import get_model_metadata, update_model_parameters
-from modules.utils import get_available_models
+from modules.utils import get_available_loras, get_available_models
 
 
 def get_current_model_info():
@@ -13,12 +14,17 @@ def get_current_model_info():
 
 
 def list_models():
+    return {'model_names': get_available_models()[1:]}
+
+
+def list_dummy_models():
     result = {
         "object": "list",
         "data": []
     }
 
-    for model in get_dummy_models() + get_available_models()[1:]:
+    # these are expected by so much, so include some here as a dummy
+    for model in ['gpt-3.5-turbo', 'text-embedding-ada-002']:
         result["data"].append(model_info_dict(model))
 
     return result
@@ -33,13 +39,6 @@ def model_info_dict(model_name: str) -> dict:
     }
 
 
-def get_dummy_models() -> list:
-    return [  # these are expected by so much, so include some here as a dummy
-        'gpt-3.5-turbo',
-        'text-embedding-ada-002',
-    ]
-
-
 def _load_model(data):
     model_name = data["model_name"]
     args = data["args"]
@@ -67,3 +66,15 @@ def _load_model(data):
                 logger.info(f"TRUNCATION LENGTH (UPDATED): {shared.settings['truncation_length']}")
             elif k == 'instruction_template':
                 logger.info(f"INSTRUCTION TEMPLATE (UPDATED): {shared.settings['instruction_template']}")
+
+
+def list_loras():
+    return {'lora_names': get_available_loras()[1:]}
+
+
+def load_loras(lora_names):
+    add_lora_to_model(lora_names)
+
+
+def unload_all_loras():
+    add_lora_to_model([])
@@ -16,6 +16,7 @@ from sse_starlette import EventSourceResponse
 import extensions.openai.completions as OAIcompletions
 import extensions.openai.embeddings as OAIembeddings
 import extensions.openai.images as OAIimages
+import extensions.openai.logits as OAIlogits
 import extensions.openai.models as OAImodels
 import extensions.openai.moderations as OAImoderations
 from extensions.openai.errors import ServiceUnavailableError
@@ -37,8 +38,13 @@ from .typing import (
     EmbeddingsResponse,
     EncodeRequest,
     EncodeResponse,
+    LoadLorasRequest,
     LoadModelRequest,
+    LogitsRequest,
+    LogitsResponse,
+    LoraListResponse,
     ModelInfoResponse,
+    ModelListResponse,
     TokenCountResponse,
     to_dict
 )
@@ -60,7 +66,15 @@ def verify_api_key(authorization: str = Header(None)) -> None:
         raise HTTPException(status_code=401, detail="Unauthorized")
 
 
-app = FastAPI(dependencies=[Depends(verify_api_key)])
+def verify_admin_key(authorization: str = Header(None)) -> None:
+    expected_api_key = shared.args.admin_key
+    if expected_api_key and (authorization is None or authorization != f"Bearer {expected_api_key}"):
+        raise HTTPException(status_code=401, detail="Unauthorized")
+
+
+app = FastAPI()
+check_key = [Depends(verify_api_key)]
+check_admin_key = [Depends(verify_admin_key)]
 
 # Configure CORS settings to allow all origins, methods, and headers
 app.add_middleware(
@@ -72,12 +86,12 @@ app.add_middleware(
 )
 
 
-@app.options("/")
+@app.options("/", dependencies=check_key)
 async def options_route():
     return JSONResponse(content="OK")
 
 
-@app.post('/v1/completions', response_model=CompletionResponse)
+@app.post('/v1/completions', response_model=CompletionResponse, dependencies=check_key)
 async def openai_completions(request: Request, request_data: CompletionRequest):
     path = request.url.path
     is_legacy = "/generate" in path
@@ -100,7 +114,7 @@ async def openai_completions(request: Request, request_data: CompletionRequest):
     return JSONResponse(response)
 
 
-@app.post('/v1/chat/completions', response_model=ChatCompletionResponse)
+@app.post('/v1/chat/completions', response_model=ChatCompletionResponse, dependencies=check_key)
 async def openai_chat_completions(request: Request, request_data: ChatCompletionRequest):
     path = request.url.path
     is_legacy = "/generate" in path
@@ -123,14 +137,14 @@ async def openai_chat_completions(request: Request, request_data: ChatCompletion
     return JSONResponse(response)
 
 
-@app.get("/v1/models")
-@app.get("/v1/models/{model}")
+@app.get("/v1/models", dependencies=check_key)
+@app.get("/v1/models/{model}", dependencies=check_key)
 async def handle_models(request: Request):
     path = request.url.path
     is_list = request.url.path.split('?')[0].split('#')[0] == '/v1/models'
 
     if is_list:
-        response = OAImodels.list_models()
+        response = OAImodels.list_dummy_models()
     else:
         model_name = path[len('/v1/models/'):]
         response = OAImodels.model_info_dict(model_name)
@@ -138,7 +152,7 @@ async def handle_models(request: Request):
     return JSONResponse(response)
 
 
-@app.get('/v1/billing/usage')
+@app.get('/v1/billing/usage', dependencies=check_key)
 def handle_billing_usage():
     '''
     Ex. /v1/dashboard/billing/usage?start_date=2023-05-01&end_date=2023-05-31
@@ -146,7 +160,7 @@ def handle_billing_usage():
     return JSONResponse(content={"total_usage": 0})
 
 
-@app.post('/v1/audio/transcriptions')
+@app.post('/v1/audio/transcriptions', dependencies=check_key)
 async def handle_audio_transcription(request: Request):
     r = sr.Recognizer()
 
@@ -176,7 +190,7 @@ async def handle_audio_transcription(request: Request):
     return JSONResponse(content=transcription)
 
 
-@app.post('/v1/images/generations')
+@app.post('/v1/images/generations', dependencies=check_key)
 async def handle_image_generation(request: Request):
 
     if not os.environ.get('SD_WEBUI_URL', params.get('sd_webui_url', '')):
@@ -192,7 +206,7 @@ async def handle_image_generation(request: Request):
     return JSONResponse(response)
 
 
-@app.post("/v1/embeddings", response_model=EmbeddingsResponse)
+@app.post("/v1/embeddings", response_model=EmbeddingsResponse, dependencies=check_key)
 async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
     input = request_data.input
     if not input:
@@ -205,7 +219,7 @@ async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
     return JSONResponse(response)
 
 
-@app.post("/v1/moderations")
+@app.post("/v1/moderations", dependencies=check_key)
 async def handle_moderations(request: Request):
     body = await request.json()
     input = body["input"]
@@ -216,37 +230,53 @@ async def handle_moderations(request: Request):
     return JSONResponse(response)
 
 
-@app.post("/v1/internal/encode", response_model=EncodeResponse)
+@app.post("/v1/internal/encode", response_model=EncodeResponse, dependencies=check_key)
 async def handle_token_encode(request_data: EncodeRequest):
     response = token_encode(request_data.text)
     return JSONResponse(response)
 
 
-@app.post("/v1/internal/decode", response_model=DecodeResponse)
+@app.post("/v1/internal/decode", response_model=DecodeResponse, dependencies=check_key)
 async def handle_token_decode(request_data: DecodeRequest):
     response = token_decode(request_data.tokens)
     return JSONResponse(response)
 
 
-@app.post("/v1/internal/token-count", response_model=TokenCountResponse)
+@app.post("/v1/internal/token-count", response_model=TokenCountResponse, dependencies=check_key)
 async def handle_token_count(request_data: EncodeRequest):
     response = token_count(request_data.text)
     return JSONResponse(response)
 
 
-@app.post("/v1/internal/stop-generation")
+@app.post("/v1/internal/logits", response_model=LogitsResponse, dependencies=check_key)
+async def handle_logits(request_data: LogitsRequest):
+    '''
+    Given a prompt, returns the top 50 most likely logits as a dict.
+    The keys are the tokens, and the values are the probabilities.
+    '''
+    response = OAIlogits._get_next_logits(to_dict(request_data))
+    return JSONResponse(response)
+
+
+@app.post("/v1/internal/stop-generation", dependencies=check_key)
 async def handle_stop_generation(request: Request):
     stop_everything_event()
     return JSONResponse(content="OK")
 
 
-@app.get("/v1/internal/model/info", response_model=ModelInfoResponse)
+@app.get("/v1/internal/model/info", response_model=ModelInfoResponse, dependencies=check_key)
 async def handle_model_info():
     payload = OAImodels.get_current_model_info()
     return JSONResponse(content=payload)
 
 
-@app.post("/v1/internal/model/load")
+@app.get("/v1/internal/model/list", response_model=ModelListResponse, dependencies=check_admin_key)
+async def handle_list_models():
+    payload = OAImodels.list_models()
+    return JSONResponse(content=payload)
+
+
+@app.post("/v1/internal/model/load", dependencies=check_admin_key)
 async def handle_load_model(request_data: LoadModelRequest):
     '''
     This endpoint is experimental and may change in the future.
@@ -283,9 +313,30 @@ async def handle_load_model(request_data: LoadModelRequest):
         return HTTPException(status_code=400, detail="Failed to load the model.")
 
 
-@app.post("/v1/internal/model/unload")
+@app.post("/v1/internal/model/unload", dependencies=check_admin_key)
 async def handle_unload_model():
     unload_model()
+
+
+@app.get("/v1/internal/lora/list", response_model=LoraListResponse, dependencies=check_admin_key)
+async def handle_list_loras():
+    response = OAImodels.list_loras()
+    return JSONResponse(content=response)
+
+
+@app.post("/v1/internal/lora/load", dependencies=check_admin_key)
+async def handle_load_loras(request_data: LoadLorasRequest):
+    try:
+        OAImodels.load_loras(request_data.lora_names)
+        return JSONResponse(content="OK")
+    except:
+        traceback.print_exc()
+        return HTTPException(status_code=400, detail="Failed to apply the LoRA(s).")
+
+
+@app.post("/v1/internal/lora/unload", dependencies=check_admin_key)
+async def handle_unload_loras():
+    OAImodels.unload_all_loras()
     return JSONResponse(content="OK")
 
 
@@ -308,10 +359,19 @@ def run_server():
     logger.info(f'OpenAI-compatible API URL:\n\nhttp://{server_addr}:{port}\n')
 
     if shared.args.api_key:
+        if not shared.args.admin_key:
+            shared.args.admin_key = shared.args.api_key
+
         logger.info(f'OpenAI API key:\n\n{shared.args.api_key}\n')
 
+    if shared.args.admin_key:
+        logger.info(f'OpenAI API admin key (for loading/unloading models):\n\n{shared.args.admin_key}\n')
+
     uvicorn.run(app, host=server_addr, port=port, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile)
 
 
 def setup():
-    Thread(target=run_server, daemon=True).start()
+    if shared.args.nowebui:
+        run_server()
+    else:
+        Thread(target=run_server, daemon=True).start()
@@ -122,38 +122,6 @@ class ChatCompletionResponse(BaseModel):
     usage: dict
 
 
-class EncodeRequest(BaseModel):
-    text: str
-
-
-class DecodeRequest(BaseModel):
-    tokens: List[int]
-
-
-class EncodeResponse(BaseModel):
-    tokens: List[int]
-    length: int
-
-
-class DecodeResponse(BaseModel):
-    text: str
-
-
-class TokenCountResponse(BaseModel):
-    length: int
-
-
-class ModelInfoResponse(BaseModel):
-    model_name: str
-    lora_names: List[str]
-
-
-class LoadModelRequest(BaseModel):
-    model_name: str
-    args: dict | None = None
-    settings: dict | None = None
-
-
 class EmbeddingsRequest(BaseModel):
     input: str | List[str]
     model: str | None = Field(default=None, description="Unused parameter. To change the model, set the OPENEDAI_EMBEDDING_MODEL and OPENEDAI_EMBEDDING_DEVICE environment variables before starting the server.")
@@ -167,6 +135,68 @@ class EmbeddingsResponse(BaseModel):
     object: str = "embedding"
 
 
+class EncodeRequest(BaseModel):
+    text: str
+
+
+class EncodeResponse(BaseModel):
+    tokens: List[int]
+    length: int
+
+
+class DecodeRequest(BaseModel):
+    tokens: List[int]
+
+
+class DecodeResponse(BaseModel):
+    text: str
+
+
+class TokenCountResponse(BaseModel):
+    length: int
+
+
+class LogitsRequestParams(BaseModel):
+    prompt: str
+    use_samplers: bool = False
+    frequency_penalty: float | None = 0
+    max_tokens: int | None = 16
+    presence_penalty: float | None = 0
+    temperature: float | None = 1
+    top_p: float | None = 1
+
+
+class LogitsRequest(GenerationOptions, LogitsRequestParams):
+    pass
+
+
+class LogitsResponse(BaseModel):
+    logits: dict
+
+
+class ModelInfoResponse(BaseModel):
+    model_name: str
+    lora_names: List[str]
+
+
+class ModelListResponse(BaseModel):
+    model_names: List[str]
+
+
+class LoadModelRequest(BaseModel):
+    model_name: str
+    args: dict | None = None
+    settings: dict | None = None
+
+
+class LoraListResponse(BaseModel):
+    lora_names: List[str]
+
+
+class LoadLorasRequest(BaseModel):
+    lora_names: List[str]
+
+
 def to_json(obj):
     return json.dumps(obj.__dict__, indent=4)
 
js/main.js (38 changed lines)
@@ -312,6 +312,10 @@ document.addEventListener("click", function (event) {
   if (!isMouseOverButtonOrMenu() && menu.style.display === "flex") {
     hideMenu();
   }
+
+  if (event.target.classList.contains("pfp_character")) {
+    toggleBigPicture();
+  }
 });
 
 //------------------------------------------------
@@ -335,3 +339,37 @@ document.getElementById("show-controls").parentNode.style.bottom = "0px";
 // Focus on the chat input
 //------------------------------------------------
 document.querySelector("#chat-input textarea").focus();
+
+//------------------------------------------------
+// Show enlarged character picture when the profile
+// picture is clicked on
+//------------------------------------------------
+let bigPictureVisible = false;
+
+function addBigPicture() {
+  var imgElement = document.createElement("img");
+  var timestamp = new Date().getTime();
+  imgElement.src = "/file/cache/pfp_character.png?time=" + timestamp;
+  imgElement.classList.add("bigProfilePicture");
+
+  var imgElementParent = document.getElementById("chat").parentNode.parentNode.parentNode.parentNode.parentNode.parentNode.parentNode;
+  imgElementParent.appendChild(imgElement);
+}
+
+function deleteBigPicture() {
+  var bigProfilePictures = document.querySelectorAll('.bigProfilePicture');
+  bigProfilePictures.forEach(function (element) {
+    element.parentNode.removeChild(element);
+  });
+}
+
+function toggleBigPicture() {
+  if(bigPictureVisible) {
+    deleteBigPicture();
+    bigPictureVisible = false;
+  } else {
+    addBigPicture();
+    bigPictureVisible = true;
+  }
+}
+
@@ -10,6 +10,12 @@ function toggle_controls(value) {
     chatParent.classList.remove("bigchat");
     document.getElementById("chat-input-row").classList.remove("bigchat");
     document.getElementById("chat-col").classList.remove("bigchat");
+
+    let gallery_element = document.getElementById('gallery-extension');
+    if (gallery_element) {
+      gallery_element.style.display = 'block';
+    }
+
   } else {
     belowChatInput.forEach(element => {
       element.style.display = "none";
js/update_big_picture.js (new file, 7 lines)
@@ -0,0 +1,7 @@
+function updateBigPicture() {
+  var existingElement = document.querySelector('.bigProfilePicture');
+  if (existingElement) {
+    var timestamp = new Date().getTime();
+    existingElement.src = "/file/cache/pfp_character.png?time=" + timestamp;
+  }
+}
@@ -145,14 +145,12 @@ def add_lora_transformers(lora_names):
         if len(lora_names) > 1:
             merge_loras()
 
+        shared.lora_names = lora_names
         return
 
     # If any LoRA needs to be removed, start over
     if len(removed_set) > 0:
-        # shared.model may no longer be PeftModel
-        if hasattr(shared.model, 'disable_adapter'):
-            shared.model.disable_adapter()
-            shared.model = shared.model.base_model.model
+        shared.model = shared.model.unload()
 
     if len(lora_names) > 0:
         params = {}
@@ -172,8 +170,6 @@ def add_lora_transformers(lora_names):
         if len(lora_names) > 1:
             merge_loras()
 
-        shared.lora_names = lora_names
-
         if not shared.args.load_in_8bit and not shared.args.cpu:
             shared.model.half()
             if not hasattr(shared.model, "hf_device_map"):
@@ -186,6 +182,8 @@ def add_lora_transformers(lora_names):
             else:
                 shared.model = shared.model.cuda()
 
+    shared.lora_names = lora_names
+
 
 def merge_loras():
     if len(list({shared.model.peft_config[adapter].r for adapter in shared.model.peft_config.keys()})) > 1:
@@ -91,7 +91,13 @@ def generate_chat_prompt(user_input, state, **kwargs):
     if state['mode'] == 'chat-instruct':
         wrapper = ''
         command = state['chat-instruct_command'].replace('<|character|>', state['name2'] if not impersonate else state['name1'])
-        wrapper += state['context_instruct']
+        context_instruct = state['context_instruct']
+        if state['custom_system_message'].strip() != '':
+            context_instruct = context_instruct.replace('<|system-message|>', state['custom_system_message'])
+        else:
+            context_instruct = context_instruct.replace('<|system-message|>', state['system_message'])
+
+        wrapper += context_instruct
         wrapper += all_substrings['instruct']['user_turn'].replace('<|user-message|>', command)
         wrapper += all_substrings['instruct']['bot_turn_stripped']
         if impersonate:
@@ -539,9 +545,13 @@ def generate_pfp_cache(character):
 
     for path in [Path(f"characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
         if path.exists():
-            img = make_thumbnail(Image.open(path))
-            img.save(Path('cache/pfp_character.png'), format='PNG')
-            return img
+            original_img = Image.open(path)
+            original_img.save(Path('cache/pfp_character.png'), format='PNG')
+
+            thumb = make_thumbnail(original_img)
+            thumb.save(Path('cache/pfp_character_thumb.png'), format='PNG')
+
+            return thumb
 
     return None
 
@@ -570,8 +580,9 @@ def load_character(character, name1, name2, instruct=False):
         file_contents = open(filepath, 'r', encoding='utf-8').read()
         data = json.loads(file_contents) if extension == "json" else yaml.safe_load(file_contents)
 
-        if Path("cache/pfp_character.png").exists() and not instruct:
-            Path("cache/pfp_character.png").unlink()
+        for path in [Path("cache/pfp_character.png"), Path("cache/pfp_character_thumb.png")]:
+            if path.exists() and not instruct:
+                path.unlink()
 
         picture = generate_pfp_cache(character)
 
@@ -225,7 +225,7 @@ def generate_cai_chat_html(history, name1, name2, style, reset_cache=False):
     output = f'<style>{chat_styles[style]}</style><div class="chat" id="chat"><div class="messages">'
 
     # We use ?name2 and ?time.time() to force the browser to reset caches
-    img_bot = f'<img src="file/cache/pfp_character.png?{name2}">' if Path("cache/pfp_character.png").exists() else ''
+    img_bot = f'<img src="file/cache/pfp_character_thumb.png?{name2}" class="pfp_character">' if Path("cache/pfp_character_thumb.png").exists() else ''
     img_me = f'<img src="file/cache/pfp_me.png?{time.time() if reset_cache else ""}">' if Path("cache/pfp_me.png").exists() else ''
 
     for i, _row in enumerate(history):
@@ -39,7 +39,7 @@ class LlamacppHF(PreTrainedModel):
             'n_tokens': self.model.n_tokens,
             'input_ids': self.model.input_ids,
             'scores': self.model.scores,
-            'ctx': self.model.ctx
+            'ctx': self.model._ctx
         }
 
         if shared.args.cfg_cache:
@@ -65,7 +65,7 @@ class LlamacppHF(PreTrainedModel):
             'n_tokens': self.model.n_tokens,
             'input_ids': self.model.input_ids,
             'scores': self.model.scores,
-            'ctx': self.model.ctx
+            'ctx': self.model._ctx
         })
 
     def save_negative_cache(self):
@@ -73,20 +73,20 @@ class LlamacppHF(PreTrainedModel):
             'n_tokens': self.model.n_tokens,
             'input_ids': self.model.input_ids,
             'scores': self.model.scores,
-            'ctx': self.model.ctx
+            'ctx': self.model._ctx
         })
 
     def load_cache(self):
         self.model.n_tokens = self.llamacpp_cache['n_tokens']
         self.model.input_ids = self.llamacpp_cache['input_ids']
         self.model.scores = self.llamacpp_cache['scores']
-        self.model.ctx = self.llamacpp_cache['ctx']
+        self.model._ctx = self.llamacpp_cache['ctx']
 
     def load_negative_cache(self):
         self.model.n_tokens = self.llamacpp_cache_negative['n_tokens']
         self.model.input_ids = self.llamacpp_cache_negative['input_ids']
         self.model.scores = self.llamacpp_cache_negative['scores']
-        self.model.ctx = self.llamacpp_cache_negative['ctx']
+        self.model._ctx = self.llamacpp_cache_negative['ctx']
 
     @property
     def device(self) -> torch.device:
@@ -192,7 +192,6 @@ class LlamacppHF(PreTrainedModel):
         params = {
            'model_path': str(model_file),
            'n_ctx': shared.args.n_ctx,
-           'seed': int(shared.args.llama_cpp_seed),
            'n_threads': shared.args.threads or None,
            'n_threads_batch': shared.args.threads_batch or None,
            'n_batch': shared.args.n_batch,
@@ -74,7 +74,6 @@ class LlamaCppModel:
         params = {
            'model_path': str(path),
            'n_ctx': shared.args.n_ctx,
-           'seed': int(shared.args.llama_cpp_seed),
            'n_threads': shared.args.threads or None,
            'n_threads_batch': shared.args.threads_batch or None,
            'n_batch': shared.args.n_batch,
@@ -144,15 +143,16 @@ class LlamaCppModel:
             max_tokens=state['max_new_tokens'],
             temperature=state['temperature'],
             top_p=state['top_p'],
-            top_k=state['top_k'],
-            repeat_penalty=state['repetition_penalty'],
-            presence_penalty=state['presence_penalty'],
             frequency_penalty=state['frequency_penalty'],
+            presence_penalty=state['presence_penalty'],
+            repeat_penalty=state['repetition_penalty'],
+            top_k=state['top_k'],
+            stream=True,
+            seed=int(state['seed']) if state['seed'] != -1 else None,
             tfs_z=state['tfs'],
             mirostat_mode=int(state['mirostat_mode']),
             mirostat_tau=state['mirostat_tau'],
             mirostat_eta=state['mirostat_eta'],
-            stream=True,
             logits_processor=logit_processors,
             grammar=self.grammar
         )
@@ -99,7 +99,6 @@ loaders_and_params = OrderedDict({
         'no_mmap',
         'mlock',
         'no_mul_mat_q',
-        'llama_cpp_seed',
         'alpha_value',
         'rope_freq_base',
         'compress_pos_emb',
@@ -366,6 +365,7 @@ loaders_samplers = {
         'repetition_penalty',
         'presence_penalty',
         'frequency_penalty',
+        'seed',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
@@ -8,7 +8,7 @@ from modules.text_generation import generate_reply
 global_scores = None
 
 
-def get_next_logits(prompt, state, use_samplers, previous):
+def get_next_logits(prompt, state, use_samplers, previous, return_dict=False):
     if shared.model is None:
         logger.error("No model is loaded! Select one in the Model tab.")
         return 'Error: No model is loaded1 Select one in the Model tab.', previous
@@ -56,8 +56,16 @@ def get_next_logits(prompt, state, use_samplers, previous):
     topk_indices = [i.expand((1, 1)) for i in topk_indices]
 
     tokens = [shared.tokenizer.decode(i) for i in topk_indices]
-    output = ''
-    for row in list(zip(topk_values, tokens)):
-        output += f"{row[0]} - {repr(row[1])}\n"
 
-    return output, previous
+    if return_dict:
+        output = {}
+        for row in list(zip(topk_values, tokens)):
+            output[row[1]] = row[0]
+
+        return output
+    else:
+        output = ''
+        for row in list(zip(topk_values, tokens)):
+            output += f"{row[0]} - {repr(row[1])}\n"
+
+        return output, previous
@@ -1,8 +1,12 @@
 import functools
+import random
 from pathlib import Path
 
 import yaml
 
+from modules import shared
+from modules.loaders import loaders_samplers
+
 
 def default_preset():
     return {
@@ -63,6 +67,45 @@ def load_preset_for_ui(name, state):
     return state, *[generate_params[k] for k in presets_params()]
 
 
+def random_preset(state):
+    params_and_values = {
+        'remove_tail_tokens': {
+            'top_p': [0.5, 0.8, 0.9, 0.95, 0.99],
+            'min_p': [0.5, 0.2, 0.1, 0.05, 0.01],
+            'top_k': [3, 5, 10, 20, 30, 40],
+            'typical_p': [0.2, 0.575, 0.95],
+            'tfs': [0.5, 0.8, 0.9, 0.95, 0.99],
+            'top_a': [0.5, 0.2, 0.1, 0.05, 0.01],
+            'epsilon_cutoff': [1, 3, 5, 7, 9],
+            'eta_cutoff': [3, 6, 9, 12, 15, 18],
+        },
+        'flatten_distribution': {
+            'temperature': [0.5, 0.7, 0.8, 1, 1.2, 1.5, 2.0],
+        },
+        'repetition': {
+            'repetition_penalty': [1, 1.05, 1.1, 1.15, 1.20, 1.25],
+            'presence_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0],
+            'frequency_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0],
+        },
+        'other': {
+            'temperature_last': [True, False],
+        }
+    }
+
+    generate_params = default_preset()
+    for cat in params_and_values:
+        choices = list(params_and_values[cat].keys())
+        if shared.args.loader is not None:
+            choices = [x for x in choices if x in loaders_samplers[shared.args.loader]]
+
+        if len(choices) > 0:
+            choice = random.choice(choices)
+            generate_params[choice] = random.choice(params_and_values[cat][choice])
+
+    state.update(generate_params)
+    return state, *[generate_params[k] for k in presets_params()]
+
+
 def generate_preset_yaml(state):
     defaults = default_preset()
     data = {k: state[k] for k in presets_params()}
@@ -112,7 +112,6 @@ parser.add_argument('--no-mmap', action='store_true', help='Prevent mmap from be
 parser.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')
 parser.add_argument('--n-gpu-layers', type=int, default=0, help='Number of layers to offload to the GPU.')
 parser.add_argument('--tensor_split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 18,17.')
-parser.add_argument('--llama_cpp_seed', type=int, default=0, help='Seed for llama-cpp models. Default is 0 (random).')
 parser.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp.')
 parser.add_argument('--logits_all', action='store_true', help='Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower.')
 parser.add_argument('--cache-capacity', type=str, help='Maximum cache capacity (llama-cpp-python). Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed.')
@@ -171,6 +170,8 @@ parser.add_argument('--public-api', action='store_true', help='Create a public U
 parser.add_argument('--public-api-id', type=str, help='Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.', default=None)
 parser.add_argument('--api-port', type=int, default=5000, help='The listening port for the API.')
 parser.add_argument('--api-key', type=str, default='', help='API authentication key.')
+parser.add_argument('--admin-key', type=str, default='', help='API authentication key for admin tasks like loading and unloading models. If not set, will be the same as --api-key.')
+parser.add_argument('--nowebui', action='store_true', help='Do not launch the Gradio UI. Useful for launching the API in standalone mode.')
 
 # Multimodal
 parser.add_argument('--multimodal-pipeline', type=str, default=None, help='The multimodal pipeline to use. Examples: llava-7b, llava-13b.')
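
A minimal sketch of the fallback behaviour described in the --admin-key help text, assuming the server simply reuses --api-key when no admin key is supplied (the server-side code that consumes these flags is not part of this hunk):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--api-key', type=str, default='')
parser.add_argument('--admin-key', type=str, default='')
args = parser.parse_args(['--api-key', 'secret'])

# Assumed behaviour: admin endpoints fall back to the regular API key.
admin_key = args.admin_key or args.api_key
print(admin_key)  # -> 'secret'
```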
@@ -182,6 +183,7 @@ parser.add_argument('--no-stream', action='store_true', help='DEPRECATED')
 parser.add_argument('--mul_mat_q', action='store_true', help='DEPRECATED')
 parser.add_argument('--api-blocking-port', type=int, default=5000, help='DEPRECATED')
 parser.add_argument('--api-streaming-port', type=int, default=5005, help='DEPRECATED')
+parser.add_argument('--llama_cpp_seed', type=int, default=0, help='DEPRECATED')
 parser.add_argument('--use_fast', action='store_true', help='DEPRECATED')
 
 args = parser.parse_args()
@@ -200,7 +202,7 @@ for k in ['notebook', 'chat', 'no_stream', 'mul_mat_q', 'use_fast']:
 # Security warnings
 if args.trust_remote_code:
     logger.warning('trust_remote_code is enabled. This is dangerous.')
-if 'COLAB_GPU' not in os.environ:
+if 'COLAB_GPU' not in os.environ and not args.nowebui:
     if args.share:
         logger.warning("The gradio \"share link\" feature uses a proprietary executable to create a reverse tunnel. Use it with care.")
     if any((args.listen, args.share)) and not any((args.gradio_auth, args.gradio_auth_path)):
@@ -20,6 +20,8 @@ with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f:
     switch_tabs_js = f.read()
 with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r') as f:
     show_controls_js = f.read()
+with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r') as f:
+    update_big_picture_js = f.read()
 
 refresh_symbol = '🔄'
 delete_symbol = '🗑️'
@@ -80,7 +82,6 @@ def list_model_elements():
         'n_gpu_layers',
         'tensor_split',
         'n_ctx',
-        'llama_cpp_seed',
         'gpu_split',
         'max_seq_len',
         'compress_pos_emb',
@@ -275,7 +275,8 @@ def create_event_handlers():
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         chat.load_latest_history, gradio('interface_state'), gradio('history')).then(
         chat.redraw_html, gradio(reload_arr), gradio('display')).then(
-        lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id'))
+        lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then(
+        lambda: None, None, None, _js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
 
     shared.gradio['mode'].change(
         lambda x: gr.update(visible=x != 'instruct'), gradio('mode'), gradio('chat_style'), show_progress=False).then(
@@ -37,6 +37,14 @@ def create_ui():
             shared.gradio['delete_character_confirm'] = gr.Button('Delete', elem_classes="small-button", variant='stop', interactive=not mu)
             shared.gradio['delete_character_cancel'] = gr.Button('Cancel', elem_classes="small-button")
 
+    # Preset saver
+    with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['preset_saver']:
+        shared.gradio['save_preset_filename'] = gr.Textbox(lines=1, label='File name', info='The preset will be saved to your presets/ folder with this base filename.')
+        shared.gradio['save_preset_contents'] = gr.Textbox(lines=10, label='File contents')
+        with gr.Row():
+            shared.gradio['save_preset_confirm'] = gr.Button('Save', elem_classes="small-button", variant='primary', interactive=not mu)
+            shared.gradio['save_preset_cancel'] = gr.Button('Cancel', elem_classes="small-button")
+
 
 def create_event_handlers():
     shared.gradio['save_confirm'].click(
@@ -65,10 +73,16 @@ def create_event_handlers():
 
     shared.gradio['save_preset'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
-        presets.generate_preset_yaml, gradio('interface_state'), gradio('save_contents')).then(
-        lambda: 'presets/', None, gradio('save_root')).then(
-        lambda: 'My Preset.yaml', None, gradio('save_filename')).then(
-        lambda: gr.update(visible=True), None, gradio('file_saver'))
+        presets.generate_preset_yaml, gradio('interface_state'), gradio('save_preset_contents')).then(
+        lambda: 'My Preset', None, gradio('save_preset_filename')).then(
+        lambda: gr.update(visible=True), None, gradio('preset_saver'))
+
+    shared.gradio['save_preset_confirm'].click(
+        lambda x, y: utils.save_file(f'presets/{x}.yaml', y), gradio('save_preset_filename', 'save_preset_contents'), None).then(
+        lambda: gr.update(visible=False), None, gradio('preset_saver')).then(
+        lambda x: gr.update(choices=utils.get_available_presets(), value=x), gradio('save_preset_filename'), gradio('preset_menu'))
+
+    shared.gradio['save_preset_cancel'].click(lambda: gr.update(visible=False), None, gradio('preset_saver'))
 
     shared.gradio['delete_preset'].click(
         lambda x: f'{x}.yaml', gradio('preset_menu'), gradio('delete_filename')).then(
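
The handlers above rely on Gradio's event chaining, where each .then() step runs after the previous one completes. A minimal, self-contained sketch of that pattern (the components and functions here are made up, not the webui's own):

```python
import gradio as gr

with gr.Blocks() as demo:
    name = gr.Textbox(label='Preset name')
    status = gr.Textbox(label='Status')
    save = gr.Button('Save')

    # Each .then() step runs after the previous one finishes.
    save.click(lambda x: f'Validating {x}...', name, status).then(
        lambda x: f'Saved {x}', name, status)

if __name__ == '__main__':
    demo.launch()
```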
@@ -120,7 +120,6 @@ def create_ui():
             shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
             shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
             shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17')
-            shared.gradio['llama_cpp_seed'] = gr.Number(label='Seed (0 for random)', value=shared.args.llama_cpp_seed)
             shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='To enable this option, start the web UI with the --trust-remote-code flag. It is necessary for some models.', interactive=shared.args.trust_remote_code)
             shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Create an additional cache for CFG negative prompts.')
             shared.gradio['logits_all'] = gr.Checkbox(label="logits_all", value=shared.args.logits_all, info='Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower.')
@@ -18,6 +18,7 @@ def create_ui(default_preset):
                 ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button', interactive=not mu)
                 shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
                 shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+                shared.gradio['random_preset'] = gr.Button('🎲', elem_classes='refresh-button')
 
         with gr.Column():
             shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()), value="All", elem_classes='slim-dropdown')
@@ -90,6 +91,7 @@ def create_ui(default_preset):
 def create_event_handlers():
     shared.gradio['filter_by_loader'].change(loaders.blacklist_samplers, gradio('filter_by_loader'), gradio(loaders.list_all_samplers()), show_progress=False)
     shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()))
+    shared.gradio['random_preset'].click(presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()))
     shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'))
 
 
@@ -27,14 +27,14 @@ bitsandbytes==0.41.1; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp311-cp311-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp310-cp310-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp39-cp39-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp38-cp38-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
 
 # CUDA wheels
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
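
Each wheel line above is gated by a PEP 508 environment marker (the text after the semicolon), so pip only installs the lines whose marker evaluates to true for the current platform and interpreter. A small check using the `packaging` library (which pip vendors internally) illustrates the mechanism:

```python
from packaging.markers import Marker  # pip install packaging

marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')
# True only on 64-bit Linux running Python 3.11; False elsewhere.
print(marker.evaluate())
```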
@@ -67,14 +67,14 @@ https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
@@ -27,14 +27,14 @@ bitsandbytes==0.38.1; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp311-cp311-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp310-cp310-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp39-cp39-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp38-cp38-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
 
 # AMD wheels
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
@@ -45,10 +45,10 @@ https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+rocm5
 https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
 https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+rocm5.6-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.6.1-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.6.1-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.18+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.18+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.18+rocm5.6.1-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.18+rocm5.6.1-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
@@ -27,14 +27,14 @@ bitsandbytes==0.38.1; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
 
 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
 
 # AMD wheels
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
@@ -27,19 +27,15 @@ bitsandbytes==0.41.1; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
 
 # Mac wheels
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.8"
@@ -27,19 +27,19 @@ bitsandbytes==0.41.1; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
 
 # Mac wheels
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.8"
@@ -27,11 +27,11 @@ bitsandbytes==0.41.1; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp311-cp311-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp310-cp310-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp39-cp39-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp38-cp38-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
@@ -27,11 +27,11 @@ bitsandbytes==0.41.1; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
 
 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
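The only difference between this hunk and the AVX2 one above is which `+cpuavx` build variant gets pinned; which requirements file to install from depends on whether the CPU supports AVX2. A rough sketch of that decision using `py-cpuinfo` (the package, the flag check, and the file names are assumptions for illustration; the repo's actual installer logic is not part of this diff):

```
# Sketch: choose between the AVX2 and no-AVX2 requirement sets by checking CPU
# flags with py-cpuinfo (assumed helper; file names are illustrative only).
import cpuinfo

flags = cpuinfo.get_cpu_info().get("flags", [])
requirements = (
    "requirements_cpu_only.txt" if "avx2" in flags
    else "requirements_cpu_only_noavx2.txt"
)
print(f"AVX2 {'present' if 'avx2' in flags else 'absent'} -> install from {requirements}")
```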
@@ -27,14 +27,14 @@ bitsandbytes==0.41.1; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
 
 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
 
 # CUDA wheels
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
@@ -67,14 +67,14 @@ https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
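After reinstalling from the updated requirements, the bump can be sanity-checked from Python; llama-cpp-python exposes its version string as `llama_cpp.__version__` (an assumption about the package's layout, not something this diff touches):

```
# Sketch: confirm the llama-cpp-python bump took effect after reinstalling.
import llama_cpp

expected = "0.2.18"  # version pinned by this commit
print(f"llama_cpp_python {llama_cpp.__version__}")
assert llama_cpp.__version__.startswith(expected), (
    f"expected {expected}, got {llama_cpp.__version__}"
)
```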
server.py (24 changed lines)
@@ -226,13 +226,19 @@ if __name__ == "__main__":
 
     shared.generation_lock = Lock()
 
-    # Launch the web UI
-    create_interface()
-    while True:
-        time.sleep(0.5)
-        if shared.need_restart:
-            shared.need_restart = False
-            time.sleep(0.5)
-            shared.gradio['interface'].close()
-            time.sleep(0.5)
-            create_interface()
+    if shared.args.nowebui:
+        # Start the API in standalone mode
+        shared.args.extensions = [x for x in shared.args.extensions if x != 'gallery']
+        if shared.args.extensions is not None and len(shared.args.extensions) > 0:
+            extensions_module.load_extensions()
+    else:
+        # Launch the web UI
+        create_interface()
+        while True:
+            time.sleep(0.5)
+            if shared.need_restart:
+                shared.need_restart = False
+                time.sleep(0.5)
+                shared.gradio['interface'].close()
+                time.sleep(0.5)
+                create_interface()
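With `--nowebui`, the new branch above never calls `create_interface()`; only the loaded extensions (notably the API) keep the process alive, which is the standalone API mode this commit introduces. A minimal client sketch against such an instance (port 5000, the OpenAI-compatible `/v1/chat/completions` route, and the payload shape are assumptions about the API extension's defaults, not something this diff defines):

```
# Sketch: query a server started with `python server.py --api --nowebui`.
# The port, route, and auth header below are assumed defaults of the
# OpenAI-compatible API extension; adjust --api-port/--api-key as needed.
import requests

url = "http://127.0.0.1:5000/v1/chat/completions"    # assumed default API port
headers = {"Authorization": "Bearer my-api-key"}     # only needed if --api-key is set
payload = {
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "max_tokens": 64,
}

response = requests.post(url, json=payload, headers=headers, timeout=120)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
```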