Commit d1bba48a83 (mirror of https://github.com/oobabooga/text-generation-webui.git, synced 2024-10-30 06:00:15 +01:00)
@@ -169,7 +169,7 @@ cp docker/.env.example .env
docker compose up --build
```

-* You need to have docker compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions.
+* You need to have Docker Compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions.
* For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker).

### Updating the requirements

@@ -325,7 +325,6 @@ Optionally, you can use the following command-line flags:
| `--mlock` | Force the system to keep the model in RAM. |
| `--n-gpu-layers N_GPU_LAYERS` | Number of layers to offload to the GPU. |
| `--tensor_split TENSOR_SPLIT` | Split the model across multiple GPUs. Comma-separated list of proportions. Example: 18,17. |
-| `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default is 0 (random). |
| `--numa` | Activate NUMA task allocation for llama.cpp. |
| `--logits_all`| Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower. |
| `--cache-capacity CACHE_CAPACITY` | Maximum cache capacity (llama-cpp-python). Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed. |
@@ -414,6 +413,8 @@ Optionally, you can use the following command-line flags:
| `--public-api-id PUBLIC_API_ID` | Tunnel ID for named Cloudflare Tunnel. Use together with public-api option. |
| `--api-port API_PORT` | The listening port for the API. |
| `--api-key API_KEY` | API authentication key. |
+| `--admin-key ADMIN_KEY` | API authentication key for admin tasks like loading and unloading models. If not set, will be the same as --api-key. |
+| `--nowebui` | Do not launch the Gradio UI. Useful for launching the API in standalone mode. |
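As an illustration of how these keys are used, here is a minimal sketch, not part of the commit itself: it assumes a local server started with `--api`, `--api-key`, and `--admin-key`, and the `requests` package installed. The endpoint paths and the Bearer header scheme come from `extensions/openai/script.py` below; the model name and key values are placeholders.

```python
import requests

API_URL = "http://127.0.0.1:5000"   # default --api-port
API_KEY = "your-api-key"            # placeholder; value passed to --api-key
ADMIN_KEY = "your-admin-key"        # placeholder; value passed to --admin-key

# Regular endpoints accept the API key as a Bearer token.
response = requests.post(
    f"{API_URL}/v1/chat/completions",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={"messages": [{"role": "user", "content": "Hello!"}]},
)
print(response.json())

# Admin endpoints, such as loading a model, require the admin key instead.
response = requests.post(
    f"{API_URL}/v1/internal/model/load",
    headers={"Authorization": f"Bearer {ADMIN_KEY}"},
    json={"model_name": "your-model-name"},  # placeholder model name
)
print(response.status_code)
```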
#### Multimodal

css/main.css (20 changed lines)
@@ -648,3 +648,23 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
.options {
  z-index: 100 !important;
}

/* ----------------------------------------------
   Big profile picture for characters
---------------------------------------------- */
.bigProfilePicture {
  position: fixed;
  bottom: 0;
  left: 0;
  width: calc((100vw - 880px - 120px) /2);
}

.pfp_character:hover {
  cursor: pointer;
}

@media screen and (width <= 1300px) {
  .bigProfilePicture {
    display: none;
  }
}
@@ -5,5 +5,4 @@ Dockerfile
/models
/presets
/prompts
/softprompts
/training
@@ -3,13 +3,8 @@
# https://developer.nvidia.com/cuda-gpus you can find the version for your card here
TORCH_CUDA_ARCH_LIST=7.5

-# these commands worked for me with roughly 4.5GB of vram
-CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices
-# the following examples have been tested with the files linked in docs/README_docker.md:
-# example running 13b with 4bit/128 groupsize : CLI_ARGS=--model llama-13b-4bit-128g --wbits 4 --listen --groupsize 128 --pre_layer 25
-# example with loading api extension and public share: CLI_ARGS=--model llama-7b-4bit --wbits 4 --listen --auto-devices --no-stream --extensions api --share
-# example running 7b with 8bit groupsize : CLI_ARGS=--model llama-7b --load-in-8bit --listen --auto-devices
+# your command-line flags go here:
+CLI_ARGS=

# the port the webui binds to on the host
HOST_PORT=7860

@@ -21,10 +16,5 @@ HOST_API_PORT=5000
# the port the api binds to inside the container
CONTAINER_API_PORT=5000

-# the port the api stream endpoint binds to on the host
-HOST_API_STREAM_PORT=5005
-# the port the api stream endpoint binds to inside the container
-CONTAINER_API_STREAM_PORT=5005

# the version used to install text-generation-webui from
WEBUI_VERSION=HEAD
@@ -73,5 +73,5 @@ RUN --mount=type=cache,target=/root/.cache/pip,rw \

ENV CLI_ARGS=""

-EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005}
+EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000}
CMD . /app/venv/bin/activate && python3 server.py ${CLI_ARGS}
@@ -11,7 +11,6 @@ services:
    ports:
      - "${HOST_PORT:-7860}:${CONTAINER_PORT:-7860}"
      - "${HOST_API_PORT:-5000}:${CONTAINER_API_PORT:-5000}"
-     - "${HOST_API_STREAM_PORT:-5005}:${CONTAINER_API_STREAM_PORT:-5005}"
    stdin_open: true
    tty: true
    volumes:
@@ -11,9 +11,13 @@ LLMs work by generating one token at a time. Given your prompt, the model calcul

### Preset menu

-Can be used to save combinations of parameters for reuse.
+Can be used to save and load combinations of parameters for reuse.

-The built-in presets were not manually chosen. They were obtained after a blind contest called "Preset Arena" where hundreds of people voted. The full results can be found [here](https://github.com/oobabooga/oobabooga.github.io/blob/main/arena/results.md).
+* **🎲 button**: creates a random yet interpretable preset. Only 1 parameter of each category is included for the categories: removing tail tokens, avoiding repetition, and flattening the distribution. That is, top_p and top_k are not mixed, and neither are repetition_penalty and frequency_penalty. You can use this button to break out of a loop of bad generations after multiple "Regenerate" attempts.
+
+#### Built-in presets
+
+These were obtained after a blind contest called "Preset Arena" where hundreds of people voted. The full results can be found [here](https://github.com/oobabooga/oobabooga.github.io/blob/main/arena/results.md).

A key takeaway is that the best presets are:
@@ -1,13 +1,21 @@
Docker Compose is a way of installing and launching the web UI in an isolated Ubuntu image using only a few commands.

-In order to create the image as described in the main README, you must have docker compose 2.17 or higher:
+## Installing Docker Compose
+
+In order to create the image as described in the main README, you must have Docker Compose installed (2.17 or higher is recommended):

```
~$ docker compose version
-Docker Compose version v2.17.2
+Docker Compose version v2.21.0
```

-Make sure to also create the necessary symbolic links:
+The installation instructions for various Linux distributions can be found here:

https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository

## Launching the image

Use these commands to launch the image:

```
cd text-generation-webui
@@ -17,13 +25,11 @@ cp docker/.env.example .env
docker compose up --build
```

-## Table of contents
+## More detailed installation instructions

* [Docker Compose installation instructions](#docker-compose-installation-instructions)
* [Repository with additional Docker files](#dedicated-docker-repository)

## Docker Compose installation instructions

By [@loeken](https://github.com/loeken).

- [Ubuntu 22.04](#ubuntu-2204)
@@ -97,6 +97,29 @@ curl http://127.0.0.1:5000/v1/chat/completions \
  }'
```

#### Logits

```
curl -k http://127.0.0.1:5000/v1/internal/logits \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": "Who is best, Asuka or Rei? Answer:",
    "use_samplers": false
  }'
```

#### Logits after sampling parameters

```
curl -k http://127.0.0.1:5000/v1/internal/logits \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": "Who is best, Asuka or Rei? Answer:",
    "use_samplers": true,
    "top_k": 3
  }'
```
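The same logits endpoint can be driven from Python. A minimal sketch, assuming a local server on the default port with the API extension enabled and the `requests` package installed (the payload mirrors the curl calls above; add an `Authorization: Bearer` header if `--api-key` is set):

```python
import requests

url = "http://127.0.0.1:5000/v1/internal/logits"
payload = {
    "prompt": "Who is best, Asuka or Rei? Answer:",
    "use_samplers": True,
    "top_k": 3,
}

# The response contains a "logits" dict: keys are candidate next tokens,
# values are their probabilities.
response = requests.post(url, json=payload)
print(response.json()["logits"])
```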
#### Python chat example

```python
@@ -2,13 +2,13 @@

| Loader | Loading 1 LoRA | Loading 2 or more LoRAs | Training LoRAs | Multimodal extension | Perplexity evaluation |
|----------------|----------------|-------------------------|----------------|----------------------|-----------------------|
-| Transformers | ✅ | ✅ | ✅* | ✅ | ✅ |
+| Transformers | ✅ | ✅*** | ✅* | ✅ | ✅ |
| ExLlama_HF | ✅ | ❌ | ❌ | ❌ | ✅ |
| ExLlamav2_HF | ✅ | ✅ | ❌ | ❌ | ✅ |
| ExLlama | ✅ | ❌ | ❌ | ❌ | use ExLlama_HF |
| ExLlamav2 | ✅ | ✅ | ❌ | ❌ | use ExLlamav2_HF |
| AutoGPTQ | ✅ | ❌ | ❌ | ✅ | ✅ |
-| GPTQ-for-LLaMa | ✅** | ✅ | ✅ | ✅ | ✅ |
+| GPTQ-for-LLaMa | ✅** | ✅*** | ✅ | ✅ | ✅ |
| llama.cpp | ❌ | ❌ | ❌ | ❌ | use llamacpp_HF |
| llamacpp_HF | ❌ | ❌ | ❌ | ❌ | ✅ |
| ctransformers | ❌ | ❌ | ❌ | ❌ | ❌ |

@@ -21,3 +21,5 @@

\* Training LoRAs with GPTQ models also works with the Transformers loader. Make sure to check "auto-devices" and "disable_exllama" before loading the model.

\*\* Requires the monkey-patch. The instructions can be found [here](https://github.com/oobabooga/text-generation-webui/wiki/08-%E2%80%90-Additional-Tips#using-loras-with-gptq-for-llama).

+\*\*\* Multi-LoRA in PEFT is tricky and the current implementation does not work reliably in all cases.
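Loading and unloading LoRAs can now also be done over the OpenAI-compatible API via the `/v1/internal/lora/*` endpoints added in `extensions/openai/script.py` below. A minimal, illustrative sketch, assuming a local server with an admin key configured and the `requests` package installed (the key value is a placeholder):

```python
import requests

API_URL = "http://127.0.0.1:5000"
HEADERS = {"Authorization": "Bearer your-admin-key"}  # placeholder admin key

# List the LoRAs available on the server.
loras = requests.get(f"{API_URL}/v1/internal/lora/list", headers=HEADERS).json()
print(loras["lora_names"])

# Apply a LoRA to the currently loaded model, then remove all LoRAs again.
requests.post(f"{API_URL}/v1/internal/lora/load",
              headers=HEADERS,
              json={"lora_names": loras["lora_names"][:1]})
requests.post(f"{API_URL}/v1/internal/lora/unload", headers=HEADERS)
```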
@ -91,11 +91,13 @@ def ui():
|
||||
with gr.Accordion("Character gallery", open=False, elem_id='gallery-extension'):
|
||||
update = gr.Button("Refresh")
|
||||
gr.HTML(value="<style>" + generate_css() + "</style>")
|
||||
gallery = gr.Dataset(components=[gr.HTML(visible=False)],
|
||||
label="",
|
||||
samples=generate_html(),
|
||||
elem_classes=["character-gallery"],
|
||||
samples_per_page=50
|
||||
)
|
||||
gallery = gr.Dataset(
|
||||
components=[gr.HTML(visible=False)],
|
||||
label="",
|
||||
samples=generate_html(),
|
||||
elem_classes=["character-gallery"],
|
||||
samples_per_page=50
|
||||
)
|
||||
|
||||
update.click(generate_html, [], gallery)
|
||||
gallery.select(select_character, None, gradio['character_menu'])
|
||||
|
@ -203,6 +203,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False) -
|
||||
turn_template = body['turn_template'] or turn_template
|
||||
context_instruct = body['context_instruct'] or context_instruct
|
||||
system_message = body['system_message'] or system_message
|
||||
chat_instruct_command = body['chat_instruct_command'] or shared.settings['chat-instruct_command']
|
||||
|
||||
# Chat character
|
||||
character = body['character'] or shared.settings['character']
|
||||
@ -228,7 +229,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False) -
|
||||
'system_message': system_message,
|
||||
'custom_system_message': custom_system_message,
|
||||
'turn_template': turn_template,
|
||||
'chat-instruct_command': body['chat_instruct_command'],
|
||||
'chat-instruct_command': chat_instruct_command,
|
||||
'history': history,
|
||||
'stream': stream
|
||||
})
|
||||
|
@ -1,6 +1,7 @@
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
from transformers import AutoModel
|
||||
|
||||
from extensions.openai.errors import ServiceUnavailableError
|
||||
from extensions.openai.utils import debug_msg, float_list_to_base64
|
||||
@ -41,7 +42,12 @@ def load_embedding_model(model: str):
|
||||
global embeddings_device, embeddings_model
|
||||
try:
|
||||
print(f"Try embedding model: {model} on {embeddings_device}")
|
||||
embeddings_model = SentenceTransformer(model, device=embeddings_device)
|
||||
if 'jina-embeddings' in model:
|
||||
embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True) # trust_remote_code is needed to use the encode method
|
||||
embeddings_model = embeddings_model.to(embeddings_device)
|
||||
else:
|
||||
embeddings_model = SentenceTransformer(model, device=embeddings_device)
|
||||
|
||||
print(f"Loaded embedding model: {model}")
|
||||
except Exception as e:
|
||||
embeddings_model = None
|
||||
|
@ -1,8 +1,9 @@
|
||||
from modules import shared
|
||||
from modules.logging_colors import logger
|
||||
from modules.LoRA import add_lora_to_model
|
||||
from modules.models import load_model, unload_model
|
||||
from modules.models_settings import get_model_metadata, update_model_parameters
|
||||
from modules.utils import get_available_models
|
||||
from modules.utils import get_available_loras, get_available_models
|
||||
|
||||
|
||||
def get_current_model_info():
|
||||
@ -13,12 +14,17 @@ def get_current_model_info():
|
||||
|
||||
|
||||
def list_models():
|
||||
return {'model_names': get_available_models()[1:]}
|
||||
|
||||
|
||||
def list_dummy_models():
|
||||
result = {
|
||||
"object": "list",
|
||||
"data": []
|
||||
}
|
||||
|
||||
for model in get_dummy_models() + get_available_models()[1:]:
|
||||
# these are expected by so much, so include some here as a dummy
|
||||
for model in ['gpt-3.5-turbo', 'text-embedding-ada-002']:
|
||||
result["data"].append(model_info_dict(model))
|
||||
|
||||
return result
|
||||
@ -33,13 +39,6 @@ def model_info_dict(model_name: str) -> dict:
|
||||
}
|
||||
|
||||
|
||||
def get_dummy_models() -> list:
|
||||
return [ # these are expected by so much, so include some here as a dummy
|
||||
'gpt-3.5-turbo',
|
||||
'text-embedding-ada-002',
|
||||
]
|
||||
|
||||
|
||||
def _load_model(data):
|
||||
model_name = data["model_name"]
|
||||
args = data["args"]
|
||||
@ -67,3 +66,15 @@ def _load_model(data):
|
||||
logger.info(f"TRUNCATION LENGTH (UPDATED): {shared.settings['truncation_length']}")
|
||||
elif k == 'instruction_template':
|
||||
logger.info(f"INSTRUCTION TEMPLATE (UPDATED): {shared.settings['instruction_template']}")
|
||||
|
||||
|
||||
def list_loras():
|
||||
return {'lora_names': get_available_loras()[1:]}
|
||||
|
||||
|
||||
def load_loras(lora_names):
|
||||
add_lora_to_model(lora_names)
|
||||
|
||||
|
||||
def unload_all_loras():
|
||||
add_lora_to_model([])
|
||||
|
@ -16,6 +16,7 @@ from sse_starlette import EventSourceResponse
|
||||
import extensions.openai.completions as OAIcompletions
|
||||
import extensions.openai.embeddings as OAIembeddings
|
||||
import extensions.openai.images as OAIimages
|
||||
import extensions.openai.logits as OAIlogits
|
||||
import extensions.openai.models as OAImodels
|
||||
import extensions.openai.moderations as OAImoderations
|
||||
from extensions.openai.errors import ServiceUnavailableError
|
||||
@ -37,8 +38,13 @@ from .typing import (
|
||||
EmbeddingsResponse,
|
||||
EncodeRequest,
|
||||
EncodeResponse,
|
||||
LoadLorasRequest,
|
||||
LoadModelRequest,
|
||||
LogitsRequest,
|
||||
LogitsResponse,
|
||||
LoraListResponse,
|
||||
ModelInfoResponse,
|
||||
ModelListResponse,
|
||||
TokenCountResponse,
|
||||
to_dict
|
||||
)
|
||||
@ -60,7 +66,15 @@ def verify_api_key(authorization: str = Header(None)) -> None:
|
||||
raise HTTPException(status_code=401, detail="Unauthorized")
|
||||
|
||||
|
||||
app = FastAPI(dependencies=[Depends(verify_api_key)])
|
||||
def verify_admin_key(authorization: str = Header(None)) -> None:
|
||||
expected_api_key = shared.args.admin_key
|
||||
if expected_api_key and (authorization is None or authorization != f"Bearer {expected_api_key}"):
|
||||
raise HTTPException(status_code=401, detail="Unauthorized")
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
check_key = [Depends(verify_api_key)]
|
||||
check_admin_key = [Depends(verify_admin_key)]
|
||||
|
||||
# Configure CORS settings to allow all origins, methods, and headers
|
||||
app.add_middleware(
|
||||
@ -72,12 +86,12 @@ app.add_middleware(
|
||||
)
|
||||
|
||||
|
||||
@app.options("/")
|
||||
@app.options("/", dependencies=check_key)
|
||||
async def options_route():
|
||||
return JSONResponse(content="OK")
|
||||
|
||||
|
||||
@app.post('/v1/completions', response_model=CompletionResponse)
|
||||
@app.post('/v1/completions', response_model=CompletionResponse, dependencies=check_key)
|
||||
async def openai_completions(request: Request, request_data: CompletionRequest):
|
||||
path = request.url.path
|
||||
is_legacy = "/generate" in path
|
||||
@ -100,7 +114,7 @@ async def openai_completions(request: Request, request_data: CompletionRequest):
|
||||
return JSONResponse(response)
|
||||
|
||||
|
||||
@app.post('/v1/chat/completions', response_model=ChatCompletionResponse)
|
||||
@app.post('/v1/chat/completions', response_model=ChatCompletionResponse, dependencies=check_key)
|
||||
async def openai_chat_completions(request: Request, request_data: ChatCompletionRequest):
|
||||
path = request.url.path
|
||||
is_legacy = "/generate" in path
|
||||
@ -123,14 +137,14 @@ async def openai_chat_completions(request: Request, request_data: ChatCompletion
|
||||
return JSONResponse(response)
|
||||
|
||||
|
||||
@app.get("/v1/models")
|
||||
@app.get("/v1/models/{model}")
|
||||
@app.get("/v1/models", dependencies=check_key)
|
||||
@app.get("/v1/models/{model}", dependencies=check_key)
|
||||
async def handle_models(request: Request):
|
||||
path = request.url.path
|
||||
is_list = request.url.path.split('?')[0].split('#')[0] == '/v1/models'
|
||||
|
||||
if is_list:
|
||||
response = OAImodels.list_models()
|
||||
response = OAImodels.list_dummy_models()
|
||||
else:
|
||||
model_name = path[len('/v1/models/'):]
|
||||
response = OAImodels.model_info_dict(model_name)
|
||||
@ -138,7 +152,7 @@ async def handle_models(request: Request):
|
||||
return JSONResponse(response)
|
||||
|
||||
|
||||
@app.get('/v1/billing/usage')
|
||||
@app.get('/v1/billing/usage', dependencies=check_key)
|
||||
def handle_billing_usage():
|
||||
'''
|
||||
Ex. /v1/dashboard/billing/usage?start_date=2023-05-01&end_date=2023-05-31
|
||||
@ -146,7 +160,7 @@ def handle_billing_usage():
|
||||
return JSONResponse(content={"total_usage": 0})
|
||||
|
||||
|
||||
@app.post('/v1/audio/transcriptions')
|
||||
@app.post('/v1/audio/transcriptions', dependencies=check_key)
|
||||
async def handle_audio_transcription(request: Request):
|
||||
r = sr.Recognizer()
|
||||
|
||||
@ -176,7 +190,7 @@ async def handle_audio_transcription(request: Request):
|
||||
return JSONResponse(content=transcription)
|
||||
|
||||
|
||||
@app.post('/v1/images/generations')
|
||||
@app.post('/v1/images/generations', dependencies=check_key)
|
||||
async def handle_image_generation(request: Request):
|
||||
|
||||
if not os.environ.get('SD_WEBUI_URL', params.get('sd_webui_url', '')):
|
||||
@ -192,7 +206,7 @@ async def handle_image_generation(request: Request):
|
||||
return JSONResponse(response)
|
||||
|
||||
|
||||
@app.post("/v1/embeddings", response_model=EmbeddingsResponse)
|
||||
@app.post("/v1/embeddings", response_model=EmbeddingsResponse, dependencies=check_key)
|
||||
async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
|
||||
input = request_data.input
|
||||
if not input:
|
||||
@ -205,7 +219,7 @@ async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
|
||||
return JSONResponse(response)
|
||||
|
||||
|
||||
@app.post("/v1/moderations")
|
||||
@app.post("/v1/moderations", dependencies=check_key)
|
||||
async def handle_moderations(request: Request):
|
||||
body = await request.json()
|
||||
input = body["input"]
|
||||
@ -216,37 +230,53 @@ async def handle_moderations(request: Request):
|
||||
return JSONResponse(response)
|
||||
|
||||
|
||||
@app.post("/v1/internal/encode", response_model=EncodeResponse)
|
||||
@app.post("/v1/internal/encode", response_model=EncodeResponse, dependencies=check_key)
|
||||
async def handle_token_encode(request_data: EncodeRequest):
|
||||
response = token_encode(request_data.text)
|
||||
return JSONResponse(response)
|
||||
|
||||
|
||||
@app.post("/v1/internal/decode", response_model=DecodeResponse)
|
||||
@app.post("/v1/internal/decode", response_model=DecodeResponse, dependencies=check_key)
|
||||
async def handle_token_decode(request_data: DecodeRequest):
|
||||
response = token_decode(request_data.tokens)
|
||||
return JSONResponse(response)
|
||||
|
||||
|
||||
@app.post("/v1/internal/token-count", response_model=TokenCountResponse)
|
||||
@app.post("/v1/internal/token-count", response_model=TokenCountResponse, dependencies=check_key)
|
||||
async def handle_token_count(request_data: EncodeRequest):
|
||||
response = token_count(request_data.text)
|
||||
return JSONResponse(response)
|
||||
|
||||
|
||||
@app.post("/v1/internal/stop-generation")
|
||||
@app.post("/v1/internal/logits", response_model=LogitsResponse, dependencies=check_key)
|
||||
async def handle_logits(request_data: LogitsRequest):
|
||||
'''
|
||||
Given a prompt, returns the top 50 most likely logits as a dict.
|
||||
The keys are the tokens, and the values are the probabilities.
|
||||
'''
|
||||
response = OAIlogits._get_next_logits(to_dict(request_data))
|
||||
return JSONResponse(response)
|
||||
|
||||
|
||||
@app.post("/v1/internal/stop-generation", dependencies=check_key)
|
||||
async def handle_stop_generation(request: Request):
|
||||
stop_everything_event()
|
||||
return JSONResponse(content="OK")
|
||||
|
||||
|
||||
@app.get("/v1/internal/model/info", response_model=ModelInfoResponse)
|
||||
@app.get("/v1/internal/model/info", response_model=ModelInfoResponse, dependencies=check_key)
|
||||
async def handle_model_info():
|
||||
payload = OAImodels.get_current_model_info()
|
||||
return JSONResponse(content=payload)
|
||||
|
||||
|
||||
@app.post("/v1/internal/model/load")
|
||||
@app.get("/v1/internal/model/list", response_model=ModelListResponse, dependencies=check_admin_key)
|
||||
async def handle_list_models():
|
||||
payload = OAImodels.list_models()
|
||||
return JSONResponse(content=payload)
|
||||
|
||||
|
||||
@app.post("/v1/internal/model/load", dependencies=check_admin_key)
|
||||
async def handle_load_model(request_data: LoadModelRequest):
|
||||
'''
|
||||
This endpoint is experimental and may change in the future.
|
||||
@ -283,9 +313,30 @@ async def handle_load_model(request_data: LoadModelRequest):
|
||||
return HTTPException(status_code=400, detail="Failed to load the model.")
|
||||
|
||||
|
||||
@app.post("/v1/internal/model/unload")
|
||||
@app.post("/v1/internal/model/unload", dependencies=check_admin_key)
|
||||
async def handle_unload_model():
|
||||
unload_model()
|
||||
|
||||
|
||||
@app.get("/v1/internal/lora/list", response_model=LoraListResponse, dependencies=check_admin_key)
|
||||
async def handle_list_loras():
|
||||
response = OAImodels.list_loras()
|
||||
return JSONResponse(content=response)
|
||||
|
||||
|
||||
@app.post("/v1/internal/lora/load", dependencies=check_admin_key)
|
||||
async def handle_load_loras(request_data: LoadLorasRequest):
|
||||
try:
|
||||
OAImodels.load_loras(request_data.lora_names)
|
||||
return JSONResponse(content="OK")
|
||||
except:
|
||||
traceback.print_exc()
|
||||
return HTTPException(status_code=400, detail="Failed to apply the LoRA(s).")
|
||||
|
||||
|
||||
@app.post("/v1/internal/lora/unload", dependencies=check_admin_key)
|
||||
async def handle_unload_loras():
|
||||
OAImodels.unload_all_loras()
|
||||
return JSONResponse(content="OK")
|
||||
|
||||
|
||||
@ -308,10 +359,19 @@ def run_server():
|
||||
logger.info(f'OpenAI-compatible API URL:\n\nhttp://{server_addr}:{port}\n')
|
||||
|
||||
if shared.args.api_key:
|
||||
if not shared.args.admin_key:
|
||||
shared.args.admin_key = shared.args.api_key
|
||||
|
||||
logger.info(f'OpenAI API key:\n\n{shared.args.api_key}\n')
|
||||
|
||||
if shared.args.admin_key:
|
||||
logger.info(f'OpenAI API admin key (for loading/unloading models):\n\n{shared.args.admin_key}\n')
|
||||
|
||||
uvicorn.run(app, host=server_addr, port=port, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile)
|
||||
|
||||
|
||||
def setup():
|
||||
Thread(target=run_server, daemon=True).start()
|
||||
if shared.args.nowebui:
|
||||
run_server()
|
||||
else:
|
||||
Thread(target=run_server, daemon=True).start()
|
||||
|
@ -122,38 +122,6 @@ class ChatCompletionResponse(BaseModel):
|
||||
usage: dict
|
||||
|
||||
|
||||
class EncodeRequest(BaseModel):
|
||||
text: str
|
||||
|
||||
|
||||
class DecodeRequest(BaseModel):
|
||||
tokens: List[int]
|
||||
|
||||
|
||||
class EncodeResponse(BaseModel):
|
||||
tokens: List[int]
|
||||
length: int
|
||||
|
||||
|
||||
class DecodeResponse(BaseModel):
|
||||
text: str
|
||||
|
||||
|
||||
class TokenCountResponse(BaseModel):
|
||||
length: int
|
||||
|
||||
|
||||
class ModelInfoResponse(BaseModel):
|
||||
model_name: str
|
||||
lora_names: List[str]
|
||||
|
||||
|
||||
class LoadModelRequest(BaseModel):
|
||||
model_name: str
|
||||
args: dict | None = None
|
||||
settings: dict | None = None
|
||||
|
||||
|
||||
class EmbeddingsRequest(BaseModel):
|
||||
input: str | List[str]
|
||||
model: str | None = Field(default=None, description="Unused parameter. To change the model, set the OPENEDAI_EMBEDDING_MODEL and OPENEDAI_EMBEDDING_DEVICE environment variables before starting the server.")
|
||||
@ -167,6 +135,68 @@ class EmbeddingsResponse(BaseModel):
|
||||
object: str = "embedding"
|
||||
|
||||
|
||||
class EncodeRequest(BaseModel):
|
||||
text: str
|
||||
|
||||
|
||||
class EncodeResponse(BaseModel):
|
||||
tokens: List[int]
|
||||
length: int
|
||||
|
||||
|
||||
class DecodeRequest(BaseModel):
|
||||
tokens: List[int]
|
||||
|
||||
|
||||
class DecodeResponse(BaseModel):
|
||||
text: str
|
||||
|
||||
|
||||
class TokenCountResponse(BaseModel):
|
||||
length: int
|
||||
|
||||
|
||||
class LogitsRequestParams(BaseModel):
|
||||
prompt: str
|
||||
use_samplers: bool = False
|
||||
frequency_penalty: float | None = 0
|
||||
max_tokens: int | None = 16
|
||||
presence_penalty: float | None = 0
|
||||
temperature: float | None = 1
|
||||
top_p: float | None = 1
|
||||
|
||||
|
||||
class LogitsRequest(GenerationOptions, LogitsRequestParams):
|
||||
pass
|
||||
|
||||
|
||||
class LogitsResponse(BaseModel):
|
||||
logits: dict
|
||||
|
||||
|
||||
class ModelInfoResponse(BaseModel):
|
||||
model_name: str
|
||||
lora_names: List[str]
|
||||
|
||||
|
||||
class ModelListResponse(BaseModel):
|
||||
model_names: List[str]
|
||||
|
||||
|
||||
class LoadModelRequest(BaseModel):
|
||||
model_name: str
|
||||
args: dict | None = None
|
||||
settings: dict | None = None
|
||||
|
||||
|
||||
class LoraListResponse(BaseModel):
|
||||
lora_names: List[str]
|
||||
|
||||
|
||||
class LoadLorasRequest(BaseModel):
|
||||
lora_names: List[str]
|
||||
|
||||
|
||||
def to_json(obj):
|
||||
return json.dumps(obj.__dict__, indent=4)
|
||||
|
||||
|
js/main.js (38 changed lines)
@ -312,6 +312,10 @@ document.addEventListener("click", function (event) {
|
||||
if (!isMouseOverButtonOrMenu() && menu.style.display === "flex") {
|
||||
hideMenu();
|
||||
}
|
||||
|
||||
if (event.target.classList.contains("pfp_character")) {
|
||||
toggleBigPicture();
|
||||
}
|
||||
});
|
||||
|
||||
//------------------------------------------------
|
||||
@ -335,3 +339,37 @@ document.getElementById("show-controls").parentNode.style.bottom = "0px";
|
||||
// Focus on the chat input
|
||||
//------------------------------------------------
|
||||
document.querySelector("#chat-input textarea").focus();
|
||||
|
||||
//------------------------------------------------
|
||||
// Show enlarged character picture when the profile
|
||||
// picture is clicked on
|
||||
//------------------------------------------------
|
||||
let bigPictureVisible = false;
|
||||
|
||||
function addBigPicture() {
|
||||
var imgElement = document.createElement("img");
|
||||
var timestamp = new Date().getTime();
|
||||
imgElement.src = "/file/cache/pfp_character.png?time=" + timestamp;
|
||||
imgElement.classList.add("bigProfilePicture");
|
||||
|
||||
var imgElementParent = document.getElementById("chat").parentNode.parentNode.parentNode.parentNode.parentNode.parentNode.parentNode;
|
||||
imgElementParent.appendChild(imgElement);
|
||||
}
|
||||
|
||||
function deleteBigPicture() {
|
||||
var bigProfilePictures = document.querySelectorAll('.bigProfilePicture');
|
||||
bigProfilePictures.forEach(function (element) {
|
||||
element.parentNode.removeChild(element);
|
||||
});
|
||||
}
|
||||
|
||||
function toggleBigPicture() {
|
||||
if(bigPictureVisible) {
|
||||
deleteBigPicture();
|
||||
bigPictureVisible = false;
|
||||
} else {
|
||||
addBigPicture();
|
||||
bigPictureVisible = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -10,6 +10,12 @@ function toggle_controls(value) {
|
||||
chatParent.classList.remove("bigchat");
|
||||
document.getElementById("chat-input-row").classList.remove("bigchat");
|
||||
document.getElementById("chat-col").classList.remove("bigchat");
|
||||
|
||||
let gallery_element = document.getElementById('gallery-extension');
|
||||
if (gallery_element) {
|
||||
gallery_element.style.display = 'block';
|
||||
}
|
||||
|
||||
} else {
|
||||
belowChatInput.forEach(element => {
|
||||
element.style.display = "none";
|
||||
|
js/update_big_picture.js (new file, 7 lines)

@@ -0,0 +1,7 @@
function updateBigPicture() {
    var existingElement = document.querySelector('.bigProfilePicture');
    if (existingElement) {
        var timestamp = new Date().getTime();
        existingElement.src = "/file/cache/pfp_character.png?time=" + timestamp;
    }
}
@ -145,14 +145,12 @@ def add_lora_transformers(lora_names):
|
||||
if len(lora_names) > 1:
|
||||
merge_loras()
|
||||
|
||||
shared.lora_names = lora_names
|
||||
return
|
||||
|
||||
# If any LoRA needs to be removed, start over
|
||||
if len(removed_set) > 0:
|
||||
# shared.model may no longer be PeftModel
|
||||
if hasattr(shared.model, 'disable_adapter'):
|
||||
shared.model.disable_adapter()
|
||||
shared.model = shared.model.base_model.model
|
||||
shared.model = shared.model.unload()
|
||||
|
||||
if len(lora_names) > 0:
|
||||
params = {}
|
||||
@ -172,8 +170,6 @@ def add_lora_transformers(lora_names):
|
||||
if len(lora_names) > 1:
|
||||
merge_loras()
|
||||
|
||||
shared.lora_names = lora_names
|
||||
|
||||
if not shared.args.load_in_8bit and not shared.args.cpu:
|
||||
shared.model.half()
|
||||
if not hasattr(shared.model, "hf_device_map"):
|
||||
@ -186,6 +182,8 @@ def add_lora_transformers(lora_names):
|
||||
else:
|
||||
shared.model = shared.model.cuda()
|
||||
|
||||
shared.lora_names = lora_names
|
||||
|
||||
|
||||
def merge_loras():
|
||||
if len(list({shared.model.peft_config[adapter].r for adapter in shared.model.peft_config.keys()})) > 1:
|
||||
|
@ -91,7 +91,13 @@ def generate_chat_prompt(user_input, state, **kwargs):
|
||||
if state['mode'] == 'chat-instruct':
|
||||
wrapper = ''
|
||||
command = state['chat-instruct_command'].replace('<|character|>', state['name2'] if not impersonate else state['name1'])
|
||||
wrapper += state['context_instruct']
|
||||
context_instruct = state['context_instruct']
|
||||
if state['custom_system_message'].strip() != '':
|
||||
context_instruct = context_instruct.replace('<|system-message|>', state['custom_system_message'])
|
||||
else:
|
||||
context_instruct = context_instruct.replace('<|system-message|>', state['system_message'])
|
||||
|
||||
wrapper += context_instruct
|
||||
wrapper += all_substrings['instruct']['user_turn'].replace('<|user-message|>', command)
|
||||
wrapper += all_substrings['instruct']['bot_turn_stripped']
|
||||
if impersonate:
|
||||
@ -539,9 +545,13 @@ def generate_pfp_cache(character):
|
||||
|
||||
for path in [Path(f"characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
|
||||
if path.exists():
|
||||
img = make_thumbnail(Image.open(path))
|
||||
img.save(Path('cache/pfp_character.png'), format='PNG')
|
||||
return img
|
||||
original_img = Image.open(path)
|
||||
original_img.save(Path('cache/pfp_character.png'), format='PNG')
|
||||
|
||||
thumb = make_thumbnail(original_img)
|
||||
thumb.save(Path('cache/pfp_character_thumb.png'), format='PNG')
|
||||
|
||||
return thumb
|
||||
|
||||
return None
|
||||
|
||||
@ -570,8 +580,9 @@ def load_character(character, name1, name2, instruct=False):
|
||||
file_contents = open(filepath, 'r', encoding='utf-8').read()
|
||||
data = json.loads(file_contents) if extension == "json" else yaml.safe_load(file_contents)
|
||||
|
||||
if Path("cache/pfp_character.png").exists() and not instruct:
|
||||
Path("cache/pfp_character.png").unlink()
|
||||
for path in [Path("cache/pfp_character.png"), Path("cache/pfp_character_thumb.png")]:
|
||||
if path.exists() and not instruct:
|
||||
path.unlink()
|
||||
|
||||
picture = generate_pfp_cache(character)
|
||||
|
||||
|
@ -225,7 +225,7 @@ def generate_cai_chat_html(history, name1, name2, style, reset_cache=False):
|
||||
output = f'<style>{chat_styles[style]}</style><div class="chat" id="chat"><div class="messages">'
|
||||
|
||||
# We use ?name2 and ?time.time() to force the browser to reset caches
|
||||
img_bot = f'<img src="file/cache/pfp_character.png?{name2}">' if Path("cache/pfp_character.png").exists() else ''
|
||||
img_bot = f'<img src="file/cache/pfp_character_thumb.png?{name2}" class="pfp_character">' if Path("cache/pfp_character_thumb.png").exists() else ''
|
||||
img_me = f'<img src="file/cache/pfp_me.png?{time.time() if reset_cache else ""}">' if Path("cache/pfp_me.png").exists() else ''
|
||||
|
||||
for i, _row in enumerate(history):
|
||||
|
@ -39,7 +39,7 @@ class LlamacppHF(PreTrainedModel):
|
||||
'n_tokens': self.model.n_tokens,
|
||||
'input_ids': self.model.input_ids,
|
||||
'scores': self.model.scores,
|
||||
'ctx': self.model.ctx
|
||||
'ctx': self.model._ctx
|
||||
}
|
||||
|
||||
if shared.args.cfg_cache:
|
||||
@ -65,7 +65,7 @@ class LlamacppHF(PreTrainedModel):
|
||||
'n_tokens': self.model.n_tokens,
|
||||
'input_ids': self.model.input_ids,
|
||||
'scores': self.model.scores,
|
||||
'ctx': self.model.ctx
|
||||
'ctx': self.model._ctx
|
||||
})
|
||||
|
||||
def save_negative_cache(self):
|
||||
@ -73,20 +73,20 @@ class LlamacppHF(PreTrainedModel):
|
||||
'n_tokens': self.model.n_tokens,
|
||||
'input_ids': self.model.input_ids,
|
||||
'scores': self.model.scores,
|
||||
'ctx': self.model.ctx
|
||||
'ctx': self.model._ctx
|
||||
})
|
||||
|
||||
def load_cache(self):
|
||||
self.model.n_tokens = self.llamacpp_cache['n_tokens']
|
||||
self.model.input_ids = self.llamacpp_cache['input_ids']
|
||||
self.model.scores = self.llamacpp_cache['scores']
|
||||
self.model.ctx = self.llamacpp_cache['ctx']
|
||||
self.model._ctx = self.llamacpp_cache['ctx']
|
||||
|
||||
def load_negative_cache(self):
|
||||
self.model.n_tokens = self.llamacpp_cache_negative['n_tokens']
|
||||
self.model.input_ids = self.llamacpp_cache_negative['input_ids']
|
||||
self.model.scores = self.llamacpp_cache_negative['scores']
|
||||
self.model.ctx = self.llamacpp_cache_negative['ctx']
|
||||
self.model._ctx = self.llamacpp_cache_negative['ctx']
|
||||
|
||||
@property
|
||||
def device(self) -> torch.device:
|
||||
@ -192,7 +192,6 @@ class LlamacppHF(PreTrainedModel):
|
||||
params = {
|
||||
'model_path': str(model_file),
|
||||
'n_ctx': shared.args.n_ctx,
|
||||
'seed': int(shared.args.llama_cpp_seed),
|
||||
'n_threads': shared.args.threads or None,
|
||||
'n_threads_batch': shared.args.threads_batch or None,
|
||||
'n_batch': shared.args.n_batch,
|
||||
|
@ -74,7 +74,6 @@ class LlamaCppModel:
|
||||
params = {
|
||||
'model_path': str(path),
|
||||
'n_ctx': shared.args.n_ctx,
|
||||
'seed': int(shared.args.llama_cpp_seed),
|
||||
'n_threads': shared.args.threads or None,
|
||||
'n_threads_batch': shared.args.threads_batch or None,
|
||||
'n_batch': shared.args.n_batch,
|
||||
@ -144,15 +143,16 @@ class LlamaCppModel:
|
||||
max_tokens=state['max_new_tokens'],
|
||||
temperature=state['temperature'],
|
||||
top_p=state['top_p'],
|
||||
top_k=state['top_k'],
|
||||
repeat_penalty=state['repetition_penalty'],
|
||||
presence_penalty=state['presence_penalty'],
|
||||
frequency_penalty=state['frequency_penalty'],
|
||||
presence_penalty=state['presence_penalty'],
|
||||
repeat_penalty=state['repetition_penalty'],
|
||||
top_k=state['top_k'],
|
||||
stream=True,
|
||||
seed=int(state['seed']) if state['seed'] != -1 else None,
|
||||
tfs_z=state['tfs'],
|
||||
mirostat_mode=int(state['mirostat_mode']),
|
||||
mirostat_tau=state['mirostat_tau'],
|
||||
mirostat_eta=state['mirostat_eta'],
|
||||
stream=True,
|
||||
logits_processor=logit_processors,
|
||||
grammar=self.grammar
|
||||
)
|
||||
|
@ -99,7 +99,6 @@ loaders_and_params = OrderedDict({
|
||||
'no_mmap',
|
||||
'mlock',
|
||||
'no_mul_mat_q',
|
||||
'llama_cpp_seed',
|
||||
'alpha_value',
|
||||
'rope_freq_base',
|
||||
'compress_pos_emb',
|
||||
@ -366,6 +365,7 @@ loaders_samplers = {
|
||||
'repetition_penalty',
|
||||
'presence_penalty',
|
||||
'frequency_penalty',
|
||||
'seed',
|
||||
'mirostat_mode',
|
||||
'mirostat_tau',
|
||||
'mirostat_eta',
|
||||
|
@ -8,7 +8,7 @@ from modules.text_generation import generate_reply
|
||||
global_scores = None
|
||||
|
||||
|
||||
def get_next_logits(prompt, state, use_samplers, previous):
|
||||
def get_next_logits(prompt, state, use_samplers, previous, return_dict=False):
|
||||
if shared.model is None:
|
||||
logger.error("No model is loaded! Select one in the Model tab.")
|
||||
return 'Error: No model is loaded! Select one in the Model tab.', previous
|
||||
@ -56,8 +56,16 @@ def get_next_logits(prompt, state, use_samplers, previous):
|
||||
topk_indices = [i.expand((1, 1)) for i in topk_indices]
|
||||
|
||||
tokens = [shared.tokenizer.decode(i) for i in topk_indices]
|
||||
output = ''
|
||||
for row in list(zip(topk_values, tokens)):
|
||||
output += f"{row[0]} - {repr(row[1])}\n"
|
||||
|
||||
return output, previous
|
||||
if return_dict:
|
||||
output = {}
|
||||
for row in list(zip(topk_values, tokens)):
|
||||
output[row[1]] = row[0]
|
||||
|
||||
return output
|
||||
else:
|
||||
output = ''
|
||||
for row in list(zip(topk_values, tokens)):
|
||||
output += f"{row[0]} - {repr(row[1])}\n"
|
||||
|
||||
return output, previous
|
||||
|
@ -1,8 +1,12 @@
|
||||
import functools
|
||||
import random
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from modules import shared
|
||||
from modules.loaders import loaders_samplers
|
||||
|
||||
|
||||
def default_preset():
|
||||
return {
|
||||
@ -63,6 +67,45 @@ def load_preset_for_ui(name, state):
|
||||
return state, *[generate_params[k] for k in presets_params()]
|
||||
|
||||
|
||||
def random_preset(state):
|
||||
params_and_values = {
|
||||
'remove_tail_tokens': {
|
||||
'top_p': [0.5, 0.8, 0.9, 0.95, 0.99],
|
||||
'min_p': [0.5, 0.2, 0.1, 0.05, 0.01],
|
||||
'top_k': [3, 5, 10, 20, 30, 40],
|
||||
'typical_p': [0.2, 0.575, 0.95],
|
||||
'tfs': [0.5, 0.8, 0.9, 0.95, 0.99],
|
||||
'top_a': [0.5, 0.2, 0.1, 0.05, 0.01],
|
||||
'epsilon_cutoff': [1, 3, 5, 7, 9],
|
||||
'eta_cutoff': [3, 6, 9, 12, 15, 18],
|
||||
},
|
||||
'flatten_distribution': {
|
||||
'temperature': [0.5, 0.7, 0.8, 1, 1.2, 1.5, 2.0],
|
||||
},
|
||||
'repetition': {
|
||||
'repetition_penalty': [1, 1.05, 1.1, 1.15, 1.20, 1.25],
|
||||
'presence_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0],
|
||||
'frequency_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0],
|
||||
},
|
||||
'other': {
|
||||
'temperature_last': [True, False],
|
||||
}
|
||||
}
|
||||
|
||||
generate_params = default_preset()
|
||||
for cat in params_and_values:
|
||||
choices = list(params_and_values[cat].keys())
|
||||
if shared.args.loader is not None:
|
||||
choices = [x for x in choices if x in loaders_samplers[shared.args.loader]]
|
||||
|
||||
if len(choices) > 0:
|
||||
choice = random.choice(choices)
|
||||
generate_params[choice] = random.choice(params_and_values[cat][choice])
|
||||
|
||||
state.update(generate_params)
|
||||
return state, *[generate_params[k] for k in presets_params()]
|
||||
|
||||
|
||||
def generate_preset_yaml(state):
|
||||
defaults = default_preset()
|
||||
data = {k: state[k] for k in presets_params()}
|
||||
|
@ -112,7 +112,6 @@ parser.add_argument('--no-mmap', action='store_true', help='Prevent mmap from be
|
||||
parser.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')
|
||||
parser.add_argument('--n-gpu-layers', type=int, default=0, help='Number of layers to offload to the GPU.')
|
||||
parser.add_argument('--tensor_split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 18,17.')
|
||||
parser.add_argument('--llama_cpp_seed', type=int, default=0, help='Seed for llama-cpp models. Default is 0 (random).')
|
||||
parser.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp.')
|
||||
parser.add_argument('--logits_all', action='store_true', help='Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower.')
|
||||
parser.add_argument('--cache-capacity', type=str, help='Maximum cache capacity (llama-cpp-python). Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed.')
|
||||
@ -171,6 +170,8 @@ parser.add_argument('--public-api', action='store_true', help='Create a public U
|
||||
parser.add_argument('--public-api-id', type=str, help='Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.', default=None)
|
||||
parser.add_argument('--api-port', type=int, default=5000, help='The listening port for the API.')
|
||||
parser.add_argument('--api-key', type=str, default='', help='API authentication key.')
|
||||
parser.add_argument('--admin-key', type=str, default='', help='API authentication key for admin tasks like loading and unloading models. If not set, will be the same as --api-key.')
|
||||
parser.add_argument('--nowebui', action='store_true', help='Do not launch the Gradio UI. Useful for launching the API in standalone mode.')
|
||||
|
||||
# Multimodal
|
||||
parser.add_argument('--multimodal-pipeline', type=str, default=None, help='The multimodal pipeline to use. Examples: llava-7b, llava-13b.')
|
||||
@ -182,6 +183,7 @@ parser.add_argument('--no-stream', action='store_true', help='DEPRECATED')
|
||||
parser.add_argument('--mul_mat_q', action='store_true', help='DEPRECATED')
|
||||
parser.add_argument('--api-blocking-port', type=int, default=5000, help='DEPRECATED')
|
||||
parser.add_argument('--api-streaming-port', type=int, default=5005, help='DEPRECATED')
|
||||
parser.add_argument('--llama_cpp_seed', type=int, default=0, help='DEPRECATED')
|
||||
parser.add_argument('--use_fast', action='store_true', help='DEPRECATED')
|
||||
|
||||
args = parser.parse_args()
|
||||
@ -200,7 +202,7 @@ for k in ['notebook', 'chat', 'no_stream', 'mul_mat_q', 'use_fast']:
|
||||
# Security warnings
|
||||
if args.trust_remote_code:
|
||||
logger.warning('trust_remote_code is enabled. This is dangerous.')
|
||||
if 'COLAB_GPU' not in os.environ:
|
||||
if 'COLAB_GPU' not in os.environ and not args.nowebui:
|
||||
if args.share:
|
||||
logger.warning("The gradio \"share link\" feature uses a proprietary executable to create a reverse tunnel. Use it with care.")
|
||||
if any((args.listen, args.share)) and not any((args.gradio_auth, args.gradio_auth_path)):
|
||||
|
@ -20,6 +20,8 @@ with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f:
|
||||
switch_tabs_js = f.read()
|
||||
with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r') as f:
|
||||
show_controls_js = f.read()
|
||||
with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r') as f:
|
||||
update_big_picture_js = f.read()
|
||||
|
||||
refresh_symbol = '🔄'
|
||||
delete_symbol = '🗑️'
|
||||
@ -80,7 +82,6 @@ def list_model_elements():
|
||||
'n_gpu_layers',
|
||||
'tensor_split',
|
||||
'n_ctx',
|
||||
'llama_cpp_seed',
|
||||
'gpu_split',
|
||||
'max_seq_len',
|
||||
'compress_pos_emb',
|
||||
|
@ -275,7 +275,8 @@ def create_event_handlers():
|
||||
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
|
||||
chat.load_latest_history, gradio('interface_state'), gradio('history')).then(
|
||||
chat.redraw_html, gradio(reload_arr), gradio('display')).then(
|
||||
lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id'))
|
||||
lambda x: gr.update(choices=(histories := chat.find_all_histories(x)), value=histories[0]), gradio('interface_state'), gradio('unique_id')).then(
|
||||
lambda: None, None, None, _js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
|
||||
|
||||
shared.gradio['mode'].change(
|
||||
lambda x: gr.update(visible=x != 'instruct'), gradio('mode'), gradio('chat_style'), show_progress=False).then(
|
||||
|
@ -37,6 +37,14 @@ def create_ui():
|
||||
shared.gradio['delete_character_confirm'] = gr.Button('Delete', elem_classes="small-button", variant='stop', interactive=not mu)
|
||||
shared.gradio['delete_character_cancel'] = gr.Button('Cancel', elem_classes="small-button")
|
||||
|
||||
# Preset saver
|
||||
with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['preset_saver']:
|
||||
shared.gradio['save_preset_filename'] = gr.Textbox(lines=1, label='File name', info='The preset will be saved to your presets/ folder with this base filename.')
|
||||
shared.gradio['save_preset_contents'] = gr.Textbox(lines=10, label='File contents')
|
||||
with gr.Row():
|
||||
shared.gradio['save_preset_confirm'] = gr.Button('Save', elem_classes="small-button", variant='primary', interactive=not mu)
|
||||
shared.gradio['save_preset_cancel'] = gr.Button('Cancel', elem_classes="small-button")
|
||||
|
||||
|
||||
def create_event_handlers():
|
||||
shared.gradio['save_confirm'].click(
|
||||
@ -65,10 +73,16 @@ def create_event_handlers():
|
||||
|
||||
shared.gradio['save_preset'].click(
|
||||
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
|
||||
presets.generate_preset_yaml, gradio('interface_state'), gradio('save_contents')).then(
|
||||
lambda: 'presets/', None, gradio('save_root')).then(
|
||||
lambda: 'My Preset.yaml', None, gradio('save_filename')).then(
|
||||
lambda: gr.update(visible=True), None, gradio('file_saver'))
|
||||
presets.generate_preset_yaml, gradio('interface_state'), gradio('save_preset_contents')).then(
|
||||
lambda: 'My Preset', None, gradio('save_preset_filename')).then(
|
||||
lambda: gr.update(visible=True), None, gradio('preset_saver'))
|
||||
|
||||
shared.gradio['save_preset_confirm'].click(
|
||||
lambda x, y: utils.save_file(f'presets/{x}.yaml', y), gradio('save_preset_filename', 'save_preset_contents'), None).then(
|
||||
lambda: gr.update(visible=False), None, gradio('preset_saver')).then(
|
||||
lambda x: gr.update(choices=utils.get_available_presets(), value=x), gradio('save_preset_filename'), gradio('preset_menu'))
|
||||
|
||||
shared.gradio['save_preset_cancel'].click(lambda: gr.update(visible=False), None, gradio('preset_saver'))
|
||||
|
||||
shared.gradio['delete_preset'].click(
|
||||
lambda x: f'{x}.yaml', gradio('preset_menu'), gradio('delete_filename')).then(
|
||||
|
@ -120,7 +120,6 @@ def create_ui():
|
||||
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
|
||||
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant)
|
||||
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17')
|
||||
shared.gradio['llama_cpp_seed'] = gr.Number(label='Seed (0 for random)', value=shared.args.llama_cpp_seed)
|
||||
shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='To enable this option, start the web UI with the --trust-remote-code flag. It is necessary for some models.', interactive=shared.args.trust_remote_code)
|
||||
shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Create an additional cache for CFG negative prompts.')
|
||||
shared.gradio['logits_all'] = gr.Checkbox(label="logits_all", value=shared.args.logits_all, info='Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower.')
|
||||
|
@ -18,6 +18,7 @@ def create_ui(default_preset):
|
||||
ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button', interactive=not mu)
|
||||
shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
|
||||
shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
|
||||
shared.gradio['random_preset'] = gr.Button('🎲', elem_classes='refresh-button')
|
||||
|
||||
with gr.Column():
|
||||
shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()), value="All", elem_classes='slim-dropdown')
|
||||
@ -90,6 +91,7 @@ def create_ui(default_preset):
|
||||
def create_event_handlers():
|
||||
shared.gradio['filter_by_loader'].change(loaders.blacklist_samplers, gradio('filter_by_loader'), gradio(loaders.list_all_samplers()), show_progress=False)
|
||||
shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()))
|
||||
shared.gradio['random_preset'].click(presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()))
|
||||
shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'))
|
||||
|
||||
|
||||
|
@ -27,14 +27,14 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
|
||||
# llama-cpp-python (CPU only, AVX2)
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp311-cp311-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp310-cp310-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp39-cp39-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp38-cp38-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
@ -67,14 +67,14 @@ https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
@ -27,14 +27,14 @@ bitsandbytes==0.38.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
# llama-cpp-python (CPU only, AVX2)
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp311-cp311-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp310-cp310-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp39-cp39-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp38-cp38-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
# AMD wheels
https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
@ -45,10 +45,10 @@ https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+rocm5
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+rocm5.6-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.6.1-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.6.1-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.18+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.18+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.18+rocm5.6.1-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.18+rocm5.6.1-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
@ -27,14 +27,14 @@ bitsandbytes==0.38.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
# llama-cpp-python (CPU only, no AVX2)
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
# AMD wheels
https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
@ -27,19 +27,15 @@ bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
# Mac wheels
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.8"
@ -27,19 +27,19 @@ bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
# Mac wheels
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp39-cp39-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp38-cp38-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp39-cp39-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.18-cp38-cp38-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.8"
@ -27,11 +27,11 @@ bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
# llama-cpp-python (CPU only, AVX2)
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp311-cp311-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp310-cp310-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp39-cp39-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp38-cp38-manylinux_2_17_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.18/llama_cpp_python-0.2.18-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
@ -27,11 +27,11 @@ bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
# llama-cpp-python (CPU only, no AVX2)
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
@ -27,14 +27,14 @@ bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
# llama-cpp-python (CPU only, no AVX2)
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.18+cpuavx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
# CUDA wheels
https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
@ -67,14 +67,14 @@ https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121avx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.18+cu121avx-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
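Each requirement line above pairs a direct wheel URL with a PEP 508 environment marker after the semicolon, so pip only installs the wheel that matches the current OS, CPU architecture, and Python version. A minimal sketch of how such a marker is evaluated, using the `packaging` library; the marker string below is illustrative, not copied from a specific line:

```
# Minimal sketch: evaluating a PEP 508 environment marker like the ones above.
# Requires the `packaging` library (pip install packaging).
from packaging.markers import Marker

marker = Marker(
    'platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"'
)

# True only when run on 64-bit Linux under Python 3.11; on any other
# platform pip would simply skip a requirement guarded by this marker.
print(marker.evaluate())
```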
24
server.py
@ -226,13 +226,19 @@ if __name__ == "__main__":

     shared.generation_lock = Lock()

-    # Launch the web UI
-    create_interface()
-    while True:
-        time.sleep(0.5)
-        if shared.need_restart:
-            shared.need_restart = False
-            time.sleep(0.5)
-            shared.gradio['interface'].close()
-            time.sleep(0.5)
-            create_interface()
+    if shared.args.nowebui:
+        # Start the API in standalone mode
+        shared.args.extensions = [x for x in shared.args.extensions if x != 'gallery']
+        if shared.args.extensions is not None and len(shared.args.extensions) > 0:
+            extensions_module.load_extensions()
+    else:
+        # Launch the web UI
+        create_interface()
+        while True:
+            time.sleep(0.5)
+            if shared.need_restart:
+                shared.need_restart = False
+                time.sleep(0.5)
+                shared.gradio['interface'].close()
+                time.sleep(0.5)
+                create_interface()
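The new `shared.args.nowebui` branch above loads only the API-related extensions and skips `create_interface()` entirely. As a rough illustration of how that standalone mode might be exercised, here is a minimal sketch; the `--api`/`--nowebui` launch flags come from this change set, but the port, the `/v1/completions` path, the payload shape, and the Bearer-style key header are assumptions based on the project's OpenAI-compatible API rather than anything shown in this diff:

```
# Minimal sketch (assumptions noted in comments): the server is presumed to
# have been started with `python server.py --api --nowebui`, listening on
# port 5000, optionally with --api-key set.
import requests  # third-party HTTP client: pip install requests

API_URL = "http://127.0.0.1:5000/v1/completions"       # assumed OpenAI-compatible endpoint
HEADERS = {"Authorization": "Bearer example-api-key"}   # only relevant if --api-key was set

payload = {
    "prompt": "Hello, my name is",
    "max_tokens": 32,
    "temperature": 0.7,
}

# Send one completion request and print the generated text.
response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=60)
response.raise_for_status()
print(response.json()["choices"][0]["text"])
```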