mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-01-27 12:33:17 +01:00
commit
1934cb61ef
@ -269,6 +269,9 @@ List of command-line flags
|
||||
| `--logits_all`| Needs to be set for perplexity evaluation to work. Otherwise, ignore it, as it makes prompt processing slower. |
|
||||
| `--no_offload_kqv` | Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance. |
|
||||
| `--cache-capacity CACHE_CAPACITY` | Maximum cache capacity (llama-cpp-python). Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed. |
|
||||
| `--row_split` | Split the model by rows across GPUs. This may improve multi-gpu performance. |
|
||||
| `--streaming-llm` | Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed. |
|
||||
| `--attention-sink-size ATTENTION_SINK_SIZE` | StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn't share a prefix with the old prompt. |
|
||||
|
||||
#### ExLlamav2
|
||||
|
||||
|
@ -80,16 +80,17 @@ Example: https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF
|
||||
|
||||
* **n-gpu-layers**: The number of layers to allocate to the GPU. If set to 0, only the CPU will be used. If you want to offload all layers, you can simply set this to the maximum value.
|
||||
* **n_ctx**: Context length of the model. In llama.cpp, the cache is preallocated, so the higher this value, the higher the VRAM usage. It is automatically set to the maximum sequence length for the model based on the metadata inside the GGUF file, but you may need to lower this value to be able to fit the model into your GPU. After loading the model, the "Truncate the prompt up to this length" parameter under "Parameters" > "Generation" is automatically set to your chosen "n_ctx" so that you don't have to set the same thing twice.
|
||||
* **tensor_split**: For multi-gpu only. Sets the amount of memory to allocate per GPU as proportions. Not to be confused with other loaders where this is set in GB; here you can set something like `30,70` for 30%/70%.
|
||||
* **n_batch**: Batch size for prompt processing. Higher values are supposed to make generation faster, but I have never obtained any benefit from changing this value.
|
||||
* **threads**: Number of threads. Recommended value: your number of physical cores.
|
||||
* **threads_batch**: Number of threads for batch processing. Recommended value: your total number of cores (physical + virtual).
|
||||
* **n_batch**: Batch size for prompt processing. Higher values are supposed to make generation faster, but I have never obtained any benefit from changing this value.
|
||||
* **tensorcores**: Use llama.cpp compiled with "tensor cores" support, which improves performance on NVIDIA RTX cards in most cases.
|
||||
* **streamingllm**: Experimental feature to avoid re-evaluating the entire prompt when part of it is removed, for instance, when you hit the context length for the model in chat mode and an old message is removed.
|
||||
* **cpu**: Force a version of llama.cpp compiled without GPU acceleration to be used. Can usually be ignored. Only set this if you want to use CPU only and llama.cpp doesn't work otherwise.
|
||||
* **no_mul_mat_q**: Disable the mul_mat_q kernel. This kernel usually improves generation speed significantly. The option to disable it is included in case it doesn't work on some systems.
|
||||
* **no-mmap**: Loads the model into memory at once, possibly preventing I/O operations later on at the cost of a longer load time.
|
||||
* **mlock**: Force the system to keep the model in RAM rather than swapping or compressing (no idea what this means, never used it).
|
||||
* **numa**: May improve performance on certain multi-cpu systems.
|
||||
* **cpu**: Force a version of llama.cpp compiled without GPU acceleration to be used. Can usually be ignored. Only set this if you want to use CPU only and llama.cpp doesn't work otherwise.
|
||||
* **tensor_split**: For multi-gpu only. Sets the amount of memory to allocate per GPU.
|
||||
* **Seed**: The seed for the llama.cpp random number generator. Not very useful as it can only be set once (as far as I'm aware).
|
||||
|
||||
### llamacpp_HF
|
||||
|
||||
|
@ -250,13 +250,13 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False) -
|
||||
else:
|
||||
instruction_template_str = shared.settings['instruction_template_str']
|
||||
|
||||
chat_template_str = body['chat_template_str'] or shared.settings['chat_template_str']
|
||||
chat_instruct_command = body['chat_instruct_command'] or shared.settings['chat-instruct_command']
|
||||
chat_template_str = body['chat_template_str'] or shared.default_settings['chat_template_str']
|
||||
chat_instruct_command = body['chat_instruct_command'] or shared.default_settings['chat-instruct_command']
|
||||
|
||||
# Chat character
|
||||
character = body['character'] or shared.settings['character']
|
||||
character = body['character'] or shared.default_settings['character']
|
||||
character = "Assistant" if character == "None" else character
|
||||
name1 = body['user_name'] or shared.settings['name1']
|
||||
name1 = body['user_name'] or shared.default_settings['name1']
|
||||
name1, name2, _, greeting, context = load_character_memoized(character, name1, '')
|
||||
name2 = body['bot_name'] or name2
|
||||
context = body['context'] or context
|
||||
|
@ -1,43 +1,24 @@
|
||||
import random
|
||||
|
||||
import chromadb
|
||||
import posthog
|
||||
import torch
|
||||
from chromadb.config import Settings
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from chromadb.utils import embedding_functions
|
||||
|
||||
from modules.logging_colors import logger
|
||||
|
||||
logger.info('Intercepting all calls to posthog :)')
|
||||
# Intercept calls to posthog
|
||||
posthog.capture = lambda *args, **kwargs: None
|
||||
|
||||
|
||||
class Collecter():
|
||||
embedder = embedding_functions.SentenceTransformerEmbeddingFunction("sentence-transformers/all-mpnet-base-v2")
|
||||
|
||||
|
||||
class ChromaCollector():
|
||||
def __init__(self):
|
||||
pass
|
||||
name = ''.join(random.choice('ab') for _ in range(10))
|
||||
|
||||
def add(self, texts: list[str]):
|
||||
pass
|
||||
|
||||
def get(self, search_strings: list[str], n_results: int) -> list[str]:
|
||||
pass
|
||||
|
||||
def clear(self):
|
||||
pass
|
||||
|
||||
|
||||
class Embedder():
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def embed(self, text: str) -> list[torch.Tensor]:
|
||||
pass
|
||||
|
||||
|
||||
class ChromaCollector(Collecter):
|
||||
def __init__(self, embedder: Embedder):
|
||||
super().__init__()
|
||||
self.name = name
|
||||
self.chroma_client = chromadb.Client(Settings(anonymized_telemetry=False))
|
||||
self.embedder = embedder
|
||||
self.collection = self.chroma_client.create_collection(name="context", embedding_function=embedder.embed)
|
||||
self.collection = self.chroma_client.create_collection(name=name, embedding_function=embedder)
|
||||
self.ids = []
|
||||
|
||||
def add(self, texts: list[str]):
|
||||
@ -102,24 +83,15 @@ class ChromaCollector(Collecter):
|
||||
return sorted(ids)
|
||||
|
||||
def clear(self):
|
||||
self.collection.delete(ids=self.ids)
|
||||
self.ids = []
|
||||
|
||||
|
||||
class SentenceTransformerEmbedder(Embedder):
|
||||
def __init__(self) -> None:
|
||||
self.model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
|
||||
self.embed = self.model.encode
|
||||
self.chroma_client.delete_collection(name=self.name)
|
||||
self.collection = self.chroma_client.create_collection(name=self.name, embedding_function=embedder)
|
||||
|
||||
|
||||
def make_collector():
|
||||
global embedder
|
||||
return ChromaCollector(embedder)
|
||||
return ChromaCollector()
|
||||
|
||||
|
||||
def add_chunks_to_collector(chunks, collector):
|
||||
collector.clear()
|
||||
collector.add(chunks)
|
||||
|
||||
|
||||
embedder = SentenceTransformerEmbedder()
|
||||
|
@ -1,5 +1,5 @@
|
||||
beautifulsoup4==4.12.2
|
||||
chromadb==0.3.18
|
||||
chromadb==0.4.24
|
||||
pandas==2.0.3
|
||||
posthog==2.4.2
|
||||
sentence_transformers==2.2.2
|
||||
|
@ -12,17 +12,16 @@ This module is responsible for the VectorDB API. It currently supports:
|
||||
|
||||
import json
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
from threading import Thread
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
from modules import shared
|
||||
from modules.logging_colors import logger
|
||||
|
||||
from .chromadb import ChromaCollector
|
||||
from .data_processor import process_and_add_to_collector
|
||||
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
|
||||
|
||||
class CustomThreadingHTTPServer(ThreadingHTTPServer):
|
||||
def __init__(self, server_address, RequestHandlerClass, collector: ChromaCollector, bind_and_activate=True):
|
||||
@ -38,7 +37,6 @@ class Handler(BaseHTTPRequestHandler):
|
||||
self.collector = collector
|
||||
super().__init__(request, client_address, server)
|
||||
|
||||
|
||||
def _send_412_error(self, message):
|
||||
self.send_response(412)
|
||||
self.send_header("Content-type", "application/json")
|
||||
@ -46,7 +44,6 @@ class Handler(BaseHTTPRequestHandler):
|
||||
response = json.dumps({"error": message})
|
||||
self.wfile.write(response.encode('utf-8'))
|
||||
|
||||
|
||||
def _send_404_error(self):
|
||||
self.send_response(404)
|
||||
self.send_header("Content-type", "application/json")
|
||||
@ -54,14 +51,12 @@ class Handler(BaseHTTPRequestHandler):
|
||||
response = json.dumps({"error": "Resource not found"})
|
||||
self.wfile.write(response.encode('utf-8'))
|
||||
|
||||
|
||||
def _send_400_error(self, error_message: str):
|
||||
self.send_response(400)
|
||||
self.send_header("Content-type", "application/json")
|
||||
self.end_headers()
|
||||
response = json.dumps({"error": error_message})
|
||||
self.wfile.write(response.encode('utf-8'))
|
||||
|
||||
|
||||
def _send_200_response(self, message: str):
|
||||
self.send_response(200)
|
||||
@ -75,24 +70,21 @@ class Handler(BaseHTTPRequestHandler):
|
||||
|
||||
self.wfile.write(response.encode('utf-8'))
|
||||
|
||||
|
||||
def _handle_get(self, search_strings: list[str], n_results: int, max_token_count: int, sort_param: str):
    """Query the collector and wrap the returned chunks in a response dict.

    Args:
        search_strings: Query strings, forwarded unchanged to the collector.
        n_results: Forwarded unchanged to the collector.
        max_token_count: Forwarded unchanged to the collector.
        sort_param: Requested ordering. ``parameters.SORT_ID`` selects the
            id-sorted lookup; ``parameters.SORT_DISTANCE`` and any other
            value select the distance-sorted lookup.

    Returns:
        A dict with the single key ``"results"`` holding whatever the
        collector returned.
    """
    if sort_param == parameters.SORT_DISTANCE:
        results = self.collector.get_sorted_by_dist(search_strings, n_results, max_token_count)
    elif sort_param == parameters.SORT_ID:
        # The collector's id-sorted lookup is named ``get_sorted_by_ids``
        # (plural); the previous ``get_sorted_by_id`` call did not match it.
        results = self.collector.get_sorted_by_ids(search_strings, n_results, max_token_count)
    else:  # Default is dist
        results = self.collector.get_sorted_by_dist(search_strings, n_results, max_token_count)

    return {
        "results": results
    }
|
||||
|
||||
|
||||
def do_GET(self):
|
||||
self._send_404_error()
|
||||
|
||||
|
||||
def do_POST(self):
|
||||
try:
|
||||
content_length = int(self.headers['Content-Length'])
|
||||
@ -107,7 +99,7 @@ class Handler(BaseHTTPRequestHandler):
|
||||
if corpus is None:
|
||||
self._send_412_error("Missing parameter 'corpus'")
|
||||
return
|
||||
|
||||
|
||||
clear_before_adding = body.get('clear_before_adding', False)
|
||||
metadata = body.get('metadata')
|
||||
process_and_add_to_collector(corpus, self.collector, clear_before_adding, metadata)
|
||||
@ -118,7 +110,7 @@ class Handler(BaseHTTPRequestHandler):
|
||||
if corpus is None:
|
||||
self._send_412_error("Missing parameter 'metadata'")
|
||||
return
|
||||
|
||||
|
||||
self.collector.delete(ids_to_delete=None, where=metadata)
|
||||
self._send_200_response("Data successfully deleted")
|
||||
|
||||
@ -127,15 +119,15 @@ class Handler(BaseHTTPRequestHandler):
|
||||
if search_strings is None:
|
||||
self._send_412_error("Missing parameter 'search_strings'")
|
||||
return
|
||||
|
||||
|
||||
n_results = body.get('n_results')
|
||||
if n_results is None:
|
||||
n_results = parameters.get_chunk_count()
|
||||
|
||||
|
||||
max_token_count = body.get('max_token_count')
|
||||
if max_token_count is None:
|
||||
max_token_count = parameters.get_max_token_count()
|
||||
|
||||
|
||||
sort_param = query_params.get('sort', ['distance'])[0]
|
||||
|
||||
results = self._handle_get(search_strings, n_results, max_token_count, sort_param)
|
||||
@ -146,7 +138,6 @@ class Handler(BaseHTTPRequestHandler):
|
||||
except Exception as e:
|
||||
self._send_400_error(str(e))
|
||||
|
||||
|
||||
def do_DELETE(self):
|
||||
try:
|
||||
parsed_path = urlparse(self.path)
|
||||
@ -161,12 +152,10 @@ class Handler(BaseHTTPRequestHandler):
|
||||
except Exception as e:
|
||||
self._send_400_error(str(e))
|
||||
|
||||
|
||||
def do_OPTIONS(self):
|
||||
self.send_response(200)
|
||||
self.end_headers()
|
||||
|
||||
|
||||
def end_headers(self):
|
||||
self.send_header('Access-Control-Allow-Origin', '*')
|
||||
self.send_header('Access-Control-Allow-Methods', '*')
|
||||
@ -197,11 +186,11 @@ class APIManager:
|
||||
|
||||
def stop_server(self):
|
||||
if self.server is not None:
|
||||
logger.info(f'Stopping chromaDB API.')
|
||||
logger.info('Stopping chromaDB API.')
|
||||
self.server.shutdown()
|
||||
self.server.server_close()
|
||||
self.server = None
|
||||
self.is_running = False
|
||||
|
||||
def is_server_running(self):
|
||||
return self.is_running
|
||||
return self.is_running
|
||||
|
@ -9,23 +9,23 @@ The benchmark function will return the score as an integer.
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from .data_processor import process_and_add_to_collector, preprocess_text
|
||||
from .data_processor import preprocess_text, process_and_add_to_collector
|
||||
from .parameters import get_chunk_count, get_max_token_count
|
||||
from .utils import create_metadata_source
|
||||
|
||||
|
||||
def benchmark(config_path, collector):
|
||||
# Get the current system date
|
||||
sysdate = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
filename = f"benchmark_{sysdate}.txt"
|
||||
|
||||
|
||||
# Open the log file in append mode
|
||||
with open(filename, 'a') as log:
|
||||
with open(config_path, 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
|
||||
total_points = 0
|
||||
max_points = 0
|
||||
|
||||
@ -45,7 +45,7 @@ def benchmark(config_path, collector):
|
||||
for question_group in item["questions"]:
|
||||
question_variants = question_group["question_variants"]
|
||||
criteria = question_group["criteria"]
|
||||
|
||||
|
||||
for q in question_variants:
|
||||
max_points += len(criteria)
|
||||
processed_text = preprocess_text(q)
|
||||
@ -54,7 +54,7 @@ def benchmark(config_path, collector):
|
||||
results = collector.get_sorted_by_dist(processed_text, n_results=get_chunk_count(), max_token_count=get_max_token_count())
|
||||
|
||||
points = 0
|
||||
|
||||
|
||||
for c in criteria:
|
||||
for p in results:
|
||||
if c in p:
|
||||
@ -69,4 +69,4 @@ def benchmark(config_path, collector):
|
||||
|
||||
print(f'##Total points:\n\n{total_points}/{max_points}', file=log)
|
||||
|
||||
return total_points, max_points
|
||||
return total_points, max_points
|
||||
|
@ -4,16 +4,17 @@ This module is responsible for modifying the chat prompt and history.
|
||||
import re
|
||||
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
|
||||
from extensions.superboogav2.utils import (
|
||||
create_context_text,
|
||||
create_metadata_source
|
||||
)
|
||||
from modules import chat, shared
|
||||
from modules.text_generation import get_encoded_length
|
||||
from modules.logging_colors import logger
|
||||
from modules.chat import load_character_memoized
|
||||
from extensions.superboogav2.utils import create_context_text, create_metadata_source
|
||||
from modules.logging_colors import logger
|
||||
from modules.text_generation import get_encoded_length
|
||||
|
||||
from .data_processor import process_and_add_to_collector
|
||||
from .chromadb import ChromaCollector
|
||||
|
||||
from .data_processor import process_and_add_to_collector
|
||||
|
||||
CHAT_METADATA = create_metadata_source('automatic-chat-insert')
|
||||
|
||||
@ -21,17 +22,17 @@ CHAT_METADATA = create_metadata_source('automatic-chat-insert')
|
||||
def _remove_tag_if_necessary(user_input: str):
|
||||
if not parameters.get_is_manual():
|
||||
return user_input
|
||||
|
||||
|
||||
return re.sub(r'^\s*!c\s*|\s*!c\s*$', '', user_input)
|
||||
|
||||
|
||||
def _should_query(input: str):
|
||||
if not parameters.get_is_manual():
|
||||
return True
|
||||
|
||||
|
||||
if re.search(r'^\s*!c|!c\s*$', input, re.MULTILINE):
|
||||
return True
|
||||
|
||||
|
||||
return False
|
||||
|
||||
|
||||
@ -69,7 +70,7 @@ def _concatinate_history(history: dict, state: dict):
|
||||
if len(exchange) >= 2:
|
||||
full_history_text += _format_single_exchange(bot_name, exchange[1])
|
||||
|
||||
return full_history_text[:-1] # Remove the last new line.
|
||||
return full_history_text[:-1] # Remove the last new line.
|
||||
|
||||
|
||||
def _hijack_last(context_text: str, history: dict, max_len: int, state: dict):
|
||||
@ -82,20 +83,20 @@ def _hijack_last(context_text: str, history: dict, max_len: int, state: dict):
|
||||
for i, messages in enumerate(reversed(history['internal'])):
|
||||
for j, message in enumerate(reversed(messages)):
|
||||
num_message_tokens = get_encoded_length(_format_single_exchange(names[j], message))
|
||||
|
||||
|
||||
# TODO: This is an extremely naive solution. A more robust implementation must be made.
|
||||
if history_tokens + num_context_tokens <= max_len:
|
||||
# This message can be replaced
|
||||
replace_position = (i, j)
|
||||
|
||||
|
||||
history_tokens += num_message_tokens
|
||||
|
||||
|
||||
if replace_position is None:
|
||||
logger.warn("The provided context_text is too long to replace any message in the history.")
|
||||
else:
|
||||
# replace the message at replace_position with context_text
|
||||
i, j = replace_position
|
||||
history['internal'][-i-1][-j-1] = context_text
|
||||
history['internal'][-i - 1][-j - 1] = context_text
|
||||
|
||||
|
||||
def custom_generate_chat_prompt_internal(user_input: str, state: dict, collector: ChromaCollector, **kwargs):
|
||||
@ -120,5 +121,5 @@ def custom_generate_chat_prompt_internal(user_input: str, state: dict, collector
|
||||
user_input = create_context_text(results) + user_input
|
||||
elif parameters.get_injection_strategy() == parameters.HIJACK_LAST_IN_CONTEXT:
|
||||
_hijack_last(create_context_text(results), kwargs['history'], state['truncation_length'], state)
|
||||
|
||||
|
||||
return chat.generate_chat_prompt(user_input, state, **kwargs)
|
||||
|
@ -1,42 +1,23 @@
|
||||
import threading
|
||||
import chromadb
|
||||
import posthog
|
||||
import torch
|
||||
import math
|
||||
import random
|
||||
import threading
|
||||
|
||||
import chromadb
|
||||
import numpy as np
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
|
||||
import posthog
|
||||
from chromadb.config import Settings
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from chromadb.utils import embedding_functions
|
||||
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
from modules.logging_colors import logger
|
||||
from modules.text_generation import encode, decode
|
||||
from modules.text_generation import decode, encode
|
||||
|
||||
logger.debug('Intercepting all calls to posthog.')
|
||||
# Intercept calls to posthog
|
||||
posthog.capture = lambda *args, **kwargs: None
|
||||
|
||||
|
||||
class Collecter():
|
||||
def __init__(self):
|
||||
pass
|
||||
embedder = embedding_functions.SentenceTransformerEmbeddingFunction("sentence-transformers/all-mpnet-base-v2")
|
||||
|
||||
def add(self, texts: list[str], texts_with_context: list[str], starting_indices: list[int]):
|
||||
pass
|
||||
|
||||
def get(self, search_strings: list[str], n_results: int) -> list[str]:
|
||||
pass
|
||||
|
||||
def clear(self):
|
||||
pass
|
||||
|
||||
|
||||
class Embedder():
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
def embed(self, text: str) -> list[torch.Tensor]:
|
||||
pass
|
||||
|
||||
class Info:
|
||||
def __init__(self, start_index, text_with_context, distance, id):
|
||||
@ -58,7 +39,7 @@ class Info:
|
||||
elif parameters.get_new_dist_strategy() == parameters.DIST_ARITHMETIC_STRATEGY:
|
||||
# Arithmetic mean
|
||||
return (self.distance + other_info.distance) / 2
|
||||
else: # Min is default
|
||||
else: # Min is default
|
||||
return min(self.distance, other_info.distance)
|
||||
|
||||
def merge_with(self, other_info):
|
||||
@ -66,7 +47,7 @@ class Info:
|
||||
s2 = other_info.text_with_context
|
||||
s1_start = self.start_index
|
||||
s2_start = other_info.start_index
|
||||
|
||||
|
||||
new_dist = self.calculate_distance(other_info)
|
||||
|
||||
if self.should_merge(s1, s2, s1_start, s2_start):
|
||||
@ -84,55 +65,58 @@ class Info:
|
||||
return Info(s2_start, s2 + s1[overlap:], new_dist, other_info.id)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
@staticmethod
|
||||
def should_merge(s1, s2, s1_start, s2_start):
|
||||
# Check if s1 and s2 are adjacent or overlapping
|
||||
s1_end = s1_start + len(s1)
|
||||
s2_end = s2_start + len(s2)
|
||||
|
||||
|
||||
return not (s1_end < s2_start or s2_end < s1_start)
|
||||
|
||||
class ChromaCollector(Collecter):
|
||||
def __init__(self, embedder: Embedder):
|
||||
super().__init__()
|
||||
|
||||
class ChromaCollector():
|
||||
def __init__(self):
|
||||
name = ''.join(random.choice('ab') for _ in range(10))
|
||||
|
||||
self.name = name
|
||||
self.chroma_client = chromadb.Client(Settings(anonymized_telemetry=False))
|
||||
self.embedder = embedder
|
||||
self.collection = self.chroma_client.create_collection(name="context", embedding_function=self.embedder.embed)
|
||||
self.collection = self.chroma_client.create_collection(name=name, embedding_function=embedder)
|
||||
|
||||
self.ids = []
|
||||
self.id_to_info = {}
|
||||
self.embeddings_cache = {}
|
||||
self.lock = threading.Lock() # Locking so the server doesn't break.
|
||||
self.lock = threading.Lock() # Locking so the server doesn't break.
|
||||
|
||||
def add(self, texts: list[str], texts_with_context: list[str], starting_indices: list[int], metadatas: list[dict] = None):
|
||||
with self.lock:
|
||||
assert metadatas is None or len(metadatas) == len(texts), "metadatas must be None or have the same length as texts"
|
||||
|
||||
if len(texts) == 0:
|
||||
|
||||
if len(texts) == 0:
|
||||
return
|
||||
|
||||
new_ids = self._get_new_ids(len(texts))
|
||||
|
||||
(existing_texts, existing_embeddings, existing_ids, existing_metas), \
|
||||
(non_existing_texts, non_existing_ids, non_existing_metas) = self._split_texts_by_cache_hit(texts, new_ids, metadatas)
|
||||
(non_existing_texts, non_existing_ids, non_existing_metas) = self._split_texts_by_cache_hit(texts, new_ids, metadatas)
|
||||
|
||||
# If there are any already existing texts, add them all at once.
|
||||
if existing_texts:
|
||||
logger.info(f'Adding {len(existing_embeddings)} cached embeddings.')
|
||||
args = {'embeddings': existing_embeddings, 'documents': existing_texts, 'ids': existing_ids}
|
||||
if metadatas is not None:
|
||||
if metadatas is not None:
|
||||
args['metadatas'] = existing_metas
|
||||
self.collection.add(**args)
|
||||
|
||||
# If there are any non-existing texts, compute their embeddings all at once. Each call to embed has significant overhead.
|
||||
if non_existing_texts:
|
||||
non_existing_embeddings = self.embedder.embed(non_existing_texts).tolist()
|
||||
non_existing_embeddings = embedder(non_existing_texts)
|
||||
for text, embedding in zip(non_existing_texts, non_existing_embeddings):
|
||||
self.embeddings_cache[text] = embedding
|
||||
|
||||
logger.info(f'Adding {len(non_existing_embeddings)} new embeddings.')
|
||||
args = {'embeddings': non_existing_embeddings, 'documents': non_existing_texts, 'ids': non_existing_ids}
|
||||
if metadatas is not None:
|
||||
if metadatas is not None:
|
||||
args['metadatas'] = non_existing_metas
|
||||
self.collection.add(**args)
|
||||
|
||||
@ -145,7 +129,6 @@ class ChromaCollector(Collecter):
|
||||
self.id_to_info.update(new_info)
|
||||
self.ids.extend(new_ids)
|
||||
|
||||
|
||||
def _split_texts_by_cache_hit(self, texts: list[str], new_ids: list[str], metadatas: list[dict]):
|
||||
existing_texts, non_existing_texts = [], []
|
||||
existing_embeddings = []
|
||||
@ -169,7 +152,6 @@ class ChromaCollector(Collecter):
|
||||
return (existing_texts, existing_embeddings, existing_ids, existing_metas), \
|
||||
(non_existing_texts, non_existing_ids, non_existing_metas)
|
||||
|
||||
|
||||
def _get_new_ids(self, num_new_ids: int):
|
||||
if self.ids:
|
||||
max_existing_id = max(int(id_) for id_ in self.ids)
|
||||
@ -178,7 +160,6 @@ class ChromaCollector(Collecter):
|
||||
|
||||
return [str(i + max_existing_id + 1) for i in range(num_new_ids)]
|
||||
|
||||
|
||||
def _find_min_max_start_index(self):
|
||||
max_index, min_index = 0, float('inf')
|
||||
for _, val in self.id_to_info.items():
|
||||
@ -188,34 +169,34 @@ class ChromaCollector(Collecter):
|
||||
min_index = val['start_index']
|
||||
return min_index, max_index
|
||||
|
||||
|
||||
# NB: Does not make sense to weigh excerpts from different documents.
|
||||
# NB: Does not make sense to weigh excerpts from different documents.
|
||||
# But let's say that's the user's problem. Perfect world scenario:
|
||||
# Apply time weighing to different documents. For each document, then, add
|
||||
# separate time weighing.
|
||||
|
||||
def _apply_sigmoid_time_weighing(self, infos: list[Info], document_len: int, time_steepness: float, time_power: float):
|
||||
sigmoid = lambda x: 1 / (1 + np.exp(-x))
|
||||
|
||||
def sigmoid(x):
|
||||
return 1 / (1 + np.exp(-x))
|
||||
|
||||
weights = sigmoid(time_steepness * np.linspace(-10, 10, document_len))
|
||||
|
||||
# Scale to [0,time_power] and shift it up to [1-time_power, 1]
|
||||
weights = weights - min(weights)
|
||||
weights = weights - min(weights)
|
||||
weights = weights * (time_power / max(weights))
|
||||
weights = weights + (1 - time_power)
|
||||
weights = weights + (1 - time_power)
|
||||
|
||||
# Reverse the weights
|
||||
weights = weights[::-1]
|
||||
weights = weights[::-1]
|
||||
|
||||
for info in infos:
|
||||
index = info.start_index
|
||||
info.distance *= weights[index]
|
||||
|
||||
|
||||
def _filter_outliers_by_median_distance(self, infos: list[Info], significant_level: float):
|
||||
# Ensure there are infos to filter
|
||||
if not infos:
|
||||
return []
|
||||
|
||||
|
||||
# Find info with minimum distance
|
||||
min_info = min(infos, key=lambda x: x.distance)
|
||||
|
||||
@ -231,7 +212,6 @@ class ChromaCollector(Collecter):
|
||||
|
||||
return filtered_infos
|
||||
|
||||
|
||||
def _merge_infos(self, infos: list[Info]):
|
||||
merged_infos = []
|
||||
current_info = infos[0]
|
||||
@ -247,8 +227,8 @@ class ChromaCollector(Collecter):
|
||||
merged_infos.append(current_info)
|
||||
return merged_infos
|
||||
|
||||
|
||||
# Main function for retrieving chunks by distance. It performs merging, time weighing, and mean filtering.
|
||||
|
||||
def _get_documents_ids_distances(self, search_strings: list[str], n_results: int):
|
||||
n_results = min(len(self.ids), n_results)
|
||||
if n_results == 0:
|
||||
@ -262,11 +242,11 @@ class ChromaCollector(Collecter):
|
||||
|
||||
for search_string in search_strings:
|
||||
result = self.collection.query(query_texts=search_string, n_results=math.ceil(n_results / len(search_strings)), include=['distances'])
|
||||
curr_infos = [Info(start_index=self.id_to_info[id]['start_index'],
|
||||
text_with_context=self.id_to_info[id]['text_with_context'],
|
||||
distance=distance, id=id)
|
||||
curr_infos = [Info(start_index=self.id_to_info[id]['start_index'],
|
||||
text_with_context=self.id_to_info[id]['text_with_context'],
|
||||
distance=distance, id=id)
|
||||
for id, distance in zip(result['ids'][0], result['distances'][0])]
|
||||
|
||||
|
||||
self._apply_sigmoid_time_weighing(infos=curr_infos, document_len=max_start_index - min_start_index + 1, time_steepness=parameters.get_time_steepness(), time_power=parameters.get_time_power())
|
||||
curr_infos = self._filter_outliers_by_median_distance(curr_infos, parameters.get_significant_level())
|
||||
infos.extend(curr_infos)
|
||||
@ -279,23 +259,23 @@ class ChromaCollector(Collecter):
|
||||
distances = [inf.distance for inf in infos]
|
||||
|
||||
return texts_with_context, ids, distances
|
||||
|
||||
|
||||
# Get chunks by similarity
|
||||
|
||||
def get(self, search_strings: list[str], n_results: int) -> list[str]:
|
||||
with self.lock:
|
||||
documents, _, _ = self._get_documents_ids_distances(search_strings, n_results)
|
||||
return documents
|
||||
|
||||
|
||||
# Get ids by similarity
|
||||
|
||||
def get_ids(self, search_strings: list[str], n_results: int) -> list[str]:
|
||||
with self.lock:
|
||||
_, ids, _ = self._get_documents_ids_distances(search_strings, n_results)
|
||||
return ids
|
||||
|
||||
|
||||
|
||||
# Cutoff token count
|
||||
|
||||
def _get_documents_up_to_token_count(self, documents: list[str], max_token_count: int):
|
||||
# TODO: Move to caller; We add delimiters there which might go over the limit.
|
||||
current_token_count = 0
|
||||
@ -308,7 +288,7 @@ class ChromaCollector(Collecter):
|
||||
# If adding this document would exceed the max token count,
|
||||
# truncate the document to fit within the limit.
|
||||
remaining_tokens = max_token_count - current_token_count
|
||||
|
||||
|
||||
truncated_doc = decode(doc_tokens[:remaining_tokens], skip_special_tokens=True)
|
||||
return_documents.append(truncated_doc)
|
||||
break
|
||||
@ -317,29 +297,28 @@ class ChromaCollector(Collecter):
|
||||
current_token_count += doc_token_count
|
||||
|
||||
return return_documents
|
||||
|
||||
|
||||
# Get chunks by similarity and then sort by ids
|
||||
|
||||
def get_sorted_by_ids(self, search_strings: list[str], n_results: int, max_token_count: int) -> list[str]:
|
||||
with self.lock:
|
||||
documents, ids, _ = self._get_documents_ids_distances(search_strings, n_results)
|
||||
sorted_docs = [x for _, x in sorted(zip(ids, documents))]
|
||||
|
||||
return self._get_documents_up_to_token_count(sorted_docs, max_token_count)
|
||||
|
||||
|
||||
|
||||
# Get chunks by similarity and then sort by distance (lowest distance is last).
|
||||
|
||||
def get_sorted_by_dist(self, search_strings: list[str], n_results: int, max_token_count: int) -> list[str]:
|
||||
with self.lock:
|
||||
documents, _, distances = self._get_documents_ids_distances(search_strings, n_results)
|
||||
sorted_docs = [doc for doc, _ in sorted(zip(documents, distances), key=lambda x: x[1])] # sorted lowest -> highest
|
||||
|
||||
sorted_docs = [doc for doc, _ in sorted(zip(documents, distances), key=lambda x: x[1])] # sorted lowest -> highest
|
||||
|
||||
# If a document is truncated or completely skipped, it will be one with a high distance.
|
||||
return_documents = self._get_documents_up_to_token_count(sorted_docs, max_token_count)
|
||||
return_documents.reverse() # highest -> lowest
|
||||
return_documents.reverse() # highest -> lowest
|
||||
|
||||
return return_documents
|
||||
|
||||
|
||||
def delete(self, ids_to_delete: list[str], where: dict):
|
||||
with self.lock:
|
||||
@ -354,23 +333,16 @@ class ChromaCollector(Collecter):
|
||||
|
||||
logger.info(f'Successfully deleted {len(ids_to_delete)} records from chromaDB.')
|
||||
|
||||
|
||||
def clear(self):
|
||||
with self.lock:
|
||||
self.chroma_client.reset()
|
||||
self.collection = self.chroma_client.create_collection("context", embedding_function=self.embedder.embed)
|
||||
|
||||
self.ids = []
|
||||
self.id_to_info = {}
|
||||
self.chroma_client.delete_collection(name=self.name)
|
||||
self.collection = self.chroma_client.create_collection(name=self.name, embedding_function=embedder)
|
||||
|
||||
logger.info('Successfully cleared all records and reset chromaDB.')
|
||||
|
||||
|
||||
class SentenceTransformerEmbedder(Embedder):
|
||||
def __init__(self) -> None:
|
||||
logger.debug('Creating Sentence Embedder...')
|
||||
self.model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
|
||||
self.embed = self.model.encode
|
||||
|
||||
|
||||
def make_collector():
|
||||
return ChromaCollector(SentenceTransformerEmbedder())
|
||||
return ChromaCollector()
|
||||
|
@ -11,32 +11,29 @@ This module contains utils for preprocessing the text before converting it to em
|
||||
* removing specific parts of speech (adverbs and interjections)
|
||||
- TextSummarizer extracts the most important sentences from a long string using text-ranking.
|
||||
"""
|
||||
import pytextrank
|
||||
import string
|
||||
import spacy
|
||||
import math
|
||||
import nltk
|
||||
import re
|
||||
import string
|
||||
|
||||
import nltk
|
||||
import spacy
|
||||
from nltk.corpus import stopwords
|
||||
from nltk.stem import WordNetLemmatizer
|
||||
from num2words import num2words
|
||||
|
||||
|
||||
class TextPreprocessorBuilder:
|
||||
# Define class variables as None initially
|
||||
# Define class variables as None initially
|
||||
_stop_words = set(stopwords.words('english'))
|
||||
_lemmatizer = WordNetLemmatizer()
|
||||
|
||||
|
||||
# Some of the functions are expensive. We cache the results.
|
||||
_lemmatizer_cache = {}
|
||||
_pos_remove_cache = {}
|
||||
|
||||
|
||||
def __init__(self, text: str):
|
||||
self.text = text
|
||||
|
||||
|
||||
def to_lower(self):
|
||||
# Match both words and non-word characters
|
||||
tokens = re.findall(r'\b\w+\b|\W+', self.text)
|
||||
@ -49,7 +46,6 @@ class TextPreprocessorBuilder:
|
||||
self.text = "".join(tokens)
|
||||
return self
|
||||
|
||||
|
||||
def num_to_word(self, min_len: int = 1):
|
||||
# Match both words and non-word characters
|
||||
tokens = re.findall(r'\b\w+\b|\W+', self.text)
|
||||
@ -58,11 +54,10 @@ class TextPreprocessorBuilder:
|
||||
if token.isdigit() and len(token) >= min_len:
|
||||
# This is done to pay better attention to numbers (e.g. ticket numbers, thread numbers, post numbers)
|
||||
# 740700 will become "seven hundred and forty thousand seven hundred".
|
||||
tokens[i] = num2words(int(token)).replace(",","") # Remove commas from num2words.
|
||||
tokens[i] = num2words(int(token)).replace(",", "") # Remove commas from num2words.
|
||||
self.text = "".join(tokens)
|
||||
return self
|
||||
|
||||
|
||||
def num_to_char_long(self, min_len: int = 1):
|
||||
# Match both words and non-word characters
|
||||
tokens = re.findall(r'\b\w+\b|\W+', self.text)
|
||||
@ -71,11 +66,13 @@ class TextPreprocessorBuilder:
|
||||
if token.isdigit() and len(token) >= min_len:
|
||||
# This is done to pay better attention to numbers (e.g. ticket numbers, thread numbers, post numbers)
|
||||
# 740700 will become HHHHHHEEEEEAAAAHHHAAA
|
||||
convert_token = lambda token: ''.join((chr(int(digit) + 65) * (i + 1)) for i, digit in enumerate(token[::-1]))[::-1]
|
||||
def convert_token(token):
|
||||
return ''.join((chr(int(digit) + 65) * (i + 1)) for i, digit in enumerate(token[::-1]))[::-1]
|
||||
|
||||
tokens[i] = convert_token(tokens[i])
|
||||
self.text = "".join(tokens)
|
||||
return self
|
||||
|
||||
|
||||
def num_to_char(self, min_len: int = 1):
|
||||
# Match both words and non-word characters
|
||||
tokens = re.findall(r'\b\w+\b|\W+', self.text)
|
||||
@ -87,15 +84,15 @@ class TextPreprocessorBuilder:
|
||||
tokens[i] = ''.join(chr(int(digit) + 65) for digit in token)
|
||||
self.text = "".join(tokens)
|
||||
return self
|
||||
|
||||
|
||||
def merge_spaces(self):
    """Collapse every run of consecutive space characters into a single space.

    Returns:
        The builder itself, so calls can be chained.
    """
    collapsed = re.sub(' +', ' ', self.text)
    self.text = collapsed
    return self
|
||||
|
||||
|
||||
def strip(self):
    """Remove leading and trailing whitespace from the held text.

    Returns:
        The builder itself, so calls can be chained.
    """
    trimmed = self.text.strip()
    self.text = trimmed
    return self
|
||||
|
||||
|
||||
def remove_punctuation(self):
    """Delete every character listed in ``string.punctuation`` from the held text.

    Returns:
        The builder itself, so calls can be chained.
    """
    # A translation table with only a deletion set removes the characters in one pass.
    deletion_table = str.maketrans('', '', string.punctuation)
    self.text = self.text.translate(deletion_table)
    return self
|
||||
@ -103,7 +100,7 @@ class TextPreprocessorBuilder:
|
||||
def remove_stopwords(self):
    """Drop every token that appears in the class-level stop-word set.

    The text is split into word tokens and the non-word runs between them,
    so separators are preserved when the remaining tokens are joined back.

    Returns:
        The builder itself, so calls can be chained.
    """
    kept_tokens = []
    for token in re.findall(r'\b\w+\b|\W+', self.text):
        if token in TextPreprocessorBuilder._stop_words:
            continue
        kept_tokens.append(token)

    self.text = "".join(kept_tokens)
    return self
|
||||
|
||||
|
||||
def remove_specific_pos(self):
|
||||
"""
|
||||
In the English language, adverbs and interjections rarely provide meaningful information.
|
||||
@ -140,7 +137,7 @@ class TextPreprocessorBuilder:
|
||||
if processed_text:
|
||||
self.text = processed_text
|
||||
return self
|
||||
|
||||
|
||||
new_text = "".join([TextPreprocessorBuilder._lemmatizer.lemmatize(word) for word in re.findall(r'\b\w+\b|\W+', self.text)])
|
||||
TextPreprocessorBuilder._lemmatizer_cache[self.text] = new_text
|
||||
self.text = new_text
|
||||
@ -150,6 +147,7 @@ class TextPreprocessorBuilder:
|
||||
def build(self):
    """Return the text currently held by the builder."""
    return self.text
|
||||
|
||||
|
||||
class TextSummarizer:
|
||||
_nlp_pipeline = None
|
||||
_cache = {}
|
||||
@ -165,7 +163,7 @@ class TextSummarizer:
|
||||
@staticmethod
|
||||
def process_long_text(text: str, min_num_sent: int) -> list[str]:
|
||||
"""
|
||||
This function applies a text summarization process on a given text string, extracting
|
||||
This function applies a text summarization process on a given text string, extracting
|
||||
the most important sentences based on the principle that 20% of the content is responsible
|
||||
for 80% of the meaning (the Pareto Principle).
|
||||
|
||||
@ -193,7 +191,7 @@ class TextSummarizer:
|
||||
|
||||
else:
|
||||
result = [text]
|
||||
|
||||
|
||||
# Store the result in cache before returning it
|
||||
TextSummarizer._cache[cache_key] = result
|
||||
return result
|
||||
return result
|
||||
|
@ -1,16 +1,17 @@
|
||||
"""
|
||||
This module is responsible for processing the corpus and feeding it into chromaDB. It will receive a corpus of text.
|
||||
This module is responsible for processing the corpus and feeding it into chromaDB. It will receive a corpus of text.
|
||||
It will then split it into chunks of specified length. For each of those chunks, it will append surrounding context.
|
||||
It will only include full words.
|
||||
"""
|
||||
|
||||
import re
|
||||
import bisect
|
||||
import re
|
||||
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
|
||||
from .data_preprocessor import TextPreprocessorBuilder, TextSummarizer
|
||||
from .chromadb import ChromaCollector
|
||||
from .data_preprocessor import TextPreprocessorBuilder, TextSummarizer
|
||||
|
||||
|
||||
def preprocess_text_no_summary(text) -> str:
|
||||
builder = TextPreprocessorBuilder(text)
|
||||
@ -42,7 +43,7 @@ def preprocess_text_no_summary(text) -> str:
|
||||
builder.num_to_char(parameters.get_min_num_length())
|
||||
elif parameters.get_num_conversion_strategy() == parameters.NUM_TO_CHAR_LONG_METHOD:
|
||||
builder.num_to_char_long(parameters.get_min_num_length())
|
||||
|
||||
|
||||
return builder.build()
|
||||
|
||||
|
||||
@ -53,10 +54,10 @@ def preprocess_text(text) -> list[str]:
|
||||
|
||||
def _create_chunks_with_context(corpus, chunk_len, context_left, context_right):
|
||||
"""
|
||||
This function takes a corpus of text and splits it into chunks of a specified length,
|
||||
then adds a specified amount of context to each chunk. The context is added by first
|
||||
going backwards from the start of the chunk and then going forwards from the end of the
|
||||
chunk, ensuring that the context includes only whole words and that the total context length
|
||||
This function takes a corpus of text and splits it into chunks of a specified length,
|
||||
then adds a specified amount of context to each chunk. The context is added by first
|
||||
going backwards from the start of the chunk and then going forwards from the end of the
|
||||
chunk, ensuring that the context includes only whole words and that the total context length
|
||||
does not exceed the specified limit. This function uses binary search for efficiency.
|
||||
|
||||
Returns:
|
||||
@ -102,7 +103,7 @@ def _create_chunks_with_context(corpus, chunk_len, context_left, context_right):
|
||||
# Combine all the words in the context range (before, chunk, and after)
|
||||
chunk_with_context = ''.join(words[context_start_index:context_end_index])
|
||||
chunks_with_context.append(chunk_with_context)
|
||||
|
||||
|
||||
# Determine the start index of the chunk with context
|
||||
chunk_with_context_start_index = word_start_indices[context_start_index]
|
||||
chunk_with_context_start_indices.append(chunk_with_context_start_index)
|
||||
@ -125,9 +126,9 @@ def _clear_chunks(data_chunks, data_chunks_with_context, data_chunk_starting_ind
|
||||
seen_chunk_start = seen_chunks.get(chunk)
|
||||
if seen_chunk_start:
|
||||
# If we've already seen this exact chunk, and the context around it is very close to the seen chunk, then skip it.
|
||||
if abs(seen_chunk_start-index) < parameters.get_delta_start():
|
||||
if abs(seen_chunk_start - index) < parameters.get_delta_start():
|
||||
continue
|
||||
|
||||
|
||||
distinct_data_chunks.append(chunk)
|
||||
distinct_data_chunks_with_context.append(context)
|
||||
distinct_data_chunk_starting_indices.append(index)
|
||||
@ -206,4 +207,4 @@ def process_and_add_to_collector(corpus: str, collector: ChromaCollector, clear_
|
||||
|
||||
if clear_collector_before_adding:
|
||||
collector.clear()
|
||||
collector.add(data_chunks, data_chunks_with_context, data_chunk_starting_indices, [metadata]*len(data_chunks) if metadata is not None else None)
|
||||
collector.add(data_chunks, data_chunks_with_context, data_chunk_starting_indices, [metadata] * len(data_chunks) if metadata is not None else None)
|
||||
|
@ -1,7 +1,7 @@
|
||||
import concurrent.futures
|
||||
import requests
|
||||
import re
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
@ -9,6 +9,7 @@ import extensions.superboogav2.parameters as parameters
|
||||
from .data_processor import process_and_add_to_collector
|
||||
from .utils import create_metadata_source
|
||||
|
||||
|
||||
def _download_single(url):
|
||||
response = requests.get(url, timeout=5)
|
||||
if response.status_code == 200:
|
||||
@ -62,4 +63,4 @@ def feed_url_into_collector(urls, collector):
|
||||
text = '\n'.join([s.strip() for s in strings])
|
||||
all_text += text
|
||||
|
||||
process_and_add_to_collector(all_text, collector, False, create_metadata_source('url-download'))
|
||||
process_and_add_to_collector(all_text, collector, False, create_metadata_source('url-download'))
|
||||
|
@ -4,13 +4,12 @@ This module is responsible for handling and modifying the notebook text.
|
||||
import re
|
||||
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
|
||||
from modules import shared
|
||||
from modules.logging_colors import logger
|
||||
from extensions.superboogav2.utils import create_context_text
|
||||
from modules.logging_colors import logger
|
||||
|
||||
from .data_processor import preprocess_text
|
||||
|
||||
|
||||
def _remove_special_tokens(string):
    """Strip the user-input and injection-point marker tokens from ``string``.

    Returns:
        The input with every ``<|begin-user-input|>``, ``<|end-user-input|>``
        and ``<|injection-point|>`` occurrence removed.
    """
    marker_regex = re.compile(r'(<\|begin-user-input\|>|<\|end-user-input\|>|<\|injection-point\|>)')
    return marker_regex.sub('', string)
|
||||
@ -37,4 +36,4 @@ def input_modifier_internal(string, collector, is_chat):
|
||||
# Make the injection
|
||||
string = string.replace('<|injection-point|>', create_context_text(results))
|
||||
|
||||
return _remove_special_tokens(string)
|
||||
return _remove_special_tokens(string)
|
||||
|
@ -3,22 +3,24 @@ This module implements a hyperparameter optimization routine for the embedding a
|
||||
|
||||
Each run, the optimizer will set the default values inside the hyperparameters. At the end, it will output the best ones it has found.
|
||||
"""
|
||||
import re
|
||||
import hashlib
|
||||
import json
|
||||
import optuna
|
||||
import logging
|
||||
import re
|
||||
|
||||
import gradio as gr
|
||||
import numpy as np
|
||||
import logging
|
||||
import hashlib
|
||||
logging.getLogger('optuna').setLevel(logging.WARNING)
|
||||
import optuna
|
||||
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
logging.getLogger('optuna').setLevel(logging.WARNING)
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
from modules.logging_colors import logger
|
||||
|
||||
from .benchmark import benchmark
|
||||
from .parameters import Parameters
|
||||
from modules.logging_colors import logger
|
||||
|
||||
|
||||
# Format the parameters into markdown format.
|
||||
@ -28,7 +30,7 @@ def _markdown_hyperparams():
|
||||
# Escape any markdown syntax
|
||||
param_name = re.sub(r"([_*\[\]()~`>#+-.!])", r"\\\1", param_name)
|
||||
param_value_default = re.sub(r"([_*\[\]()~`>#+-.!])", r"\\\1", str(param_value['default'])) if param_value['default'] else ' '
|
||||
|
||||
|
||||
res.append('* {}: **{}**'.format(param_name, param_value_default))
|
||||
|
||||
return '\n'.join(res)
|
||||
@ -49,13 +51,13 @@ def _convert_np_types(params):
|
||||
# Set the default values for the hyperparameters.
|
||||
def _set_hyperparameters(params):
    """Store each known entry of ``params`` as the new default of that hyperparameter.

    Names that are not present in the shared hyperparameter table are ignored.
    """
    for name in params:
        if name not in Parameters.getInstance().hyperparameters:
            continue
        Parameters.getInstance().hyperparameters[name]['default'] = params[name]
|
||||
|
||||
|
||||
# Check if the parameter is for optimization.
|
||||
def _is_optimization_param(val):
    """Return the entry's ``should_optimize`` value, or False when the key is absent."""
    return val.get('should_optimize', False)
|
||||
|
||||
|
||||
@ -67,7 +69,7 @@ def _get_params_hash(params):
|
||||
|
||||
def optimize(collector, progress=gr.Progress()):
|
||||
# Inform the user that something is happening.
|
||||
progress(0, desc=f'Setting Up...')
|
||||
progress(0, desc='Setting Up...')
|
||||
|
||||
# Track the current step
|
||||
current_step = 0
|
||||
@ -132,4 +134,4 @@ def optimize(collector, progress=gr.Progress()):
|
||||
with open('best_params.json', 'w') as fp:
|
||||
json.dump(_convert_np_types(best_params), fp, indent=4)
|
||||
|
||||
return str_result
|
||||
return str_result
|
||||
|
@ -1,18 +1,16 @@
|
||||
"""
|
||||
This module provides a singleton class `Parameters` that is used to manage all hyperparameters for the embedding application.
|
||||
This module provides a singleton class `Parameters` that is used to manage all hyperparameters for the embedding application.
|
||||
It expects a JSON file in `extensions/superboogav2/config.json`.
|
||||
|
||||
Each element in the JSON must have a `default` value which will be used for the current run. Elements can have `categories`.
|
||||
These categories define the range in which the optimizer will search. If the element is tagged with `"should_optimize": false`,
|
||||
Each element in the JSON must have a `default` value which will be used for the current run. Elements can have `categories`.
|
||||
These categories define the range in which the optimizer will search. If the element is tagged with `"should_optimize": false`,
|
||||
then the optimizer will only ever use the default value.
|
||||
"""
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import json
|
||||
|
||||
from modules.logging_colors import logger
|
||||
|
||||
|
||||
NUM_TO_WORD_METHOD = 'Number to Word'
|
||||
NUM_TO_CHAR_METHOD = 'Number to Char'
|
||||
NUM_TO_CHAR_LONG_METHOD = 'Number to Multi-Char'
|
||||
@ -366,4 +364,4 @@ def set_api_port(value: int):
|
||||
|
||||
|
||||
def set_api_on(value: bool):
    """Store ``value`` as the default of the ``api_on`` hyperparameter."""
    api_on_entry = Parameters.getInstance().hyperparameters['api_on']
    api_on_entry['default'] = value
|
||||
|
@ -1,5 +1,5 @@
|
||||
beautifulsoup4==4.12.2
|
||||
chromadb==0.3.18
|
||||
chromadb==0.4.24
|
||||
lxml
|
||||
optuna
|
||||
pandas==2.0.3
|
||||
@ -7,4 +7,4 @@ posthog==2.4.2
|
||||
sentence_transformers==2.2.2
|
||||
spacy
|
||||
pytextrank
|
||||
num2words
|
||||
num2words
|
||||
|
@ -7,28 +7,29 @@ from pathlib import Path
|
||||
# Point to where nltk will find the required data.
|
||||
os.environ['NLTK_DATA'] = str(Path("extensions/superboogav2/nltk_data").resolve())
|
||||
|
||||
import textwrap
|
||||
import codecs
|
||||
import textwrap
|
||||
|
||||
import gradio as gr
|
||||
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
|
||||
from modules.logging_colors import logger
|
||||
from modules import shared
|
||||
from modules.logging_colors import logger
|
||||
|
||||
from .utils import create_metadata_source
|
||||
from .chromadb import make_collector
|
||||
from .download_urls import feed_url_into_collector
|
||||
from .data_processor import process_and_add_to_collector
|
||||
from .benchmark import benchmark
|
||||
from .optimize import optimize
|
||||
from .notebook_handler import input_modifier_internal
|
||||
from .chat_handler import custom_generate_chat_prompt_internal
|
||||
from .api import APIManager
|
||||
from .benchmark import benchmark
|
||||
from .chat_handler import custom_generate_chat_prompt_internal
|
||||
from .chromadb import make_collector
|
||||
from .data_processor import process_and_add_to_collector
|
||||
from .download_urls import feed_url_into_collector
|
||||
from .notebook_handler import input_modifier_internal
|
||||
from .optimize import optimize
|
||||
from .utils import create_metadata_source
|
||||
|
||||
collector = None
|
||||
api_manager = None
|
||||
|
||||
|
||||
def setup():
|
||||
global collector
|
||||
global api_manager
|
||||
@ -38,6 +39,7 @@ def setup():
|
||||
if parameters.get_api_on():
|
||||
api_manager.start_server(parameters.get_api_port())
|
||||
|
||||
|
||||
def _feed_data_into_collector(corpus):
|
||||
yield '### Processing data...'
|
||||
process_and_add_to_collector(corpus, collector, False, create_metadata_source('direct-text'))
|
||||
@ -87,7 +89,7 @@ def _get_optimizable_settings() -> list:
|
||||
preprocess_pipeline.append('Merge Spaces')
|
||||
if parameters.should_strip():
|
||||
preprocess_pipeline.append('Strip Edges')
|
||||
|
||||
|
||||
return [
|
||||
parameters.get_time_power(),
|
||||
parameters.get_time_steepness(),
|
||||
@ -104,8 +106,8 @@ def _get_optimizable_settings() -> list:
|
||||
]
|
||||
|
||||
|
||||
def _apply_settings(optimization_steps, time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
|
||||
preprocess_pipeline, api_port, api_on, injection_strategy, add_chat_to_data, manual, postfix, data_separator, prefix, max_token_count,
|
||||
def _apply_settings(optimization_steps, time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
|
||||
preprocess_pipeline, api_port, api_on, injection_strategy, add_chat_to_data, manual, postfix, data_separator, prefix, max_token_count,
|
||||
chunk_count, chunk_sep, context_len, chunk_regex, chunk_len, threads, strong_cleanup):
|
||||
logger.debug('Applying settings.')
|
||||
|
||||
@ -240,7 +242,7 @@ def ui():
|
||||
with gr.Tab("File input"):
|
||||
file_input = gr.File(label='Input file', type='binary')
|
||||
update_file = gr.Button('Load data')
|
||||
|
||||
|
||||
with gr.Tab("Settings"):
|
||||
with gr.Accordion("Processing settings", open=True):
|
||||
chunk_len = gr.Textbox(value=parameters.get_chunk_len(), label='Chunk length', info='In characters, not tokens. This value is used when you click on "Load data".')
|
||||
@ -305,19 +307,16 @@ def ui():
|
||||
optimize_button = gr.Button('Optimize')
|
||||
optimization_steps = gr.Number(value=parameters.get_optimization_steps(), label='Optimization Steps', info='For how many steps to optimize.', interactive=True)
|
||||
|
||||
|
||||
clear_button = gr.Button('❌ Clear Data')
|
||||
|
||||
|
||||
with gr.Column():
|
||||
last_updated = gr.Markdown()
|
||||
|
||||
all_params = [optimization_steps, time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
|
||||
preprocess_pipeline, api_port, api_on, injection_strategy, add_chat_to_data, manual, postfix, data_separator, prefix, max_token_count,
|
||||
all_params = [optimization_steps, time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
|
||||
preprocess_pipeline, api_port, api_on, injection_strategy, add_chat_to_data, manual, postfix, data_separator, prefix, max_token_count,
|
||||
chunk_count, chunk_sep, context_len, chunk_regex, chunk_len, threads, strong_cleanup]
|
||||
optimizable_params = [time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
|
||||
preprocess_pipeline, chunk_count, context_len, chunk_len]
|
||||
|
||||
optimizable_params = [time_power, time_steepness, significant_level, min_sentences, new_dist_strat, delta_start, min_number_length, num_conversion,
|
||||
preprocess_pipeline, chunk_count, context_len, chunk_len]
|
||||
|
||||
update_data.click(_feed_data_into_collector, [data_input], last_updated, show_progress=False)
|
||||
update_url.click(_feed_url_into_collector, [url_input], last_updated, show_progress=False)
|
||||
@ -326,7 +325,6 @@ def ui():
|
||||
optimize_button.click(_begin_optimization, [], [last_updated] + optimizable_params, show_progress=True)
|
||||
clear_button.click(_clear_data, [], last_updated, show_progress=False)
|
||||
|
||||
|
||||
optimization_steps.input(fn=_apply_settings, inputs=all_params, show_progress=False)
|
||||
time_power.input(fn=_apply_settings, inputs=all_params, show_progress=False)
|
||||
time_steepness.input(fn=_apply_settings, inputs=all_params, show_progress=False)
|
||||
@ -352,4 +350,4 @@ def ui():
|
||||
chunk_regex.input(fn=_apply_settings, inputs=all_params, show_progress=False)
|
||||
chunk_len.input(fn=_apply_settings, inputs=all_params, show_progress=False)
|
||||
threads.input(fn=_apply_settings, inputs=all_params, show_progress=False)
|
||||
strong_cleanup.input(fn=_apply_settings, inputs=all_params, show_progress=False)
|
||||
strong_cleanup.input(fn=_apply_settings, inputs=all_params, show_progress=False)
|
||||
|
@ -4,6 +4,7 @@ This module contains common functions across multiple other modules.
|
||||
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
|
||||
|
||||
# Create the context using the prefix + data_separator + postfix from parameters.
|
||||
def create_context_text(results):
|
||||
context = parameters.get_prefix() + parameters.get_data_separator().join(results) + parameters.get_postfix()
|
||||
@ -13,4 +14,4 @@ def create_context_text(results):
|
||||
|
||||
# Create metadata with the specified source
|
||||
def create_metadata_source(source: str):
    """Build a metadata dictionary whose only entry records the given source."""
    metadata = {}
    metadata['source'] = source
    return metadata
|
||||
|
114
modules/cache_utils.py
Normal file
114
modules/cache_utils.py
Normal file
@ -0,0 +1,114 @@
|
||||
import torch
|
||||
from numba import njit
|
||||
|
||||
from modules import shared
|
||||
|
||||
|
||||
def process_llamacpp_cache(model, new_sequence, past_sequence):
    """Trim the llama.cpp KV cache so a shifted prompt can reuse its cached overlap.

    The longest common substring of the two sequences is located. When it does
    not start at the beginning of ``past_sequence`` and covers more than 20% of
    ``new_sequence``, the cached tokens between the sink and the overlap are
    removed and the remaining cache entries are shifted down.

    Args:
        model: Object exposing ``_ctx.kv_cache_seq_rm``, ``_ctx.kv_cache_seq_shift``,
            ``input_ids`` and ``n_tokens`` (presumably a llama-cpp-python ``Llama``).
        new_sequence: Token ids of the prompt about to be evaluated.
        past_sequence: Token ids currently held by the model.

    Returns:
        ``past_sequence`` itself when the cache is left alone, otherwise the
        list ``new_sequence[:j2 + 1]``, where ``j2`` is the last index of the
        overlap inside ``new_sequence``.
    """
    if len(past_sequence) == 0 or len(new_sequence) == 0:
        return past_sequence

    i1, i2, j1, j2 = find_longest_common_substring_indices(past_sequence, new_sequence)
    overlap_length = i2 - i1 + 1

    # Do StreamingLLM if i1 > 0 (ie the longest common substring is not a prefix)
    # and the overlap length is sufficiently long.
    if not (i1 > 0 and overlap_length > 0.2 * len(new_sequence)):
        return past_sequence

    # Keep the caller's objects untouched so the early returns hand them back as-is.
    new_tensor = torch.tensor(new_sequence)
    past_tensor = torch.tensor(past_sequence)

    prefix_length = find_prefix_length(past_tensor[:i1], new_tensor[:j1])
    sink_length = max(prefix_length, shared.args.attention_sink_size)
    removed_length = i1 - sink_length

    # If the sink already reaches the overlap there is nothing to evict; a
    # non-positive length would pass an inverted range to the cache calls below.
    if removed_length <= 0:
        return past_sequence

    matching_prefix = past_tensor[:prefix_length]
    removed_chunk = past_tensor[sink_length:i1]
    added_chunk = new_tensor[j2 + 1:]

    print()
    print('MATCHING PREFIX=', repr(shared.tokenizer.decode(matching_prefix)))
    print('ADDED CHUNK=', repr(shared.tokenizer.decode(added_chunk)))
    print('REMOVED CHUNK=', repr(shared.tokenizer.decode(removed_chunk)))
    print()

    # Remove interval [sink_length, sink_length + removed_length) from the context
    # Subtract removed_length from model.n_tokens
    model._ctx.kv_cache_seq_rm(0, sink_length, sink_length + removed_length)
    model._ctx.kv_cache_seq_shift(0, sink_length + removed_length, -1, -removed_length)

    new_tokens = new_tensor.tolist()
    model.input_ids[:j2 + 1] = new_tokens[:j2 + 1]
    model.n_tokens = j2 + 1

    return new_tokens[:j2 + 1]
|
||||
|
||||
|
||||
def find_prefix_length(past_seq, seq_tensor):
    """Return the length of the longest common prefix of two torch tensors.

    Only the first ``min(len(past_seq), len(seq_tensor))`` positions are
    compared, so the result never exceeds the shorter length.
    """
    compared = min(past_seq.shape[0], seq_tensor.shape[0])
    mismatch_positions = torch.nonzero(past_seq[:compared] != seq_tensor[:compared])

    # No mismatch inside the compared window: the whole window is the prefix.
    if len(mismatch_positions) == 0:
        return compared

    return mismatch_positions[0].item()
|
||||
|
||||
|
||||
@njit
def find_longest_common_substring_indices(list1, list2):
    '''
    Given two lists, solves the Longest Common Substring problem.

    It returns the indices where the substring starts and ends in
    list1 and list2, as the tuple
    (start_in_list1, end_in_list1, start_in_list2, end_in_list2).
    Both end indices are inclusive. When the lists share no element the
    initial values (0, -1, 0, -1) are returned, which slice to empty
    lists in the example below.

    When several common substrings have the same maximal length, the one
    found last wins, because the length comparison uses `>=`.

    Example:

        ir, jr, ir2, jr2 = find_longest_common_substring_indices(s1, s2)
        print(s1[ir:jr + 1])
        print(s2[ir2:jr2 + 1])

    Adapted from
    https://rosettacode.org/wiki/Longest_common_substring#Python
    '''

    len_list1, len_list2 = len(list1), len(list2)
    # end = start - 1 encodes "no match yet" (length 0).
    start_index_list1, end_index_list1 = 0, -1
    start_index_list2, end_index_list2 = 0, -1

    for index1 in range(0, len_list1):
        # Find the first position in list2 holding the element at index1.
        # NOTE(review): the bare `except` is presumably kept because this function
        # is compiled by numba; confirm numba accepts a narrower clause before changing it.
        try:
            index2 = list2.index(list1[index1])
        except:
            continue

        # Visit every occurrence of that element in list2.
        while index2 >= 0:
            temp_index1, temp_index2 = index1, index2
            # Extend the match for as long as both lists agree.
            while temp_index1 < len_list1 and temp_index2 < len_list2 and list2[temp_index2] == list1[temp_index1]:
                # Record the span whenever it is at least as long as the best one so far.
                if temp_index1 - index1 >= end_index_list1 - start_index_list1:
                    start_index_list1, end_index_list1 = index1, temp_index1
                    start_index_list2, end_index_list2 = index2, temp_index2

                temp_index1 += 1
                temp_index2 += 1
            # Move on to the next occurrence; stop when `index` finds none.
            try:
                index2 = list2.index(list1[index1], index2 + 1)
            except:
                break

    return start_index_list1, end_index_list1, start_index_list2, end_index_list2
|
@ -197,16 +197,16 @@ def generate_chat_prompt(user_input, state, **kwargs):
|
||||
while right - left > 1:
|
||||
mid = (left + right) // 2
|
||||
|
||||
messages[-1]['content'] = user_message[mid:]
|
||||
messages[-1]['content'] = user_message[:mid]
|
||||
prompt = make_prompt(messages)
|
||||
encoded_length = get_encoded_length(prompt)
|
||||
|
||||
if encoded_length <= max_length:
|
||||
right = mid
|
||||
else:
|
||||
left = mid
|
||||
else:
|
||||
right = mid
|
||||
|
||||
messages[-1]['content'] = user_message[right:]
|
||||
messages[-1]['content'] = user_message[:left]
|
||||
prompt = make_prompt(messages)
|
||||
encoded_length = get_encoded_length(prompt)
|
||||
if encoded_length > max_length:
|
||||
|
@ -2,6 +2,9 @@ from typing import Sequence
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
from modules import shared
|
||||
from modules.cache_utils import process_llamacpp_cache
|
||||
|
||||
try:
|
||||
import llama_cpp
|
||||
except:
|
||||
@ -58,6 +61,25 @@ def eval_with_progress(self, tokens: Sequence[int]):
|
||||
self.n_tokens += n_tokens
|
||||
|
||||
|
||||
def monkey_patch_generate(lib):
    """Wrap ``lib.Llama.generate`` so the StreamingLLM cache trimming runs first.

    The original method is kept on the class as ``original_generate`` and the
    wrapper is installed in its place as ``generate``.
    """

    def my_generate(self, *args, **kwargs):
        if shared.args.streaming_llm:
            # Do the cache trimming for StreamingLLM, comparing the incoming
            # tokens (first positional argument) with what the model holds.
            process_llamacpp_cache(self, args[0], self._input_ids)

        for token in self.original_generate(*args, **kwargs):
            yield token

    llama_class = lib.Llama
    llama_class.original_generate = llama_class.generate
    llama_class.generate = my_generate
|
||||
|
||||
|
||||
for lib in [llama_cpp, llama_cpp_cuda, llama_cpp_cuda_tensorcores]:
|
||||
if lib is not None:
|
||||
lib.Llama.eval = eval_with_progress
|
||||
monkey_patch_generate(lib)
|
||||
|
@ -46,6 +46,8 @@ loaders_and_params = OrderedDict({
|
||||
'no_offload_kqv',
|
||||
'row_split',
|
||||
'tensorcores',
|
||||
'streaming_llm',
|
||||
'attention_sink_size',
|
||||
],
|
||||
'llamacpp_HF': [
|
||||
'n_ctx',
|
||||
@ -69,6 +71,8 @@ loaders_and_params = OrderedDict({
|
||||
'no_offload_kqv',
|
||||
'row_split',
|
||||
'tensorcores',
|
||||
'streaming_llm',
|
||||
'attention_sink_size',
|
||||
'llamacpp_HF_info',
|
||||
],
|
||||
'ExLlamav2_HF': [
|
||||
|
@ -130,6 +130,8 @@ group.add_argument('--logits_all', action='store_true', help='Needs to be set fo
|
||||
group.add_argument('--no_offload_kqv', action='store_true', help='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
|
||||
group.add_argument('--cache-capacity', type=str, help='Maximum cache capacity (llama-cpp-python). Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed.')
|
||||
group.add_argument('--row_split', action='store_true', help='Split the model by rows across GPUs. This may improve multi-gpu performance.')
|
||||
group.add_argument('--streaming-llm', action='store_true', help='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
|
||||
group.add_argument('--attention-sink-size', type=int, default=5, help='StreamingLLM: number of sink tokens. Only used if the trimmed prompt does not share a prefix with the old prompt.')
|
||||
|
||||
# ExLlamaV2
|
||||
group = parser.add_argument_group('ExLlamaV2')
|
||||
|
@ -13,6 +13,7 @@ import transformers
|
||||
from transformers import LogitsProcessorList, is_torch_xpu_available
|
||||
|
||||
import modules.shared as shared
|
||||
from modules.cache_utils import process_llamacpp_cache
|
||||
from modules.callbacks import (
|
||||
Iteratorize,
|
||||
Stream,
|
||||
@ -364,6 +365,12 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
|
||||
print(decode(input_ids[0], skip_special_tokens=False))
|
||||
print()
|
||||
|
||||
# Handle StreamingLLM for llamacpp_HF
|
||||
if shared.model.__class__.__name__ == 'LlamacppHF' and shared.args.streaming_llm:
|
||||
tmp = process_llamacpp_cache(shared.model.model, input_ids[-1].tolist(), shared.model.model._input_ids.tolist())
|
||||
shared.model.past_seq = torch.tensor(tmp)
|
||||
shared.model.save_cache()
|
||||
|
||||
t0 = time.time()
|
||||
try:
|
||||
if not is_chat and not shared.is_seq2seq:
|
||||
|
@ -97,6 +97,8 @@ def list_model_elements():
|
||||
'no_offload_kqv',
|
||||
'row_split',
|
||||
'tensorcores',
|
||||
'streaming_llm',
|
||||
'attention_sink_size',
|
||||
'hqq_backend',
|
||||
]
|
||||
if is_torch_xpu_available():
|
||||
|
@ -88,6 +88,9 @@ def create_ui():
|
||||
with gr.Row():
|
||||
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
|
||||
|
||||
with gr.Row():
|
||||
shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=16, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=False, elem_classes=['add_scrollbar'])
|
||||
|
||||
|
||||
def create_chat_settings_ui():
|
||||
mu = shared.args.multi_user
|
||||
@ -133,7 +136,6 @@ def create_chat_settings_ui():
|
||||
|
||||
with gr.Column():
|
||||
shared.gradio['chat_template_str'] = gr.Textbox(value=shared.settings['chat_template_str'], label='Chat template', lines=22, elem_classes=['add_scrollbar', 'monospace'])
|
||||
shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=4, label='Command for chat-instruct mode', info='<|character|> gets replaced by the bot name, and <|prompt|> gets replaced by the regular chat prompt.', elem_classes=['add_scrollbar'])
|
||||
|
||||
with gr.Tab('Chat history'):
|
||||
with gr.Row():
|
||||
@ -293,7 +295,7 @@ def create_event_handlers():
|
||||
lambda: None, None, None, _js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
|
||||
|
||||
shared.gradio['mode'].change(
|
||||
lambda x: gr.update(visible=x != 'instruct'), gradio('mode'), gradio('chat_style'), show_progress=False).then(
|
||||
lambda x: [gr.update(visible=x != 'instruct'), gr.update(visible=x == 'chat-instruct')], gradio('mode'), gradio('chat_style', 'chat-instruct_command'), show_progress=False).then(
|
||||
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
|
||||
chat.load_latest_history, gradio('interface_state'), gradio('history')).then(
|
||||
chat.redraw_html, gradio(reload_arr), gradio('display')).then(
|
||||
|
@ -117,6 +117,8 @@ def create_ui():
|
||||
shared.gradio['use_flash_attention_2'] = gr.Checkbox(label="use_flash_attention_2", value=shared.args.use_flash_attention_2, info='Set use_flash_attention_2=True while loading the model.')
|
||||
shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
|
||||
shared.gradio['tensorcores'] = gr.Checkbox(label="tensorcores", value=shared.args.tensorcores, info='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This increases performance on RTX cards.')
|
||||
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='(experimental) Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
|
||||
shared.gradio['attention_sink_size'] = gr.Number(label="attention_sink_size", value=shared.args.attention_sink_size, info='StreamingLLM: number of sink tokens. Only used if the trimmed prompt doesn\'t share a prefix with the old prompt.')
|
||||
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
|
||||
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
|
||||
shared.gradio['no_offload_kqv'] = gr.Checkbox(label="no_offload_kqv", value=shared.args.no_offload_kqv, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
|
||||
@ -292,7 +294,7 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
|
||||
downloader.check_model_files(model, branch, links, sha256, output_folder)
|
||||
progress(1.0)
|
||||
else:
|
||||
yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
|
||||
yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}`")
|
||||
downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
|
||||
|
||||
yield (f"Model successfully saved to `{output_folder}/`.")
|
||||
|
@ -76,7 +76,7 @@ def create_ui(default_preset):
|
||||
shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
|
||||
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
|
||||
|
||||
shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
|
||||
shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
|
||||
shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Custom token bans', info='Specific token IDs to ban from generating, comma-separated. The IDs can be found in the Default or Notebook tab.')
|
||||
|
||||
with gr.Column():
|
||||
|
@ -1,4 +1,6 @@
|
||||
accelerate==0.27.*
|
||||
aqlm[gpu,cpu]==1.1.0; platform_system == "Linux"
|
||||
bitsandbytes==0.43.*
|
||||
colorama
|
||||
datasets
|
||||
einops
|
||||
@ -7,6 +9,7 @@ hqq==0.1.5
|
||||
jinja2==3.1.2
|
||||
lm_eval==0.3.0
|
||||
markdown
|
||||
numba==0.59.*
|
||||
numpy==1.26.*
|
||||
optimum==1.17.*
|
||||
pandas
|
||||
@ -29,27 +32,23 @@ flask_cloudflared==0.0.14
|
||||
sse-starlette==1.6.5
|
||||
tiktoken
|
||||
|
||||
# bitsandbytes
|
||||
bitsandbytes==0.42.*; platform_system != "Windows"
|
||||
https://github.com/oobabooga/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.42.0-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
|
||||
# llama-cpp-python (CPU only, AVX2)
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
|
||||
# llama-cpp-python (CUDA, no tensor cores)
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.56+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.56+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.56+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.56+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
|
||||
# llama-cpp-python (CUDA, tensor cores)
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.56+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.56+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.56+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.56+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
@ -70,4 +69,4 @@ https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_
|
||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu121-py3-none-any.whl
|
||||
autoawq==0.1.8; platform_system == "Linux" or platform_system == "Windows"
|
||||
autoawq==0.2.3; platform_system == "Linux" or platform_system == "Windows"
|
||||
|
@ -7,6 +7,7 @@ hqq==0.1.5
|
||||
jinja2==3.1.2
|
||||
lm_eval==0.3.0
|
||||
markdown
|
||||
numba==0.59.*
|
||||
numpy==1.26.*
|
||||
optimum==1.17.*
|
||||
pandas
|
||||
@ -30,14 +31,14 @@ sse-starlette==1.6.5
|
||||
tiktoken
|
||||
|
||||
# llama-cpp-python (CPU only, AVX2)
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
|
||||
# AMD wheels
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.55+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.55+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.56+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.56+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
@ -45,3 +46,5 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.0.15/exllamav2-0.0.1
|
||||
https://github.com/oobabooga/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
|
@ -7,6 +7,7 @@ hqq==0.1.5
|
||||
jinja2==3.1.2
|
||||
lm_eval==0.3.0
|
||||
markdown
|
||||
numba==0.59.*
|
||||
numpy==1.26.*
|
||||
optimum==1.17.*
|
||||
pandas
|
||||
@ -30,10 +31,10 @@ sse-starlette==1.6.5
|
||||
tiktoken
|
||||
|
||||
# llama-cpp-python (CPU only, no AVX2)
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
|
||||
# AMD wheels
|
||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
@ -43,3 +44,5 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.0.15/exllamav2-0.0.1
|
||||
https://github.com/oobabooga/exllamav2/releases/download/v0.0.15/exllamav2-0.0.15-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+rocm561-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+rocm561-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
|
@ -7,6 +7,7 @@ hqq==0.1.5
|
||||
jinja2==3.1.2
|
||||
lm_eval==0.3.0
|
||||
markdown
|
||||
numba==0.59.*
|
||||
numpy==1.26.*
|
||||
optimum==1.17.*
|
||||
pandas
|
||||
@ -30,10 +31,10 @@ sse-starlette==1.6.5
|
||||
tiktoken
|
||||
|
||||
# Mac wheels
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14-py3-none-any.whl
|
||||
|
@ -7,6 +7,7 @@ hqq==0.1.5
|
||||
jinja2==3.1.2
|
||||
lm_eval==0.3.0
|
||||
markdown
|
||||
numba==0.59.*
|
||||
numpy==1.26.*
|
||||
optimum==1.17.*
|
||||
pandas
|
||||
@ -30,12 +31,12 @@ sse-starlette==1.6.5
|
||||
tiktoken
|
||||
|
||||
# Mac wheels
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.55-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.56-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
|
||||
https://github.com/oobabooga/exllamav2/releases/download/v0.0.14/exllamav2-0.0.14-py3-none-any.whl
|
||||
|
@ -7,6 +7,7 @@ hqq==0.1.5
|
||||
jinja2==3.1.2
|
||||
lm_eval==0.3.0
|
||||
markdown
|
||||
numba==0.59.*
|
||||
numpy==1.26.*
|
||||
optimum==1.17.*
|
||||
pandas
|
||||
@ -30,7 +31,7 @@ sse-starlette==1.6.5
|
||||
tiktoken
|
||||
|
||||
# llama-cpp-python (CPU only, AVX2)
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
|
@ -7,6 +7,7 @@ hqq==0.1.5
|
||||
jinja2==3.1.2
|
||||
lm_eval==0.3.0
|
||||
markdown
|
||||
numba==0.59.*
|
||||
numpy==1.26.*
|
||||
optimum==1.17.*
|
||||
pandas
|
||||
@ -30,7 +31,7 @@ sse-starlette==1.6.5
|
||||
tiktoken
|
||||
|
||||
# llama-cpp-python (CPU only, no AVX2)
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
|
@ -1,4 +1,6 @@
|
||||
accelerate==0.27.*
|
||||
aqlm[gpu,cpu]==1.1.0; platform_system == "Linux"
|
||||
bitsandbytes==0.43.*
|
||||
colorama
|
||||
datasets
|
||||
einops
|
||||
@ -7,6 +9,7 @@ hqq==0.1.5
|
||||
jinja2==3.1.2
|
||||
lm_eval==0.3.0
|
||||
markdown
|
||||
numba==0.59.*
|
||||
numpy==1.26.*
|
||||
optimum==1.17.*
|
||||
pandas
|
||||
@ -29,27 +32,23 @@ flask_cloudflared==0.0.14
|
||||
sse-starlette==1.6.5
|
||||
tiktoken
|
||||
|
||||
# bitsandbytes
|
||||
bitsandbytes==0.42.*; platform_system != "Windows"
|
||||
https://github.com/oobabooga/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.42.0-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
|
||||
# llama-cpp-python (CPU only, no AVX2)
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.55+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.56+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
|
||||
# llama-cpp-python (CUDA, no tensor cores)
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.55+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.56+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.56+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.56+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.56+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
|
||||
# llama-cpp-python (CUDA, tensor cores)
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.55+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.56+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.56+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.56+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.56+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
@ -70,4 +69,4 @@ https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_
|
||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX/ctransformers-0.2.27+cu121-py3-none-any.whl
|
||||
autoawq==0.1.8; platform_system == "Linux" or platform_system == "Windows"
|
||||
autoawq==0.2.3; platform_system == "Linux" or platform_system == "Windows"
|
||||
|
@ -7,6 +7,7 @@ hqq==0.1.5
|
||||
jinja2==3.1.2
|
||||
lm_eval==0.3.0
|
||||
markdown
|
||||
numba==0.59.*
|
||||
numpy==1.26.*
|
||||
optimum==1.17.*
|
||||
pandas
|
||||
|
@ -1,4 +1,5 @@
|
||||
@echo off
|
||||
setlocal enabledelayedexpansion
|
||||
|
||||
cd /D "%~dp0"
|
||||
|
||||
@ -25,6 +26,7 @@ set INSTALL_DIR=%cd%\installer_files
|
||||
set CONDA_ROOT_PREFIX=%cd%\installer_files\conda
|
||||
set INSTALL_ENV_DIR=%cd%\installer_files\env
|
||||
set MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Windows-x86_64.exe
|
||||
set MINICONDA_CHECKSUM=307194e1f12bbeb52b083634e89cc67db4f7980bd542254b43d3309eaf7cb358
|
||||
set conda_exists=F
|
||||
|
||||
@rem figure out whether git and conda needs to be installed
|
||||
@ -39,6 +41,18 @@ if "%conda_exists%" == "F" (
|
||||
mkdir "%INSTALL_DIR%"
|
||||
call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. && goto end )
|
||||
|
||||
for /f %%a in ('CertUtil -hashfile "%INSTALL_DIR%\miniconda_installer.exe" SHA256 ^| find /i /v " " ^| find /i "%MINICONDA_CHECKSUM%"') do (
|
||||
set "output=%%a"
|
||||
)
|
||||
|
||||
if not defined output (
|
||||
echo The checksum verification for miniconda_installer.exe has failed.
|
||||
del "%INSTALL_DIR%\miniconda_installer.exe"
|
||||
goto end
|
||||
) else (
|
||||
echo The checksum verification for miniconda_installer.exe has passed successfully.
|
||||
)
|
||||
|
||||
echo Installing Miniconda to %CONDA_ROOT_PREFIX%
|
||||
start /wait "" "%INSTALL_DIR%\miniconda_installer.exe" /InstallationType=JustMe /NoShortcuts=1 /AddToPath=0 /RegisterPython=0 /NoRegistry=1 /S /D=%CONDA_ROOT_PREFIX%
|
||||
|
||||
@ -46,8 +60,8 @@ if "%conda_exists%" == "F" (
|
||||
echo Miniconda version:
|
||||
call "%CONDA_ROOT_PREFIX%\_conda.exe" --version || ( echo. && echo Miniconda not found. && goto end )
|
||||
|
||||
@rem delete the Miniconda installer
|
||||
del "%INSTALL_DIR%\miniconda_installer.exe"
|
||||
@rem delete the Miniconda installer
|
||||
del "%INSTALL_DIR%\miniconda_installer.exe"
|
||||
)
|
||||
|
||||
@rem create the installer env
|
||||
|
Loading…
Reference in New Issue
Block a user