Merge pull request #6713 from oobabooga/dev

Merge dev branch
oobabooga 2025-01-29 19:12:56 -03:00 committed by GitHub
commit 9ac4d81c8b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
24 changed files with 350 additions and 217 deletions

.gitignore vendored
View File

@@ -26,6 +26,7 @@
.DS_Store
.eslintrc.js
.idea
+.installer_state.json
.venv
venv
.envrc

View File

@@ -380,7 +380,7 @@ text-generation-webui
│   │   └── tokenizer.model
```
-In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download it via the command-line with
+In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download it via the command-line with:
```
python download-model.py organization/model

View File

@@ -1259,6 +1259,16 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
  left: 25px;
}

+.footer-button.footer-continue-button {
+  bottom: -23px;
+  left: 50px;
+}
+
+.footer-button.footer-remove-button {
+  bottom: -23px;
+  left: 75px;
+}
+
.message:hover .footer-button,
.user-message:hover .footer-button,
.assistant-message:hover .footer-button {

View File

@@ -14,7 +14,7 @@ Add `--api` to your command-line flags.
* To create a public Cloudflare URL, add the `--public-api` flag.
* To listen on your local network, add the `--listen` flag.
* To change the port, which is 5000 by default, use `--api-port 1234` (change 1234 to your desired port number).
-* To use SSL, add `--ssl-keyfile key.pem --ssl-certfile cert.pem`. Note that it doesn't work with `--public-api`.
+* To use SSL, add `--ssl-keyfile key.pem --ssl-certfile cert.pem`. ⚠️ **Note**: this doesn't work with `--public-api` since Cloudflare already uses HTTPS by default.
* To use an API key for authentication, add `--api-key yourkey`.
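Putting several of these flags together, a typical launch command looks like the sketch below (it assumes the project's usual `server.py` entry point; the flag names are the ones listed above):

```
python server.py --api --listen --api-port 5001 --api-key yourkey
```

This serves the API on port 5001, makes it reachable on the local network, and requires an `Authorization: Bearer yourkey` header on every request.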
### Examples
@@ -51,8 +51,7 @@ curl http://127.0.0.1:5000/v1/chat/completions \
        "content": "Hello!"
      }
    ],
-    "mode": "instruct",
-    "instruction_template": "Alpaca"
+    "mode": "instruct"
  }'
```
@@ -86,7 +85,6 @@ curl http://127.0.0.1:5000/v1/chat/completions \
      }
    ],
    "mode": "instruct",
-    "instruction_template": "Alpaca",
    "stream": true
  }'
```
@@ -131,9 +129,6 @@ curl -k http://127.0.0.1:5000/v1/internal/model/load \
    "args": {
      "load_in_4bit": true,
      "n_gpu_layers": 12
-    },
-    "settings": {
-      "instruction_template": "Alpaca"
    }
  }'
```
@@ -198,7 +193,7 @@ while True:
    assistant_message = ''
    for event in client.events():
        payload = json.loads(event.data)
-        chunk = payload['choices'][0]['message']['content']
+        chunk = payload['choices'][0]['delta']['content']
        assistant_message += chunk
        print(chunk, end='')
@@ -241,6 +236,27 @@ for event in client.events():
print()
```

+#### Python example with API key
+
+Replace
+
+```python
+headers = {
+    "Content-Type": "application/json"
+}
+```
+
+with
+
+```python
+headers = {
+    "Content-Type": "application/json",
+    "Authorization": "Bearer yourPassword123"
+}
+```
+
+in any of the examples above.
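The same key can also be passed to the curl examples through an `Authorization` header. A minimal sketch reusing the placeholder key from above:

```
curl http://127.0.0.1:5000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer yourPassword123" \
  -d '{
    "messages": [
      {
        "role": "user",
        "content": "Hello!"
      }
    ],
    "mode": "instruct"
  }'
```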
### Environment variables
The following environment variables can be used (they take precedence over everything else):

View File

@@ -14,6 +14,7 @@ import json
import os
import re
import sys
+from multiprocessing import Array
from pathlib import Path
from time import sleep
@@ -27,9 +28,10 @@ base = os.environ.get("HF_ENDPOINT") or "https://huggingface.co"
class ModelDownloader:
-    def __init__(self, max_retries=5):
+    def __init__(self, max_retries=7):
        self.max_retries = max_retries
        self.session = self.get_session()
+        self._progress_bar_slots = None

    def get_session(self):
        session = requests.Session()
@@ -186,73 +188,112 @@ class ModelDownloader:
        output_folder = Path(base_folder) / output_folder
        return output_folder

+    @property
+    def progress_bar_slots(self):
+        if self._progress_bar_slots is None:
+            raise RuntimeError("Progress bar slots not initialized. Start download threads first.")
+
+        return self._progress_bar_slots
+
+    def initialize_progress_bar_slots(self, num_threads):
+        self._progress_bar_slots = Array("B", [0] * num_threads)
+
+    def get_progress_bar_position(self):
+        with self.progress_bar_slots.get_lock():
+            for i in range(len(self.progress_bar_slots)):
+                if self.progress_bar_slots[i] == 0:
+                    self.progress_bar_slots[i] = 1
+                    return i
+
+            return 0  # fallback
+
+    def release_progress_bar_position(self, slot):
+        with self.progress_bar_slots.get_lock():
+            self.progress_bar_slots[slot] = 0
+
    def get_single_file(self, url, output_folder, start_from_scratch=False):
        filename = Path(url.rsplit('/', 1)[1])
        output_path = output_folder / filename
+        progress_bar_position = self.get_progress_bar_position()

-        max_retries = 7
+        max_retries = self.max_retries
        attempt = 0
+        try:
            while attempt < max_retries:
                attempt += 1
                session = self.session
                headers = {}
                mode = 'wb'

                try:
                    if output_path.exists() and not start_from_scratch:
                        # Resume download
                        r = session.get(url, stream=True, timeout=20)
                        total_size = int(r.headers.get('content-length', 0))
                        if output_path.stat().st_size >= total_size:
                            return

                        headers = {'Range': f'bytes={output_path.stat().st_size}-'}
                        mode = 'ab'

                    with session.get(url, stream=True, headers=headers, timeout=30) as r:
                        r.raise_for_status()  # If status is not 2xx, raise an error
                        total_size = int(r.headers.get('content-length', 0))
                        block_size = 1024 * 1024  # 1MB

                        filename_str = str(filename)  # Convert PosixPath to string if necessary

                        tqdm_kwargs = {
                            'total': total_size,
                            'unit': 'B',
                            'unit_scale': True,
                            'unit_divisor': 1024,
                            'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]',
-                            'desc': f"{filename_str}: "
-                        }
+                            'desc': f"{filename_str}: ",
+                            'position': progress_bar_position,
+                            'leave': False
+                        }

                        if 'COLAB_GPU' in os.environ:
                            tqdm_kwargs.update({
                                'position': 0,
                                'leave': True
                            })

                        with open(output_path, mode) as f:
                            with tqdm.tqdm(**tqdm_kwargs) as t:
                                count = 0
                                for data in r.iter_content(block_size):
                                    f.write(data)
                                    t.update(len(data))
                                    if total_size != 0 and self.progress_bar is not None:
                                        count += len(data)
                                        self.progress_bar(float(count) / float(total_size), f"{filename_str}")

                    break  # Exit loop if successful
                except (RequestException, ConnectionError, Timeout) as e:
                    print(f"Error downloading {filename}: {e}.")
                    print(f"That was attempt {attempt}/{max_retries}.", end=' ')
                    if attempt < max_retries:
                        print(f"Retry begins in {2 ** attempt} seconds.")
                        sleep(2 ** attempt)
                    else:
                        print("Failed to download after the maximum number of attempts.")
+        finally:
+            self.release_progress_bar_position(progress_bar_position)

    def start_download_threads(self, file_list, output_folder, start_from_scratch=False, threads=4):
-        thread_map(lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch), file_list, max_workers=threads, disable=True)
+        self.initialize_progress_bar_slots(threads)
+        tqdm.tqdm.set_lock(tqdm.tqdm.get_lock())
+        try:
+            thread_map(
+                lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch),
+                file_list,
+                max_workers=threads,
+                disable=True
+            )
+        finally:
+            print(f"\nDownload of {len(file_list)} files to {output_folder} completed.")

    def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):
        self.progress_bar = progress_bar
@@ -318,7 +359,7 @@ if __name__ == '__main__':
    parser.add_argument('--model-dir', type=str, default=None, help='Save the model files to a subfolder of this folder instead of the default one (text-generation-webui/models).')
    parser.add_argument('--clean', action='store_true', help='Does not resume the previous download.')
    parser.add_argument('--check', action='store_true', help='Validates the checksums of model files.')
-    parser.add_argument('--max-retries', type=int, default=5, help='Max retries count when get error in download time.')
+    parser.add_argument('--max-retries', type=int, default=7, help='Max retries count when get error in download time.')
    args = parser.parse_args()
    branch = args.branch

View File

@@ -557,12 +557,6 @@ def calc_trainable_parameters(model):
def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, higher_rank_limit: bool, warmup_steps: int, optimizer: str, hard_cut_string: str, train_only_after: str, stop_at_loss: float, add_eos_token: bool, min_chars: int, report_to: str, precize_slicing_overlap: bool, add_eos_token_type: str, save_steps_under_loss: float, add_bos_token: bool, training_projection: str,sliding_window:bool,warmup_ratio:float, grad_accumulation: int,neft_noise_alpha:float):
-    if shared.args.monkey_patch:
-        from alpaca_lora_4bit.monkeypatch.peft_tuners_lora_monkey_patch import (
-            replace_peft_model_with_int4_lora_model
-        )
-        replace_peft_model_with_int4_lora_model()
-
    global train_log_graph
    global WANT_INTERRUPT
    WANT_INTERRUPT = False
@@ -600,10 +594,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
        time.sleep(5)

-    if shared.args.loader == 'GPTQ-for-LLaMa' and not shared.args.monkey_patch:
-        yield "LoRA training with GPTQ-for-LLaMa requires loading with `--monkey-patch`", zero_pd
-        return
-
    if cutoff_len <= 0 or micro_batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0:
        yield "Cannot input zeroes.", zero_pd
        return
@@ -865,15 +855,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
        yield traceback.format_exc().replace('\n', '\n\n'), zero_pd
        return

-    if shared.args.monkey_patch:
-        from alpaca_lora_4bit.autograd_4bit import Autograd4bitQuantLinear
-        from alpaca_lora_4bit.models import Linear4bitLt
-
-        for _, m in lora_model.named_modules():
-            if isinstance(m, Autograd4bitQuantLinear) or isinstance(m, Linear4bitLt):
-                if m.is_v1_model:
-                    m.zeros = m.zeros.half()
-                m.scales = m.scales.half()
-
    class Tracked():
        def __init__(self):
            self.current_steps = 0

View File

@@ -146,7 +146,7 @@ def convert_history(history):
            for item in entry['content']:
                if not isinstance(item, dict):
                    continue

                image_url = None
                content = None
                if item['type'] == 'image_url' and isinstance(item['image_url'], dict):
@@ -205,7 +205,7 @@ def convert_history(history):
            else:
                chat_dialogue.append(['', current_reply])
        elif role == "system":
-            system_message = content
+            system_message += f"\n{content}" if system_message else content

    if not user_input_last:
        user_input = ""

View File

@@ -22,6 +22,14 @@ function regenerateClick() {
    document.getElementById("Regenerate").click();
}

+function continueClick() {
+    document.getElementById("Continue").click();
+}
+
+function removeLastClick() {
+    document.getElementById("Remove-last").click();
+}
+
function handleMorphdomUpdate(text) {
    morphdom(
        document.getElementById("chat").parentNode,

View File

@@ -30,8 +30,13 @@ from modules.text_generation import (
)
from modules.utils import delete_file, get_available_characters, save_file

+# Copied from the Transformers library
+def strftime_now(format):
+    return datetime.now().strftime(format)
+
jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
+jinja_env.globals["strftime_now"] = strftime_now

def str_presenter(dumper, data):
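For context on the addition above: registering `strftime_now` as a Jinja2 global lets instruction/chat templates call it, mirroring the helper that the Transformers library exposes to its chat templates (as the comment in the diff notes). A minimal, self-contained sketch of the behavior, separate from the diff:

```python
from datetime import datetime

from jinja2.sandbox import ImmutableSandboxedEnvironment


# Same helper as in the diff above
def strftime_now(format):
    return datetime.now().strftime(format)


jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
jinja_env.globals["strftime_now"] = strftime_now

# A chat template can now embed the current date, e.g. in a system prompt:
template = jinja_env.from_string("Today's date is {{ strftime_now('%d %B %Y') }}.")
print(template.render())  # e.g. "Today's date is 29 January 2025."
```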

View File

@@ -154,6 +154,8 @@ def add_long_list_class(html):
@functools.lru_cache(maxsize=None)
def convert_to_markdown(string):
+    if not string:
+        return ""
+
    # Make \[ \] LaTeX equations inline
    pattern = r'^\s*\\\[\s*\n([\s\S]*?)\n\s*\\\]\s*$'
@@ -304,8 +306,13 @@ def get_image_cache(path):
copy_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="tabler-icon tabler-icon-copy"><path d="M8 8m0 2a2 2 0 0 1 2 -2h8a2 2 0 0 1 2 2v8a2 2 0 0 1 -2 2h-8a2 2 0 0 1 -2 -2z"></path><path d="M16 8v-2a2 2 0 0 0 -2 -2h-8a2 2 0 0 0 -2 2v8a2 2 0 0 0 2 2h2"></path></svg>'''
refresh_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="tabler-icon tabler-icon-repeat"><path d="M4 12v-3a3 3 0 0 1 3 -3h13m-3 -3l3 3l-3 3"></path><path d="M20 12v3a3 3 0 0 1 -3 3h-13m3 3l-3 -3l3 -3"></path></svg>'''
+continue_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-player-play"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M7 4v16l13 -8z" /></svg>'''
+remove_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-trash"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M4 7l16 0" /><path d="M10 11l0 6" /><path d="M14 11l0 6" /><path d="M5 7l1 12a2 2 0 0 0 2 2h8a2 2 0 0 0 2 -2l1 -12" /><path d="M9 7v-3a1 1 0 0 1 1 -1h4a1 1 0 0 1 1 1v3" /></svg>'''

-copy_button = f'<button class="footer-button footer-copy-button" onclick="copyToClipboard(this)">{copy_svg}</button>'
-refresh_button = f'<button class="footer-button footer-refresh-button" onclick="regenerateClick()">{refresh_svg}</button>'
+copy_button = f'<button class="footer-button footer-copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
+refresh_button = f'<button class="footer-button footer-refresh-button" title="Regenerate" onclick="regenerateClick()">{refresh_svg}</button>'
+continue_button = f'<button class="footer-button footer-continue-button" title="Continue" onclick="continueClick()">{continue_svg}</button>'
+remove_button = f'<button class="footer-button footer-remove-button" title="Remove last reply" onclick="removeLastClick()">{remove_svg}</button>'

def generate_instruct_html(history):
@@ -334,6 +341,8 @@ def generate_instruct_html(history):
                f'<div class="message-body">{converted_visible[1]}</div>'
                f'{copy_button}'
                f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
+                f'{continue_button if i == len(history["visible"]) - 1 else ""}'
+                f'{remove_button if i == len(history["visible"]) - 1 else ""}'
                f'</div>'
                f'</div>'
            )
@@ -383,6 +392,8 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=
                f'<div class="message-body">{converted_visible[1]}</div>'
                f'{copy_button}'
                f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
+                f'{continue_button if i == len(history["visible"]) - 1 else ""}'
+                f'{remove_button if i == len(history["visible"]) - 1 else ""}'
                f'</div>'
                f'</div>'
            )
@@ -417,6 +428,8 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
                f'<div class="message-body">{converted_visible[1]}</div>'
                f'{copy_button}'
                f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
+                f'{continue_button if i == len(history["visible"]) - 1 else ""}'
+                f'{remove_button if i == len(history["visible"]) - 1 else ""}'
                f'</div>'
                f'</div>'
            )

View File

@@ -202,7 +202,19 @@ class LlamacppHF(PreTrainedModel):
        params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)

        Llama = llama_cpp_lib().Llama
-        model = Llama(**params)
+        try:
+            model = Llama(**params)
+        except Exception as e:
+            error_message = (
+                f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+                f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+                f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}"
+                "\n"
+                f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+            )
+            raise type(e)(error_message) from e
+
        model.last_updated_index = -1

        return LlamacppHF(model, model_file)

View File

@@ -108,7 +108,19 @@ class LlamaCppModel:
        params["type_k"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
        params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)

-        result.model = Llama(**params)
+        try:
+            result.model = Llama(**params)
+        except Exception as e:
+            error_message = (
+                f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+                f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+                f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}"
+                "\n"
+                f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+            )
+            raise type(e)(error_message) from e
+
        if cache_capacity > 0:
            result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
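As a rough illustration of the two remedies named in the error message above, both values can also be lowered from the command line when launching the webui. The flag spellings below are assumptions based on the parameter names shown here (verify against `python server.py --help`), and `my-model.gguf` is only a placeholder:

```
python server.py --model my-model.gguf --n_ctx 4096 --n-gpu-layers 10
```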

View File

@@ -89,8 +89,8 @@ def create_ui():
            shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
            shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch)
            shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
-            shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. ⚠️ Lower this value if you can\'t load the model.')
-            shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. ⚠️ Lower this value if you can\'t load the model.')
+            shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768.')
+            shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768.')
            shared.gradio['cache_type'] = gr.Dropdown(label="cache_type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q6', 'q4'], value=shared.args.cache_type, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.')
            shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
            shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')

View File

@@ -1,6 +1,7 @@
import argparse
import glob
import hashlib
+import json
import os
import platform
import re
@@ -101,32 +102,25 @@ def torch_version():
def update_pytorch():
-    print_big_message("Checking for PyTorch updates")
+    print_big_message("Checking for PyTorch updates.")

    torver = torch_version()
-    is_cuda = '+cu' in torver
-    is_cuda118 = '+cu118' in torver  # 2.1.0+cu118
-    is_rocm = '+rocm' in torver  # 2.0.1+rocm5.4.2
-    is_intel = '+cxx11' in torver  # 2.0.1a0+cxx11.abi
-    is_cpu = '+cpu' in torver  # 2.0.1+cpu
-
-    install_pytorch = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} "
-
-    if is_cuda118:
-        install_pytorch += "--index-url https://download.pytorch.org/whl/cu118"
-    elif is_cuda:
-        install_pytorch += "--index-url https://download.pytorch.org/whl/cu121"
-    elif is_rocm:
-        install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.1"
-    elif is_cpu:
-        install_pytorch += "--index-url https://download.pytorch.org/whl/cpu"
-    elif is_intel:
-        if is_linux():
-            install_pytorch = "python -m pip install --upgrade torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
-        else:
-            install_pytorch = "python -m pip install --upgrade torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
-
-    run_cmd(f"{install_pytorch}", assert_success=True, environment=True)
+    base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}"
+
+    if "+cu118" in torver:
+        install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu118"
+    elif "+cu" in torver:
+        install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu121"
+    elif "+rocm" in torver:
+        install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.1"
+    elif "+cpu" in torver:
+        install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu"
+    elif "+cxx11" in torver:
+        intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10"
+        install_cmd = f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
+    else:
+        install_cmd = base_cmd
+
+    run_cmd(install_cmd, assert_success=True, environment=True)

def is_installed():
@@ -155,6 +149,11 @@ def check_env():
        sys.exit(1)

+def get_current_commit():
+    result = run_cmd("git rev-parse HEAD", capture_output=True, environment=True)
+    return result.stdout.decode('utf-8').strip()
+
def clear_cache():
    run_cmd("conda clean -a -y", environment=True)
    run_cmd("python -m pip cache purge", environment=True)
@@ -340,27 +339,84 @@ def install_extensions_requirements():
def update_requirements(initial_installation=False, pull=True):
    # Create .git directory if missing
    if not os.path.exists(os.path.join(script_dir, ".git")):
-        git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main'
-        run_cmd(git_creation_cmd, environment=True, assert_success=True)
+        run_cmd(
+            "git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && "
+            "git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && "
+            "git reset --hard origin/main && git branch --set-upstream-to=origin/main",
+            environment=True,
+            assert_success=True
+        )
+
+    torver = torch_version()
+    if "+rocm" in torver:
+        requirements_file = "requirements_amd" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+    elif "+cpu" in torver or "+cxx11" in torver:
+        requirements_file = "requirements_cpu_only" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+    elif is_macos():
+        requirements_file = "requirements_apple_" + ("intel" if is_x86_64() else "silicon") + ".txt"
+    else:
+        requirements_file = "requirements" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+
+    # Load state from JSON file
+    state_file = '.installer_state.json'
+    current_commit = get_current_commit()
+    wheels_changed = False
+    if os.path.exists(state_file):
+        with open(state_file, 'r') as f:
+            last_state = json.load(f)
+
+        if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit:
+            wheels_changed = True
+    else:
+        wheels_changed = True

    if pull:
-        print_big_message("Updating the local copy of the repository with \"git pull\"")
+        # Read .whl lines before pulling
+        before_pull_whl_lines = []
+        if os.path.exists(requirements_file):
+            with open(requirements_file, 'r') as f:
+                before_pull_whl_lines = [line for line in f if '.whl' in line]
+
+        print_big_message('Updating the local copy of the repository with "git pull"')

+        # Hash files before pulling
        files_to_check = [
            'start_linux.sh', 'start_macos.sh', 'start_windows.bat', 'start_wsl.bat',
            'update_wizard_linux.sh', 'update_wizard_macos.sh', 'update_wizard_windows.bat', 'update_wizard_wsl.bat',
            'one_click.py'
        ]
+        before_hashes = {file: calculate_file_hash(file) for file in files_to_check}

-        before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check}
+        # Perform the git pull
        run_cmd("git pull --autostash", assert_success=True, environment=True)
-        after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check}

-        # Check for differences in installation file hashes
-        for file_name in files_to_check:
-            if before_pull_hashes[file_name] != after_pull_hashes[file_name]:
-                print_big_message(f"File '{file_name}' was updated during 'git pull'. Please run the script again.")
-                exit(1)
+        # Check hashes after pulling
+        after_hashes = {file: calculate_file_hash(file) for file in files_to_check}
+        if os.path.exists(requirements_file):
+            with open(requirements_file, 'r') as f:
+                after_pull_whl_lines = [line for line in f if '.whl' in line]
+
+        wheels_changed = wheels_changed or (before_pull_whl_lines != after_pull_whl_lines)
+
+        # Check for changes to installer files
+        for file in files_to_check:
+            if before_hashes[file] != after_hashes[file]:
+                print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.")
+
+                # Save state before exiting
+                current_state = {}
+                if wheels_changed:
+                    current_state['wheels_changed'] = True
+
+                with open(state_file, 'w') as f:
+                    json.dump(current_state, f)
+
+                sys.exit(1)
+
+    # Save current state
+    current_state = {'last_installed_commit': current_commit}
+    with open(state_file, 'w') as f:
+        json.dump(current_state, f)

    if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"):
        install_extensions_requirements()
@@ -369,38 +425,23 @@ def update_requirements(initial_installation=False, pull=True):
    if not initial_installation:
        update_pytorch()

-    # Detect the PyTorch version
-    torver = torch_version()
-    is_cuda = '+cu' in torver
-    is_cuda118 = '+cu118' in torver  # 2.1.0+cu118
-    is_rocm = '+rocm' in torver  # 2.0.1+rocm5.4.2
-    is_intel = '+cxx11' in torver  # 2.0.1a0+cxx11.abi
-    is_cpu = '+cpu' in torver  # 2.0.1+cpu
-
-    if is_rocm:
-        base_requirements = "requirements_amd" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
-    elif is_cpu or is_intel:
-        base_requirements = "requirements_cpu_only" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
-    elif is_macos():
-        base_requirements = "requirements_apple_" + ("intel" if is_x86_64() else "silicon") + ".txt"
-    else:
-        base_requirements = "requirements" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
-
-    requirements_file = base_requirements
-
    print_big_message(f"Installing webui requirements from file: {requirements_file}")
    print(f"TORCH: {torver}\n")

    # Prepare the requirements file
    textgen_requirements = open(requirements_file).read().splitlines()
-    if is_cuda118:
+
+    if not initial_installation and not wheels_changed:
+        textgen_requirements = [line for line in textgen_requirements if '.whl' not in line]
+
+    if "+cu118" in torver:
        textgen_requirements = [
            req.replace('+cu121', '+cu118').replace('+cu122', '+cu118')
            for req in textgen_requirements
            if "autoawq" not in req.lower()
        ]

-    if is_windows() and is_cuda118:  # No flash-attention on Windows for CUDA 11
+    if is_windows() and "+cu118" in torver:  # No flash-attention on Windows for CUDA 11
        textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req]

    with open('temp_requirements.txt', 'w') as file:
@@ -416,16 +457,9 @@ def update_requirements(initial_installation=False, pull=True):
    # Install/update the project requirements
    run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True)

-    # Clean up
    os.remove('temp_requirements.txt')

-    # Check for '+cu' or '+rocm' in version string to determine if torch uses CUDA or ROCm. Check for pytorch-cuda as well for backwards compatibility
-    if not any((is_cuda, is_rocm)) and run_cmd("conda list -f pytorch-cuda | grep pytorch-cuda", environment=True, capture_output=True).returncode == 1:
-        clear_cache()
-        return
-
-    if not os.path.exists("repositories/"):
-        os.mkdir("repositories")
-
    clear_cache()

View File

@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
bitsandbytes==0.45.*
colorama
datasets
@@ -32,22 +32,22 @@ sse-starlette==1.6.5
tiktoken

# llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

# llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

# llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

# CUDA wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

View File

@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
colorama
datasets
einops
@@ -31,14 +31,14 @@ sse-starlette==1.6.5
tiktoken

# llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

# AMD wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.6+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.6+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.7+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.7+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

View File

@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
colorama
datasets
einops
@@ -31,10 +31,10 @@ sse-starlette==1.6.5
tiktoken

# llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

# AMD wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

View File

@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
colorama
datasets
einops
@@ -31,8 +31,8 @@ sse-starlette==1.6.5
tiktoken

# Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl

View File

@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
colorama
datasets
einops
@@ -31,10 +31,10 @@ sse-starlette==1.6.5
tiktoken

# Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl

View File

@ -1,4 +1,4 @@
accelerate==1.2.* accelerate==1.3.*
colorama colorama
datasets datasets
einops einops
@ -31,7 +31,7 @@ sse-starlette==1.6.5
tiktoken tiktoken
# llama-cpp-python (CPU only, AVX2) # llama-cpp-python (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
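The `accelerate==1.2.*` → `accelerate==1.3.*` bump uses a wildcard version specifier: any 1.3.x release satisfies it, while 1.2.x and 1.4.x do not. A small hedged sketch, again assuming the `packaging` library, showing which versions the new pin accepts:

```python
# Minimal sketch (assumes the `packaging` library): what the "==1.3.*" pin accepts.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("==1.3.*")
for candidate in ("1.2.1", "1.3.0", "1.3.2", "1.4.0"):
    # Only the 1.3.x releases satisfy the wildcard pin.
    print(candidate, spec.contains(candidate))
```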

View File

@@ -1,4 +1,4 @@
accelerate==1.2.* accelerate==1.3.*
colorama colorama
datasets datasets
einops einops
@@ -31,7 +31,7 @@ sse-starlette==1.6.5
tiktoken tiktoken
# llama-cpp-python (CPU only, no AVX2) # llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

View File

@@ -1,4 +1,4 @@
accelerate==1.2.* accelerate==1.3.*
bitsandbytes==0.45.* bitsandbytes==0.45.*
colorama colorama
datasets datasets
@@ -32,22 +32,22 @@ sse-starlette==1.6.5
tiktoken tiktoken
# llama-cpp-python (CPU only, no AVX2) # llama-cpp-python (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
# llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ) # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ) # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ)
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
# CUDA wheels # CUDA wheels
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
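After upgrading, a quick sanity check that the installed llama-cpp-python build matches the 0.3.7 wheels pinned above. This sketch assumes the package exposes `__version__` (recent llama-cpp-python releases do; the `getattr` fallback covers builds that don't):

```python
# Hedged sanity check: confirm the installed wheel matches the pinned version.
import llama_cpp

expected = "0.3.7"
installed = getattr(llama_cpp, "__version__", "unknown")
if installed.startswith(expected):
    print(f"llama-cpp-python {installed} matches the pinned wheel version")
else:
    print(f"Warning: expected llama-cpp-python {expected}, found {installed}")
```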

View File

@@ -1,4 +1,4 @@
accelerate==1.2.* accelerate==1.3.*
colorama colorama
datasets datasets
einops einops

View File

@@ -41,12 +41,12 @@ if "%conda_exists%" == "F" (
mkdir "%INSTALL_DIR%" mkdir "%INSTALL_DIR%"
call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. && goto end ) call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. && goto end )
:: Try CertUtil first @rem Try CertUtil first
for /f %%a in ('CertUtil -hashfile "%INSTALL_DIR%\miniconda_installer.exe" SHA256 ^| find /i /v " " ^| find /i "%MINICONDA_CHECKSUM%"') do ( for /f %%a in ('CertUtil -hashfile "%INSTALL_DIR%\miniconda_installer.exe" SHA256 ^| find /i /v " " ^| find /i "%MINICONDA_CHECKSUM%"') do (
set "output=%%a" set "output=%%a"
) )
:: If CertUtil fails, try PowerShell @rem If CertUtil fails, try PowerShell
if not defined output ( if not defined output (
for /f %%a in ('powershell -Command "if((Get-FileHash \"%INSTALL_DIR%\miniconda_installer.exe\" -Algorithm SHA256).Hash -eq ''%MINICONDA_CHECKSUM%''){echo true}"') do ( for /f %%a in ('powershell -Command "if((Get-FileHash \"%INSTALL_DIR%\miniconda_installer.exe\" -Algorithm SHA256).Hash -eq ''%MINICONDA_CHECKSUM%''){echo true}"') do (
set "output=%%a" set "output=%%a"