Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2025-02-05 00:10:46 +01:00)

Commit 9ac4d81c8b
.gitignore (vendored): 1 changed line

@@ -26,6 +26,7 @@
 .DS_Store
 .eslintrc.js
 .idea
+.installer_state.json
 .venv
 venv
 .envrc

@@ -380,7 +380,7 @@ text-generation-webui
 │ │ └── tokenizer.model
 ```
 
-In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download it via the command-line with
+In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download it via the command-line with:
 
 ```
 python download-model.py organization/model

css/main.css: 10 changed lines

@@ -1259,6 +1259,16 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
   left: 25px;
 }
 
+.footer-button.footer-continue-button {
+  bottom: -23px;
+  left: 50px;
+}
+
+.footer-button.footer-remove-button {
+  bottom: -23px;
+  left: 75px;
+}
+
 .message:hover .footer-button,
 .user-message:hover .footer-button,
 .assistant-message:hover .footer-button {

@@ -14,7 +14,7 @@ Add `--api` to your command-line flags.
 * To create a public Cloudflare URL, add the `--public-api` flag.
 * To listen on your local network, add the `--listen` flag.
 * To change the port, which is 5000 by default, use `--api-port 1234` (change 1234 to your desired port number).
-* To use SSL, add `--ssl-keyfile key.pem --ssl-certfile cert.pem`. Note that it doesn't work with `--public-api`.
+* To use SSL, add `--ssl-keyfile key.pem --ssl-certfile cert.pem`. ⚠️ **Note**: this doesn't work with `--public-api` since Cloudflare already uses HTTPS by default.
 * To use an API key for authentication, add `--api-key yourkey`.
 
 ### Examples

@@ -51,8 +51,7 @@ curl http://127.0.0.1:5000/v1/chat/completions \
         "content": "Hello!"
       }
     ],
-    "mode": "instruct",
-    "instruction_template": "Alpaca"
+    "mode": "instruct"
   }'
 ```
 

@@ -86,7 +85,6 @@ curl http://127.0.0.1:5000/v1/chat/completions \
       }
     ],
     "mode": "instruct",
-    "instruction_template": "Alpaca",
     "stream": true
   }'
 ```

@@ -131,9 +129,6 @@ curl -k http://127.0.0.1:5000/v1/internal/model/load \
     "args": {
       "load_in_4bit": true,
       "n_gpu_layers": 12
-    },
-    "settings": {
-      "instruction_template": "Alpaca"
     }
   }'
 ```

@@ -198,7 +193,7 @@ while True:
     assistant_message = ''
     for event in client.events():
         payload = json.loads(event.data)
-        chunk = payload['choices'][0]['message']['content']
+        chunk = payload['choices'][0]['delta']['content']
         assistant_message += chunk
         print(chunk, end='')
 

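The `delta` fix above matches the OpenAI-style streaming format: each streamed chunk carries its incremental text under `delta` rather than `message`. A rough illustration of one decoded SSE event, with made-up values:

```python
# Approximate shape of a single streamed chunk from the /v1/chat/completions endpoint.
payload = {
    "choices": [
        {"index": 0, "delta": {"content": "Hel"}, "finish_reason": None}
    ]
}

chunk = payload["choices"][0]["delta"]["content"]
print(chunk)  # -> Hel
```
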
@@ -241,6 +236,27 @@ for event in client.events():
 print()
 ```
 
+#### Python example with API key
+
+Replace
+
+```python
+headers = {
+    "Content-Type": "application/json"
+}
+```
+
+with
+
+```python
+headers = {
+    "Content-Type": "application/json",
+    "Authorization": "Bearer yourPassword123"
+}
+```
+
+in any of the examples above.
+
 ### Environment variables
 
 The following environment variables can be used (they take precedence over everything else):

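Putting the snippets above together, a minimal non-streaming request that authenticates with the key could look like the following sketch; the endpoint, `mode`, and `yourPassword123` come from the examples above, and the `requests` package is assumed to be installed:

```python
import requests

url = "http://127.0.0.1:5000/v1/chat/completions"

headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer yourPassword123"
}

payload = {
    "messages": [{"role": "user", "content": "Hello!"}],
    "mode": "instruct"
}

response = requests.post(url, headers=headers, json=payload)
print(response.json()["choices"][0]["message"]["content"])
```
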
@@ -14,6 +14,7 @@ import json
 import os
 import re
 import sys
+from multiprocessing import Array
 from pathlib import Path
 from time import sleep
 

@@ -27,9 +28,10 @@ base = os.environ.get("HF_ENDPOINT") or "https://huggingface.co"
 
 
 class ModelDownloader:
-    def __init__(self, max_retries=5):
+    def __init__(self, max_retries=7):
         self.max_retries = max_retries
         self.session = self.get_session()
+        self._progress_bar_slots = None
 
     def get_session(self):
         session = requests.Session()

@@ -186,73 +188,112 @@ class ModelDownloader:
         output_folder = Path(base_folder) / output_folder
         return output_folder
 
+    @property
+    def progress_bar_slots(self):
+        if self._progress_bar_slots is None:
+            raise RuntimeError("Progress bar slots not initialized. Start download threads first.")
+
+        return self._progress_bar_slots
+
+    def initialize_progress_bar_slots(self, num_threads):
+        self._progress_bar_slots = Array("B", [0] * num_threads)
+
+    def get_progress_bar_position(self):
+        with self.progress_bar_slots.get_lock():
+            for i in range(len(self.progress_bar_slots)):
+                if self.progress_bar_slots[i] == 0:
+                    self.progress_bar_slots[i] = 1
+                    return i
+
+            return 0  # fallback
+
+    def release_progress_bar_position(self, slot):
+        with self.progress_bar_slots.get_lock():
+            self.progress_bar_slots[slot] = 0
+
     def get_single_file(self, url, output_folder, start_from_scratch=False):
         filename = Path(url.rsplit('/', 1)[1])
         output_path = output_folder / filename
+        progress_bar_position = self.get_progress_bar_position()
 
-        max_retries = 7
+        max_retries = self.max_retries
         attempt = 0
-        while attempt < max_retries:
-            attempt += 1
-            session = self.session
-            headers = {}
-            mode = 'wb'
+        try:
+            while attempt < max_retries:
+                attempt += 1
+                session = self.session
+                headers = {}
+                mode = 'wb'
 
                 try:
                     if output_path.exists() and not start_from_scratch:
                         # Resume download
                         r = session.get(url, stream=True, timeout=20)
                         total_size = int(r.headers.get('content-length', 0))
                         if output_path.stat().st_size >= total_size:
                             return
 
                         headers = {'Range': f'bytes={output_path.stat().st_size}-'}
                         mode = 'ab'
 
                     with session.get(url, stream=True, headers=headers, timeout=30) as r:
                         r.raise_for_status()  # If status is not 2xx, raise an error
                         total_size = int(r.headers.get('content-length', 0))
                         block_size = 1024 * 1024  # 1MB
 
                         filename_str = str(filename)  # Convert PosixPath to string if necessary
 
                         tqdm_kwargs = {
                             'total': total_size,
                             'unit': 'B',
                             'unit_scale': True,
                             'unit_divisor': 1024,
                             'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]',
-                            'desc': f"{filename_str}: "
+                            'desc': f"{filename_str}: ",
+                            'position': progress_bar_position,
+                            'leave': False
                         }
 
                         if 'COLAB_GPU' in os.environ:
                             tqdm_kwargs.update({
                                 'position': 0,
                                 'leave': True
                             })
 
                         with open(output_path, mode) as f:
                             with tqdm.tqdm(**tqdm_kwargs) as t:
                                 count = 0
                                 for data in r.iter_content(block_size):
                                     f.write(data)
                                     t.update(len(data))
                                     if total_size != 0 and self.progress_bar is not None:
                                         count += len(data)
                                         self.progress_bar(float(count) / float(total_size), f"{filename_str}")
 
                     break  # Exit loop if successful
                 except (RequestException, ConnectionError, Timeout) as e:
                     print(f"Error downloading {filename}: {e}.")
                     print(f"That was attempt {attempt}/{max_retries}.", end=' ')
                     if attempt < max_retries:
                         print(f"Retry begins in {2 ** attempt} seconds.")
                         sleep(2 ** attempt)
                     else:
                         print("Failed to download after the maximum number of attempts.")
+        finally:
+            self.release_progress_bar_position(progress_bar_position)
 
     def start_download_threads(self, file_list, output_folder, start_from_scratch=False, threads=4):
-        thread_map(lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch), file_list, max_workers=threads, disable=True)
+        self.initialize_progress_bar_slots(threads)
+        tqdm.tqdm.set_lock(tqdm.tqdm.get_lock())
+        try:
+            thread_map(
+                lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch),
+                file_list,
+                max_workers=threads,
+                disable=True
+            )
+        finally:
+            print(f"\nDownload of {len(file_list)} files to {output_folder} completed.")
 
     def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):
         self.progress_bar = progress_bar

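For context, `_progress_bar_slots` is a small lock-protected pool that hands each download thread a distinct tqdm `position`, so concurrent progress bars do not draw over one another. A standalone sketch of the same idea (names and the slot count of 4 are illustrative):

```python
from multiprocessing import Array

slots = Array("B", [0] * 4)  # one byte per slot: 0 = free, 1 = taken

def acquire_slot():
    with slots.get_lock():
        for i in range(len(slots)):
            if slots[i] == 0:
                slots[i] = 1
                return i
    return 0  # fallback when every slot is busy

def release_slot(i):
    with slots.get_lock():
        slots[i] = 0

pos = acquire_slot()  # would be passed to tqdm as position=pos
release_slot(pos)     # released in a finally block once the download ends
```
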
@@ -318,7 +359,7 @@ if __name__ == '__main__':
     parser.add_argument('--model-dir', type=str, default=None, help='Save the model files to a subfolder of this folder instead of the default one (text-generation-webui/models).')
     parser.add_argument('--clean', action='store_true', help='Does not resume the previous download.')
     parser.add_argument('--check', action='store_true', help='Validates the checksums of model files.')
-    parser.add_argument('--max-retries', type=int, default=5, help='Max retries count when get error in download time.')
+    parser.add_argument('--max-retries', type=int, default=7, help='Max retries count when get error in download time.')
     args = parser.parse_args()
 
     branch = args.branch

@@ -557,12 +557,6 @@ def calc_trainable_parameters(model):
 
 def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, higher_rank_limit: bool, warmup_steps: int, optimizer: str, hard_cut_string: str, train_only_after: str, stop_at_loss: float, add_eos_token: bool, min_chars: int, report_to: str, precize_slicing_overlap: bool, add_eos_token_type: str, save_steps_under_loss: float, add_bos_token: bool, training_projection: str,sliding_window:bool,warmup_ratio:float, grad_accumulation: int,neft_noise_alpha:float):
 
-    if shared.args.monkey_patch:
-        from alpaca_lora_4bit.monkeypatch.peft_tuners_lora_monkey_patch import (
-            replace_peft_model_with_int4_lora_model
-        )
-        replace_peft_model_with_int4_lora_model()
-
     global train_log_graph
     global WANT_INTERRUPT
     WANT_INTERRUPT = False

@@ -600,10 +594,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
 
         time.sleep(5)
 
-    if shared.args.loader == 'GPTQ-for-LLaMa' and not shared.args.monkey_patch:
-        yield "LoRA training with GPTQ-for-LLaMa requires loading with `--monkey-patch`", zero_pd
-        return
-
     if cutoff_len <= 0 or micro_batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0:
         yield "Cannot input zeroes.", zero_pd
         return

@@ -865,15 +855,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
         yield traceback.format_exc().replace('\n', '\n\n'), zero_pd
         return
 
-    if shared.args.monkey_patch:
-        from alpaca_lora_4bit.autograd_4bit import Autograd4bitQuantLinear
-        from alpaca_lora_4bit.models import Linear4bitLt
-        for _, m in lora_model.named_modules():
-            if isinstance(m, Autograd4bitQuantLinear) or isinstance(m, Linear4bitLt):
-                if m.is_v1_model:
-                    m.zeros = m.zeros.half()
-                m.scales = m.scales.half()
-
     class Tracked():
         def __init__(self):
             self.current_steps = 0

@@ -146,7 +146,7 @@ def convert_history(history):
             for item in entry['content']:
                 if not isinstance(item, dict):
                     continue
 
                 image_url = None
                 content = None
                 if item['type'] == 'image_url' and isinstance(item['image_url'], dict):

@@ -205,7 +205,7 @@ def convert_history(history):
             else:
                 chat_dialogue.append(['', current_reply])
         elif role == "system":
-            system_message = content
+            system_message += f"\n{content}" if system_message else content
 
     if not user_input_last:
         user_input = ""

@@ -22,6 +22,14 @@ function regenerateClick() {
   document.getElementById("Regenerate").click();
 }
 
+function continueClick() {
+  document.getElementById("Continue").click();
+}
+
+function removeLastClick() {
+  document.getElementById("Remove-last").click();
+}
+
 function handleMorphdomUpdate(text) {
   morphdom(
     document.getElementById("chat").parentNode,

@@ -30,8 +30,13 @@ from modules.text_generation import (
 )
 from modules.utils import delete_file, get_available_characters, save_file
 
-# Copied from the Transformers library
+
+def strftime_now(format):
+    return datetime.now().strftime(format)
+
+
 jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
+jinja_env.globals["strftime_now"] = strftime_now
 
 
 def str_presenter(dumper, data):

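Registering `strftime_now` means sandboxed Jinja2 chat templates can ask for the current date and time, which some instruction templates expect. A small sketch of the mechanism (the template string is illustrative; jinja2 must be installed):

```python
from datetime import datetime

from jinja2.sandbox import ImmutableSandboxedEnvironment

def strftime_now(format):
    return datetime.now().strftime(format)

env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True)
env.globals["strftime_now"] = strftime_now

# A template can now call the helper directly:
print(env.from_string("Today is {{ strftime_now('%Y-%m-%d') }}.").render())
```
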
@@ -154,6 +154,8 @@ def add_long_list_class(html):
 
 @functools.lru_cache(maxsize=None)
 def convert_to_markdown(string):
+    if not string:
+        return ""
 
     # Make \[ \] LaTeX equations inline
     pattern = r'^\s*\\\[\s*\n([\s\S]*?)\n\s*\\\]\s*$'

@@ -304,8 +306,13 @@ def get_image_cache(path):
 
 copy_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="tabler-icon tabler-icon-copy"><path d="M8 8m0 2a2 2 0 0 1 2 -2h8a2 2 0 0 1 2 2v8a2 2 0 0 1 -2 2h-8a2 2 0 0 1 -2 -2z"></path><path d="M16 8v-2a2 2 0 0 0 -2 -2h-8a2 2 0 0 0 -2 2v8a2 2 0 0 0 2 2h2"></path></svg>'''
 refresh_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="tabler-icon tabler-icon-repeat"><path d="M4 12v-3a3 3 0 0 1 3 -3h13m-3 -3l3 3l-3 3"></path><path d="M20 12v3a3 3 0 0 1 -3 3h-13m3 3l-3 -3l3 -3"></path></svg>'''
-copy_button = f'<button class="footer-button footer-copy-button" onclick="copyToClipboard(this)">{copy_svg}</button>'
-refresh_button = f'<button class="footer-button footer-refresh-button" onclick="regenerateClick()">{refresh_svg}</button>'
+continue_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-player-play"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M7 4v16l13 -8z" /></svg>'''
+remove_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-trash"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M4 7l16 0" /><path d="M10 11l0 6" /><path d="M14 11l0 6" /><path d="M5 7l1 12a2 2 0 0 0 2 2h8a2 2 0 0 0 2 -2l1 -12" /><path d="M9 7v-3a1 1 0 0 1 1 -1h4a1 1 0 0 1 1 1v3" /></svg>'''
+
+copy_button = f'<button class="footer-button footer-copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
+refresh_button = f'<button class="footer-button footer-refresh-button" title="Regenerate" onclick="regenerateClick()">{refresh_svg}</button>'
+continue_button = f'<button class="footer-button footer-continue-button" title="Continue" onclick="continueClick()">{continue_svg}</button>'
+remove_button = f'<button class="footer-button footer-remove-button" title="Remove last reply" onclick="removeLastClick()">{remove_svg}</button>'
 
 
 def generate_instruct_html(history):

@@ -334,6 +341,8 @@ def generate_instruct_html(history):
                 f'<div class="message-body">{converted_visible[1]}</div>'
                 f'{copy_button}'
                 f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
+                f'{continue_button if i == len(history["visible"]) - 1 else ""}'
+                f'{remove_button if i == len(history["visible"]) - 1 else ""}'
                 f'</div>'
                 f'</div>'
             )

@@ -383,6 +392,8 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=
                 f'<div class="message-body">{converted_visible[1]}</div>'
                 f'{copy_button}'
                 f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
+                f'{continue_button if i == len(history["visible"]) - 1 else ""}'
+                f'{remove_button if i == len(history["visible"]) - 1 else ""}'
                 f'</div>'
                 f'</div>'
             )

@@ -417,6 +428,8 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
                 f'<div class="message-body">{converted_visible[1]}</div>'
                 f'{copy_button}'
                 f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
+                f'{continue_button if i == len(history["visible"]) - 1 else ""}'
+                f'{remove_button if i == len(history["visible"]) - 1 else ""}'
                 f'</div>'
                 f'</div>'
             )

@@ -202,7 +202,19 @@ class LlamacppHF(PreTrainedModel):
             params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
 
         Llama = llama_cpp_lib().Llama
-        model = Llama(**params)
+        try:
+            model = Llama(**params)
+        except Exception as e:
+            error_message = (
+                f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+                f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+                f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}"
+                "\n"
+                f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+            )
+
+            raise type(e)(error_message) from e
+
         model.last_updated_index = -1
 
         return LlamacppHF(model, model_file)

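A note on the `raise type(e)(error_message) from e` pattern used in this loader and the one below: it re-raises the same exception class with a friendlier message while chaining the original error as `__cause__`, so the real traceback is preserved. A minimal sketch (the `load()` stand-in and message are illustrative, and the pattern assumes the exception type accepts a single message argument):

```python
def load():
    raise MemoryError("allocation failed")  # stand-in for Llama(**params)

try:
    try:
        load()
    except Exception as e:
        raise type(e)(f"Failed loading the model. Original error: {e}") from e
except Exception as wrapped:
    print(type(wrapped).__name__, "-", wrapped, "| cause:", repr(wrapped.__cause__))
```
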
@@ -108,7 +108,19 @@ class LlamaCppModel:
             params["type_k"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
             params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type)
 
-        result.model = Llama(**params)
+        try:
+            result.model = Llama(**params)
+        except Exception as e:
+            error_message = (
+                f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n"
+                f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})."
+                f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}"
+                "\n"
+                f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})."
+            )
+
+            raise type(e)(error_message) from e
+
         if cache_capacity > 0:
             result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity))
 

@@ -89,8 +89,8 @@ def create_ui():
             shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
             shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch)
             shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
-            shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. ⚠️ Lower this value if you can\'t load the model.')
-            shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. ⚠️ Lower this value if you can\'t load the model.')
+            shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768.')
+            shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768.')
             shared.gradio['cache_type'] = gr.Dropdown(label="cache_type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q6', 'q4'], value=shared.args.cache_type, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.')
             shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
             shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')

one_click.py: 160 changed lines

@@ -1,6 +1,7 @@
 import argparse
 import glob
 import hashlib
+import json
 import os
 import platform
 import re

@@ -101,32 +102,25 @@ def torch_version():
 
 
 def update_pytorch():
-    print_big_message("Checking for PyTorch updates")
+    print_big_message("Checking for PyTorch updates.")
 
     torver = torch_version()
-    is_cuda = '+cu' in torver
-    is_cuda118 = '+cu118' in torver  # 2.1.0+cu118
-    is_rocm = '+rocm' in torver  # 2.0.1+rocm5.4.2
-    is_intel = '+cxx11' in torver  # 2.0.1a0+cxx11.abi
-    is_cpu = '+cpu' in torver  # 2.0.1+cpu
-
-    install_pytorch = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} "
-
-    if is_cuda118:
-        install_pytorch += "--index-url https://download.pytorch.org/whl/cu118"
-    elif is_cuda:
-        install_pytorch += "--index-url https://download.pytorch.org/whl/cu121"
-    elif is_rocm:
-        install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.1"
-    elif is_cpu:
-        install_pytorch += "--index-url https://download.pytorch.org/whl/cpu"
-    elif is_intel:
-        if is_linux():
-            install_pytorch = "python -m pip install --upgrade torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
-        else:
-            install_pytorch = "python -m pip install --upgrade torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
-
-    run_cmd(f"{install_pytorch}", assert_success=True, environment=True)
+    base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}"
+
+    if "+cu118" in torver:
+        install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu118"
+    elif "+cu" in torver:
+        install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu121"
+    elif "+rocm" in torver:
+        install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.1"
+    elif "+cpu" in torver:
+        install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu"
+    elif "+cxx11" in torver:
+        intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10"
+        install_cmd = f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
+    else:
+        install_cmd = base_cmd
+
+    run_cmd(install_cmd, assert_success=True, environment=True)
 
 
 def is_installed():

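The rewrite above keys the install command off the local-version suffix that PyTorch builds carry (`+cu118`, `+cu121`, `+rocm...`, `+cpu`, `+cxx11`). A hedged illustration of that detection on made-up version strings:

```python
# The version strings below are examples only; real values come from torch_version().
for torver in ["2.4.1+cu121", "2.4.1+cu118", "2.4.1+rocm6.1", "2.4.1+cpu", "2.4.1"]:
    if "+cu118" in torver:
        backend = "CUDA 11.8 index"
    elif "+cu" in torver:
        backend = "CUDA 12.1 index"
    elif "+rocm" in torver:
        backend = "ROCm index"
    elif "+cpu" in torver:
        backend = "CPU index"
    else:
        backend = "default index"
    print(torver, "->", backend)
```
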
@@ -155,6 +149,11 @@ def check_env():
         sys.exit(1)
 
 
+def get_current_commit():
+    result = run_cmd("git rev-parse HEAD", capture_output=True, environment=True)
+    return result.stdout.decode('utf-8').strip()
+
+
 def clear_cache():
     run_cmd("conda clean -a -y", environment=True)
     run_cmd("python -m pip cache purge", environment=True)

@@ -340,27 +339,84 @@ def install_extensions_requirements():
 def update_requirements(initial_installation=False, pull=True):
     # Create .git directory if missing
     if not os.path.exists(os.path.join(script_dir, ".git")):
-        git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main'
-        run_cmd(git_creation_cmd, environment=True, assert_success=True)
+        run_cmd(
+            "git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && "
+            "git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && "
+            "git reset --hard origin/main && git branch --set-upstream-to=origin/main",
+            environment=True,
+            assert_success=True
+        )
+
+    torver = torch_version()
+    if "+rocm" in torver:
+        requirements_file = "requirements_amd" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+    elif "+cpu" in torver or "+cxx11" in torver:
+        requirements_file = "requirements_cpu_only" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+    elif is_macos():
+        requirements_file = "requirements_apple_" + ("intel" if is_x86_64() else "silicon") + ".txt"
+    else:
+        requirements_file = "requirements" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
+
+    # Load state from JSON file
+    state_file = '.installer_state.json'
+    current_commit = get_current_commit()
+    wheels_changed = False
+    if os.path.exists(state_file):
+        with open(state_file, 'r') as f:
+            last_state = json.load(f)
+
+        if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit:
+            wheels_changed = True
+    else:
+        wheels_changed = True
 
     if pull:
-        print_big_message("Updating the local copy of the repository with \"git pull\"")
+        # Read .whl lines before pulling
+        before_pull_whl_lines = []
+        if os.path.exists(requirements_file):
+            with open(requirements_file, 'r') as f:
+                before_pull_whl_lines = [line for line in f if '.whl' in line]
+
+        print_big_message('Updating the local copy of the repository with "git pull"')
 
+        # Hash files before pulling
         files_to_check = [
             'start_linux.sh', 'start_macos.sh', 'start_windows.bat', 'start_wsl.bat',
             'update_wizard_linux.sh', 'update_wizard_macos.sh', 'update_wizard_windows.bat', 'update_wizard_wsl.bat',
             'one_click.py'
         ]
+        before_hashes = {file: calculate_file_hash(file) for file in files_to_check}
 
-        before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check}
+        # Perform the git pull
         run_cmd("git pull --autostash", assert_success=True, environment=True)
-        after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check}
 
-        # Check for differences in installation file hashes
-        for file_name in files_to_check:
-            if before_pull_hashes[file_name] != after_pull_hashes[file_name]:
-                print_big_message(f"File '{file_name}' was updated during 'git pull'. Please run the script again.")
-                exit(1)
+        # Check hashes after pulling
+        after_hashes = {file: calculate_file_hash(file) for file in files_to_check}
+        if os.path.exists(requirements_file):
+            with open(requirements_file, 'r') as f:
+                after_pull_whl_lines = [line for line in f if '.whl' in line]
+
+            wheels_changed = wheels_changed or (before_pull_whl_lines != after_pull_whl_lines)
+
+        # Check for changes to installer files
+        for file in files_to_check:
+            if before_hashes[file] != after_hashes[file]:
+                print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.")
+
+                # Save state before exiting
+                current_state = {}
+                if wheels_changed:
+                    current_state['wheels_changed'] = True
+
+                with open(state_file, 'w') as f:
+                    json.dump(current_state, f)
+
+                sys.exit(1)
+
+    # Save current state
+    current_state = {'last_installed_commit': current_commit}
+    with open(state_file, 'w') as f:
+        json.dump(current_state, f)
 
     if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"):
         install_extensions_requirements()

@@ -369,38 +425,23 @@ def update_requirements(initial_installation=False, pull=True):
     if not initial_installation:
         update_pytorch()
 
-    # Detect the PyTorch version
-    torver = torch_version()
-    is_cuda = '+cu' in torver
-    is_cuda118 = '+cu118' in torver  # 2.1.0+cu118
-    is_rocm = '+rocm' in torver  # 2.0.1+rocm5.4.2
-    is_intel = '+cxx11' in torver  # 2.0.1a0+cxx11.abi
-    is_cpu = '+cpu' in torver  # 2.0.1+cpu
-
-    if is_rocm:
-        base_requirements = "requirements_amd" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
-    elif is_cpu or is_intel:
-        base_requirements = "requirements_cpu_only" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
-    elif is_macos():
-        base_requirements = "requirements_apple_" + ("intel" if is_x86_64() else "silicon") + ".txt"
-    else:
-        base_requirements = "requirements" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt"
-
-    requirements_file = base_requirements
-
     print_big_message(f"Installing webui requirements from file: {requirements_file}")
     print(f"TORCH: {torver}\n")
 
     # Prepare the requirements file
     textgen_requirements = open(requirements_file).read().splitlines()
-    if is_cuda118:
+
+    if not initial_installation and not wheels_changed:
+        textgen_requirements = [line for line in textgen_requirements if '.whl' not in line]
+
+    if "+cu118" in torver:
         textgen_requirements = [
             req.replace('+cu121', '+cu118').replace('+cu122', '+cu118')
             for req in textgen_requirements
             if "autoawq" not in req.lower()
         ]
 
-    if is_windows() and is_cuda118:  # No flash-attention on Windows for CUDA 11
+    if is_windows() and "+cu118" in torver:  # No flash-attention on Windows for CUDA 11
         textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req]
 
     with open('temp_requirements.txt', 'w') as file:

@ -416,16 +457,9 @@ def update_requirements(initial_installation=False, pull=True):
|
|||||||
|
|
||||||
# Install/update the project requirements
|
# Install/update the project requirements
|
||||||
run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True)
|
run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True)
|
||||||
|
|
||||||
|
# Clean up
|
||||||
os.remove('temp_requirements.txt')
|
os.remove('temp_requirements.txt')
|
||||||
|
|
||||||
# Check for '+cu' or '+rocm' in version string to determine if torch uses CUDA or ROCm. Check for pytorch-cuda as well for backwards compatibility
|
|
||||||
if not any((is_cuda, is_rocm)) and run_cmd("conda list -f pytorch-cuda | grep pytorch-cuda", environment=True, capture_output=True).returncode == 1:
|
|
||||||
clear_cache()
|
|
||||||
return
|
|
||||||
|
|
||||||
if not os.path.exists("repositories/"):
|
|
||||||
os.mkdir("repositories")
|
|
||||||
|
|
||||||
clear_cache()
|
clear_cache()
|
||||||
|
|
||||||
|
|
||||||
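To summarize the installer-state logic added above: `.installer_state.json` records the last installed commit (and whether wheel requirements changed mid-update), and `.whl` requirement lines are only reinstalled when that state suggests they may be stale. A condensed sketch of the decision; the file name matches the diff, while the helper itself is illustrative:

```python
import json
import os

STATE_FILE = ".installer_state.json"

def wheels_need_reinstall(current_commit):
    # Reinstall .whl requirements when there is no saved state, when a previous
    # run flagged wheel changes, or when the installed commit has moved since.
    if not os.path.exists(STATE_FILE):
        return True

    with open(STATE_FILE) as f:
        state = json.load(f)

    return "wheels_changed" in state or state.get("last_installed_commit") != current_commit
```
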
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
 bitsandbytes==0.45.*
 colorama
 datasets
@@ -32,22 +32,22 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # CUDA wheels
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
 colorama
 datasets
 einops
@@ -31,14 +31,14 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.6+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.6+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.7+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.7+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
 colorama
 datasets
 einops
@@ -31,10 +31,10 @@ sse-starlette==1.6.5
 tiktoken

 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

 # AMD wheels
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
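The `; platform_system == ... and python_version == ...` suffixes on the wheel URLs above are PEP 508 environment markers: pip evaluates them against the running interpreter and operating system and installs only the wheel whose marker is true. A minimal sketch of that evaluation, assuming the `packaging` library (the same one pip vendors) is available:

```
# Minimal sketch: evaluating a PEP 508 environment marker like the ones
# attached to the wheel URLs above. Assumes the "packaging" library.
from packaging.markers import Marker

marker = Marker(
    'platform_system == "Linux" and platform_machine == "x86_64" '
    'and python_version == "3.11"'
)

# evaluate() checks the marker against the current interpreter and OS,
# which is how pip decides whether this particular wheel URL applies.
print(marker.evaluate())  # True only on x86-64 Linux running Python 3.11
```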
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
 colorama
 datasets
 einops
@@ -31,8 +31,8 @@ sse-starlette==1.6.5
 tiktoken

 # Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
 colorama
 datasets
 einops
@@ -31,10 +31,10 @@ sse-starlette==1.6.5
 tiktoken

 # Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl
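The `platform_release` bounds on the Mac wheels above compare against the Darwin kernel version, which tracks macOS releases (Darwin 22.x corresponds to macOS 13, 23.x to macOS 14, and 24.x to macOS 15, matching the `macosx_13_0`/`_14_0`/`_15_0` wheel tags). A small illustrative sketch, assuming it runs on macOS, of the value those markers see:

```
# Illustrative sketch (assumes macOS): the platform_release markers above
# compare against the Darwin kernel version reported by the system.
import platform

release = platform.release()            # e.g. "23.6.0" on macOS 14 (Sonoma)
darwin_major = int(release.split(".")[0])

# Approximate Darwin-to-macOS mapping implied by the wheel selection above:
# Darwin 22.x -> macOS 13, 23.x -> macOS 14, 24.x -> macOS 15.
print(f"Darwin {release} ~ macOS {darwin_major - 9}")
```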
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
 colorama
 datasets
 einops
@@ -31,7 +31,7 @@ sse-starlette==1.6.5
 tiktoken

 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
 colorama
 datasets
 einops
@@ -31,7 +31,7 @@ sse-starlette==1.6.5
 tiktoken

 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
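The two CPU-only variants above differ only in whether the wheels were built with AVX2 support. As a rough, Linux-only illustration (not part of this commit) of how to check which one applies to a given machine:

```
# Rough, Linux-only sketch: check whether the CPU advertises AVX2, i.e.
# whether the "+cpuavx2" or the "+cpuavx" wheels above are appropriate.
def cpu_has_avx2() -> bool:
    try:
        with open("/proc/cpuinfo") as f:
            return "avx2" in f.read()
    except OSError:
        # /proc/cpuinfo is unavailable outside Linux; report unknown as False.
        return False

print("AVX2 supported:", cpu_has_avx2())
```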
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
 bitsandbytes==0.45.*
 colorama
 datasets
@@ -32,22 +32,22 @@ sse-starlette==1.6.5
 tiktoken

 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

 # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

 # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

 # CUDA wheels
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
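This file pins several mutually exclusive llama-cpp-python builds (CPU-only, CUDA with GGML_CUDA_FORCE_MMQ, and CUDA tensor-cores), so it can be useful to confirm which distribution pip actually selected. A hedged sketch, using the distribution names taken from the wheel filenames above:

```
# Hedged sketch: report which of the llama-cpp-python variants pinned above
# is installed. Distribution names are taken from the wheel filenames.
from importlib.metadata import PackageNotFoundError, version

for dist in (
    "llama_cpp_python",
    "llama_cpp_python_cuda",
    "llama_cpp_python_cuda_tensorcores",
):
    try:
        print(f"{dist}: {version(dist)}")   # e.g. "0.3.7+cu121avx"
    except PackageNotFoundError:
        print(f"{dist}: not installed")
```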
@@ -1,4 +1,4 @@
-accelerate==1.2.*
+accelerate==1.3.*
 colorama
 datasets
 einops
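The `accelerate==1.3.*` pin applied across these requirements files is a wildcard specifier: it accepts any 1.3.x release and nothing else. A minimal sketch of that behaviour, assuming the `packaging` library:

```
# Minimal sketch: what the "==1.3.*" wildcard pin used for accelerate accepts.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

spec = SpecifierSet("==1.3.*")
print(Version("1.3.2") in spec)   # True  (any 1.3.x release is acceptable)
print(Version("1.4.0") in spec)   # False (outside the pinned series)
```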
@@ -41,12 +41,12 @@ if "%conda_exists%" == "F" (
 mkdir "%INSTALL_DIR%"
 call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. && goto end )

-:: Try CertUtil first
+@rem Try CertUtil first
 for /f %%a in ('CertUtil -hashfile "%INSTALL_DIR%\miniconda_installer.exe" SHA256 ^| find /i /v " " ^| find /i "%MINICONDA_CHECKSUM%"') do (
 set "output=%%a"
 )

-:: If CertUtil fails, try PowerShell
+@rem If CertUtil fails, try PowerShell
 if not defined output (
 for /f %%a in ('powershell -Command "if((Get-FileHash \"%INSTALL_DIR%\miniconda_installer.exe\" -Algorithm SHA256).Hash -eq ''%MINICONDA_CHECKSUM%''){echo true}"') do (
 set "output=%%a"
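Two notes on the installer hunk above. The `::`-style comments are swapped for `@rem` most likely because `::` is technically a label and can misparse inside parenthesized `if (...)` blocks; the commit itself does not state the motivation. The surrounding code verifies the downloaded Miniconda installer against a SHA-256 checksum using CertUtil, with PowerShell as a fallback. A hedged Python equivalent of that check, with placeholder path and digest values:

```
# Hedged Python equivalent (not part of the installer) of the SHA-256 check
# the batch script performs via CertUtil and, as a fallback, PowerShell.
import hashlib

def sha256_matches(path: str, expected_hex: str) -> bool:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest().lower() == expected_hex.lower()

# Placeholder usage; substitute the real installer path and expected checksum:
# sha256_matches("miniconda_installer.exe", "<MINICONDA_CHECKSUM>")
```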