Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2024-11-27 01:59:14 +01:00)
Commit dd46229487
@@ -26,13 +26,16 @@ base = "https://huggingface.co"
 
 
 class ModelDownloader:
     def __init__(self, max_retries=5):
-        self.session = requests.Session()
-        if max_retries:
-            self.session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries))
-            self.session.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries))
+        self.max_retries = max_retries
+
+    def get_session(self):
+        session = requests.Session()
+        if self.max_retries:
+            session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=self.max_retries))
+            session.mount('https://huggingface.co', HTTPAdapter(max_retries=self.max_retries))
 
         if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None:
-            self.session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
+            session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
 
         try:
             from huggingface_hub import get_token
@@ -41,7 +44,9 @@ class ModelDownloader:
             token = os.getenv("HF_TOKEN")
 
         if token is not None:
-            self.session.headers = {'authorization': f'Bearer {token}'}
+            session.headers = {'authorization': f'Bearer {token}'}
+
+        return session
 
     def sanitize_model_and_branch_names(self, model, branch):
         if model[-1] == '/':
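The two hunks above replace the session stored on the downloader in __init__() with a get_session() helper that builds a fresh requests.Session on every call, mounting retry adapters for the two Hugging Face hosts and attaching HF_USER/HF_PASS or an HF_TOKEN bearer header when present. Below is a minimal standalone sketch of that pattern; it is an illustration, not the project's module (build_hf_session is an invented name), it omits the huggingface_hub token lookup from the diff, and it assumes only the requests library:

import os

import requests
from requests.adapters import HTTPAdapter


def build_hf_session(max_retries=5):
    # Fresh session per call, mirroring the get_session() refactor in the diff.
    session = requests.Session()
    if max_retries:
        session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries))
        session.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries))

    # Same credential sources as the diff: basic auth from HF_USER/HF_PASS, plus an optional bearer token.
    if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None:
        session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))

    token = os.getenv("HF_TOKEN")
    if token is not None:
        session.headers = {'authorization': f'Bearer {token}'}

    return session


if __name__ == '__main__':
    session = build_hf_session()
    r = session.get('https://huggingface.co/api/models/facebook/galactica-125m/tree/main', timeout=10)
    print(r.status_code)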
@@ -65,6 +70,7 @@ class ModelDownloader:
         return model, branch
 
     def get_download_links_from_huggingface(self, model, branch, text_only=False, specific_file=None):
+        session = self.get_session()
         page = f"/api/models/{model}/tree/{branch}"
         cursor = b""
 
@@ -78,7 +84,7 @@ class ModelDownloader:
         is_lora = False
         while True:
             url = f"{base}{page}" + (f"?cursor={cursor.decode()}" if cursor else "")
-            r = self.session.get(url, timeout=10)
+            r = session.get(url, timeout=10)
             r.raise_for_status()
             content = r.content
 
@@ -156,9 +162,8 @@ class ModelDownloader:
         is_llamacpp = has_gguf and specific_file is not None
         return links, sha256, is_lora, is_llamacpp
 
-    def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, base_folder=None):
-        if base_folder is None:
-            base_folder = 'models' if not is_lora else 'loras'
+    def get_output_folder(self, model, branch, is_lora, is_llamacpp=False):
+        base_folder = 'models' if not is_lora else 'loras'
 
         # If the model is of type GGUF, save directly in the base_folder
         if is_llamacpp:
@@ -172,6 +177,7 @@ class ModelDownloader:
         return output_folder
 
     def get_single_file(self, url, output_folder, start_from_scratch=False):
+        session = self.get_session()
         filename = Path(url.rsplit('/', 1)[1])
         output_path = output_folder / filename
         headers = {}
@@ -179,7 +185,7 @@ class ModelDownloader:
         if output_path.exists() and not start_from_scratch:
 
             # Check if the file has already been downloaded completely
-            r = self.session.get(url, stream=True, timeout=10)
+            r = session.get(url, stream=True, timeout=10)
             total_size = int(r.headers.get('content-length', 0))
             if output_path.stat().st_size >= total_size:
                 return
@@ -188,7 +194,7 @@ class ModelDownloader:
                 headers = {'Range': f'bytes={output_path.stat().st_size}-'}
                 mode = 'ab'
 
-        with self.session.get(url, stream=True, headers=headers, timeout=10) as r:
+        with session.get(url, stream=True, headers=headers, timeout=10) as r:
             r.raise_for_status()  # Do not continue the download if the request was unsuccessful
             total_size = int(r.headers.get('content-length', 0))
             block_size = 1024 * 1024  # 1MB
@@ -303,7 +309,10 @@ if __name__ == '__main__':
     links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=args.text_only, specific_file=specific_file)
 
     # Get the output folder
-    output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=args.output)
+    if args.output:
+        output_folder = Path(args.output)
+    else:
+        output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)
 
     if args.check:
         # Check previously downloaded files
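In the __main__ hunk above, --output now short-circuits get_output_folder() instead of being threaded through it as base_folder. A hedged sketch of the resulting selection logic in isolation; resolve_output_folder is an invented name and the fallback layout approximates what get_output_folder() builds in the same script:

from pathlib import Path


def resolve_output_folder(output_arg, model, branch, is_lora=False, is_llamacpp=False):
    # An explicit --output path wins unchanged.
    if output_arg:
        return Path(output_arg)

    # Otherwise approximate the default layout: models/<user>_<model>[_<branch>],
    # loras/... for LoRAs, or the bare base folder for GGUF (llama.cpp) downloads.
    base_folder = 'models' if not is_lora else 'loras'
    if is_llamacpp:
        return Path(base_folder)

    output_folder = f"{'_'.join(model.split('/')[-2:])}"
    if branch != 'main':
        output_folder += f'_{branch}'

    return Path(base_folder) / output_folder


print(resolve_output_folder(None, 'facebook/galactica-125m', 'main'))           # models/facebook_galactica-125m
print(resolve_output_folder('/tmp/custom', 'facebook/galactica-125m', 'main'))  # /tmp/custom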
@@ -4,7 +4,7 @@ instruction_template: |-
        {{- message['content'] -}}
      {%- else -%}
        {%- if message['role'] == 'user' -%}
-        {{-' [INST] ' + message['content'].rstrip() + ' [/INST] '-}}
+        {{-'[INST] ' + message['content'].rstrip() + ' [/INST]'-}}
        {%- else -%}
          {{-'' + message['content'] + '</s>' -}}
        {%- endif -%}
@@ -166,53 +166,54 @@ def generate_chat_prompt(user_input, state, **kwargs):
         prompt = remove_extra_bos(prompt)
         return prompt
 
-    # Handle truncation
-    max_length = get_max_prompt_length(state)
     prompt = make_prompt(messages)
-    encoded_length = get_encoded_length(prompt)
-    while len(messages) > 0 and encoded_length > max_length:
-        # Remove old message, save system message
-        if len(messages) > 2 and messages[0]['role'] == 'system':
-            messages.pop(1)
-
-        # Remove old message when no system message is present
-        elif len(messages) > 1 and messages[0]['role'] != 'system':
-            messages.pop(0)
-
-        # Resort to truncating the user input
-        else:
-
-            user_message = messages[-1]['content']
-
-            # Bisect the truncation point
-            left, right = 0, len(user_message) - 1
-            while right - left > 1:
-                mid = (left + right) // 2
-
-                messages[-1]['content'] = user_message[mid:]
-                prompt = make_prompt(messages)
-                encoded_length = get_encoded_length(prompt)
-
-                if encoded_length <= max_length:
-                    right = mid
-                else:
-                    left = mid
-
-            messages[-1]['content'] = user_message[right:]
-            prompt = make_prompt(messages)
-            encoded_length = get_encoded_length(prompt)
-            if encoded_length > max_length:
-                logger.error(f"Failed to build the chat prompt. The input is too long for the available context length.\n\nTruncation length: {state['truncation_length']}\nmax_new_tokens: {state['max_new_tokens']} (is it too high?)\nAvailable context length: {max_length}\n")
-                raise ValueError
-            else:
-                logger.warning(f"The input has been truncated. Context length: {state['truncation_length']}, max_new_tokens: {state['max_new_tokens']}, available context length: {max_length}.")
-                break
-
-        prompt = make_prompt(messages)
-        encoded_length = get_encoded_length(prompt)
+
+    # Handle truncation
+    if shared.tokenizer is not None:
+        max_length = get_max_prompt_length(state)
+        encoded_length = get_encoded_length(prompt)
+        while len(messages) > 0 and encoded_length > max_length:
+
+            # Remove old message, save system message
+            if len(messages) > 2 and messages[0]['role'] == 'system':
+                messages.pop(1)
+
+            # Remove old message when no system message is present
+            elif len(messages) > 1 and messages[0]['role'] != 'system':
+                messages.pop(0)
+
+            # Resort to truncating the user input
+            else:
+
+                user_message = messages[-1]['content']
+
+                # Bisect the truncation point
+                left, right = 0, len(user_message) - 1
+                while right - left > 1:
+                    mid = (left + right) // 2
+
+                    messages[-1]['content'] = user_message[mid:]
+                    prompt = make_prompt(messages)
+                    encoded_length = get_encoded_length(prompt)
+
+                    if encoded_length <= max_length:
+                        right = mid
+                    else:
+                        left = mid
+
+                messages[-1]['content'] = user_message[right:]
+                prompt = make_prompt(messages)
+                encoded_length = get_encoded_length(prompt)
+                if encoded_length > max_length:
+                    logger.error(f"Failed to build the chat prompt. The input is too long for the available context length.\n\nTruncation length: {state['truncation_length']}\nmax_new_tokens: {state['max_new_tokens']} (is it too high?)\nAvailable context length: {max_length}\n")
+                    raise ValueError
+                else:
+                    logger.warning(f"The input has been truncated. Context length: {state['truncation_length']}, max_new_tokens: {state['max_new_tokens']}, available context length: {max_length}.")
+                    break
+
+            prompt = make_prompt(messages)
+            encoded_length = get_encoded_length(prompt)
 
     if also_return_rows:
         return prompt, [message['content'] for message in messages]
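The truncation hunk above keeps the same strategy as before but only runs it when shared.tokenizer is not None, so prompts are no longer measured or truncated when no tokenizer is loaded. The core of that strategy is a binary search for the earliest cut point in the last user message that still fits the context budget. A self-contained sketch, with the tokenizer replaced by an arbitrary length function (truncate_to_fit and encoded_length_fn are illustrative names, not the module's API):

def truncate_to_fit(user_message, max_length, encoded_length_fn):
    # Return the longest suffix of user_message whose encoded length fits max_length.
    if encoded_length_fn(user_message) <= max_length:
        return user_message

    left, right = 0, len(user_message) - 1
    while right - left > 1:
        mid = (left + right) // 2
        if encoded_length_fn(user_message[mid:]) <= max_length:
            right = mid   # msg[mid:] fits; try an even smaller cut to keep more text
        else:
            left = mid    # still too long; cut deeper into the message

    return user_message[right:]


# Toy check with a character-count "tokenizer":
print(len(truncate_to_fit("x" * 1000, 100, len)))  # -> 100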
@@ -690,6 +691,9 @@ def load_character(character, name1, name2):
 
 
 def load_instruction_template(template):
+    if template == 'None':
+        return ''
+
     for filepath in [Path(f'instruction-templates/{template}.yaml'), Path('instruction-templates/Alpaca.yaml')]:
         if filepath.exists():
             break
@@ -51,18 +51,21 @@ class Exllamav2Model:
 
         model = ExLlamaV2(config)
 
-        split = None
-        if shared.args.gpu_split:
-            split = [float(alloc) for alloc in shared.args.gpu_split.split(",")]
+        if shared.args.cache_8bit:
+            cache = ExLlamaV2Cache_8bit(model, lazy=True)
+        else:
+            cache = ExLlamaV2Cache(model, lazy=True)
 
-        model.load(split)
+        if shared.args.autosplit:
+            model.load_autosplit(cache)
+        else:
+            split = None
+            if shared.args.gpu_split:
+                split = [float(alloc) for alloc in shared.args.gpu_split.split(",")]
+
+            model.load(split)
 
         tokenizer = ExLlamaV2Tokenizer(config)
-        if shared.args.cache_8bit:
-            cache = ExLlamaV2Cache_8bit(model)
-        else:
-            cache = ExLlamaV2Cache(model)
-
         generator = ExLlamaV2StreamingGenerator(model, cache, tokenizer)
 
         result = self()
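The loader hunk above changes the order of operations: the KV cache (8-bit or FP16) is now created first with lazy=True, and --autosplit then lets ExLlamaV2 place the tensors across the available GPUs via model.load_autosplit(cache); otherwise the old explicit --gpu-split path is kept. The branching can be read (and run) in isolation with the ExLlamaV2 objects stubbed out; this is only a sketch of the control flow, not the real loader:

class FakeExl2Model:
    # Stand-in for exllamav2.ExLlamaV2; only the two load entry points are mimicked.
    def load_autosplit(self, cache):
        print("load_autosplit with cache =", cache)

    def load(self, split):
        print("load with split =", split)


def load_with_cache(model, cache_8bit=False, autosplit=False, gpu_split=None):
    # 1) Build the cache first (lazily in the real code) so autosplit can size it per GPU.
    cache = "ExLlamaV2Cache_8bit(model, lazy=True)" if cache_8bit else "ExLlamaV2Cache(model, lazy=True)"

    # 2) Either let exllamav2 place tensors automatically, or honor an explicit VRAM split.
    if autosplit:
        model.load_autosplit(cache)
    else:
        split = None
        if gpu_split:
            split = [float(alloc) for alloc in gpu_split.split(",")]

        model.load(split)

    return cache


load_with_cache(FakeExl2Model(), autosplit=True)
load_with_cache(FakeExl2Model(), gpu_split="20,7,7")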
@@ -37,18 +37,22 @@ class Exllamav2HF(PreTrainedModel):
         super().__init__(PretrainedConfig())
         self.ex_config = config
         self.ex_model = ExLlamaV2(config)
-        split = None
-        if shared.args.gpu_split:
-            split = [float(alloc) for alloc in shared.args.gpu_split.split(",")]
-
-        self.ex_model.load(split)
-        self.generation_config = GenerationConfig()
         self.loras = None
+        self.generation_config = GenerationConfig()
 
         if shared.args.cache_8bit:
-            self.ex_cache = ExLlamaV2Cache_8bit(self.ex_model)
+            self.ex_cache = ExLlamaV2Cache_8bit(self.ex_model, lazy=True)
         else:
-            self.ex_cache = ExLlamaV2Cache(self.ex_model)
+            self.ex_cache = ExLlamaV2Cache(self.ex_model, lazy=True)
+
+        if shared.args.autosplit:
+            self.ex_model.load_autosplit(self.ex_cache)
+        else:
+            split = None
+            if shared.args.gpu_split:
+                split = [float(alloc) for alloc in shared.args.gpu_split.split(",")]
+
+            self.ex_model.load(split)
 
         self.past_seq = None
         if shared.args.cfg_cache:
@@ -78,6 +78,7 @@ loaders_and_params = OrderedDict({
         'no_flash_attn',
         'num_experts_per_token',
         'cache_8bit',
+        'autosplit',
         'alpha_value',
         'compress_pos_emb',
         'trust_remote_code',
@@ -89,6 +90,7 @@ loaders_and_params = OrderedDict({
         'no_flash_attn',
         'num_experts_per_token',
         'cache_8bit',
+        'autosplit',
         'alpha_value',
         'compress_pos_emb',
         'exllamav2_info',
@@ -257,7 +257,7 @@ def llamacpp_HF_loader(model_name):
     path = Path(f'{shared.args.model_dir}/{model_name}')
 
     # Check if a HF tokenizer is available for the model
-    if all((path / file).exists() for file in ['tokenizer.model', 'tokenizer_config.json']):
+    if all((path / file).exists() for file in ['tokenizer_config.json']):
         logger.info(f'Using tokenizer from: \"{path}\"')
     else:
         logger.error("Could not load the model because a tokenizer in Transformers format was not found.")
@@ -153,6 +153,8 @@ def infer_loader(model_name, model_settings):
         loader = 'ExLlamav2_HF'
     elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()):
         loader = 'AutoAWQ'
+    elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists():
+        loader = 'llamacpp_HF'
     elif len(list(path_to_model.glob('*.gguf'))) > 0:
         loader = 'llama.cpp'
     elif re.match(r'.*\.gguf', model_name.lower()):
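The new elif above makes infer_loader() prefer llamacpp_HF over llama.cpp whenever a model folder contains both a *.gguf file and a tokenizer_config.json. A small sketch of just that ordering (infer_gguf_loader is an invented name and the final fallback is a placeholder; the real function checks many more cases):

from pathlib import Path


def infer_gguf_loader(path_to_model: Path) -> str:
    # The more specific llamacpp_HF check must run before the plain llama.cpp one.
    has_gguf = len(list(path_to_model.glob('*.gguf'))) > 0
    if has_gguf and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists():
        return 'llamacpp_HF'
    elif has_gguf:
        return 'llama.cpp'

    return 'Transformers'  # placeholder fallback for this sketch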
@@ -225,7 +227,7 @@ def apply_model_settings_to_state(model, state):
         loader = model_settings.pop('loader')
 
         # If the user is using an alternative loader for the same model type, let them keep using it
-        if not (loader == 'ExLlamav2_HF' and state['loader'] in ['GPTQ-for-LLaMa', 'ExLlamav2', 'AutoGPTQ']) and not (loader == 'llama.cpp' and state['loader'] in ['llamacpp_HF', 'ctransformers']):
+        if not (loader == 'ExLlamav2_HF' and state['loader'] in ['GPTQ-for-LLaMa', 'ExLlamav2', 'AutoGPTQ']) and not (loader == 'llama.cpp' and state['loader'] in ['ctransformers']):
             state['loader'] = loader
 
     for k in model_settings:
@@ -243,27 +245,54 @@ def save_model_settings(model, state):
     Save the settings for this model to models/config-user.yaml
     '''
     if model == 'None':
-        yield ("Not saving the settings because no model is loaded.")
+        yield ("Not saving the settings because no model is selected in the menu.")
         return
 
-    with Path(f'{shared.args.model_dir}/config-user.yaml') as p:
-        if p.exists():
-            user_config = yaml.safe_load(open(p, 'r').read())
-        else:
-            user_config = {}
-
-        model_regex = model + '$'  # For exact matches
-        if model_regex not in user_config:
-            user_config[model_regex] = {}
-
-        for k in ui.list_model_elements():
-            if k == 'loader' or k in loaders.loaders_and_params[state['loader']]:
-                user_config[model_regex][k] = state[k]
-
-        shared.user_config = user_config
-
-        output = yaml.dump(user_config, sort_keys=False)
-        with open(p, 'w') as f:
-            f.write(output)
-
-        yield (f"Settings for `{model}` saved to `{p}`.")
+    user_config = shared.load_user_config()
+    model_regex = model + '$'  # For exact matches
+    if model_regex not in user_config:
+        user_config[model_regex] = {}
+
+    for k in ui.list_model_elements():
+        if k == 'loader' or k in loaders.loaders_and_params[state['loader']]:
+            user_config[model_regex][k] = state[k]
+
+    shared.user_config = user_config
+
+    output = yaml.dump(user_config, sort_keys=False)
+    p = Path(f'{shared.args.model_dir}/config-user.yaml')
+    with open(p, 'w') as f:
+        f.write(output)
+
+    yield (f"Settings for `{model}` saved to `{p}`.")
+
+
+def save_instruction_template(model, template):
+    '''
+    Similar to the function above, but it saves only the instruction template.
+    '''
+    if model == 'None':
+        yield ("Not saving the template because no model is selected in the menu.")
+        return
+
+    user_config = shared.load_user_config()
+    model_regex = model + '$'  # For exact matches
+    if model_regex not in user_config:
+        user_config[model_regex] = {}
+
+    if template == 'None':
+        user_config[model_regex].pop('instruction_template', None)
+    else:
+        user_config[model_regex]['instruction_template'] = template
+
+    shared.user_config = user_config
+
+    output = yaml.dump(user_config, sort_keys=False)
+    p = Path(f'{shared.args.model_dir}/config-user.yaml')
+    with open(p, 'w') as f:
+        f.write(output)
+
+    if template == 'None':
+        yield (f"Instruction template for `{model}` unset in `{p}`, as the value for template was `{template}`.")
+    else:
+        yield (f"Instruction template for `{model}` saved to `{p}` as `{template}`.")
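save_instruction_template() stores the override in models/config-user.yaml under a '<model>$' key (the trailing '$' makes the entry an exact-match regex), next to any other per-model settings. A sketch of the resulting file shape; the model name and values are made up, and only pyyaml is assumed:

import yaml

user_config = {
    'MyModel-GGUF$': {                      # '$' anchors an exact model-name match
        'loader': 'llamacpp_HF',
        'instruction_template': 'Mistral',  # what save_instruction_template() writes
    }
}

print(yaml.dump(user_config, sort_keys=False))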
@@ -134,6 +134,7 @@ group.add_argument('--row_split', action='store_true', help='Split the model by
 # ExLlamaV2
 group = parser.add_argument_group('ExLlamaV2')
 group.add_argument('--gpu-split', type=str, help='Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.')
+group.add_argument('--autosplit', action='store_true', help='Autosplit the model tensors across the available GPUs. This causes --gpu-split to be ignored.')
 group.add_argument('--max_seq_len', type=int, default=2048, help='Maximum sequence length.')
 group.add_argument('--cfg-cache', action='store_true', help='ExLlamav2_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader.')
 group.add_argument('--no_flash_attn', action='store_true', help='Force flash-attention to not be used.')
@@ -279,6 +280,23 @@ def is_chat():
     return True
 
 
+def load_user_config():
+    '''
+    Loads custom model-specific settings
+    '''
+    if Path(f'{args.model_dir}/config-user.yaml').exists():
+        file_content = open(f'{args.model_dir}/config-user.yaml', 'r').read().strip()
+
+        if file_content:
+            user_config = yaml.safe_load(file_content)
+        else:
+            user_config = {}
+    else:
+        user_config = {}
+
+    return user_config
+
+
 args.loader = fix_loader_name(args.loader)
 
 # Activate the multimodal extension
|
|||||||
model_config = {}
|
model_config = {}
|
||||||
|
|
||||||
# Load custom model-specific settings
|
# Load custom model-specific settings
|
||||||
with Path(f'{args.model_dir}/config-user.yaml') as p:
|
user_config = load_user_config()
|
||||||
if p.exists():
|
|
||||||
user_config = yaml.safe_load(open(p, 'r').read())
|
|
||||||
else:
|
|
||||||
user_config = {}
|
|
||||||
|
|
||||||
model_config = OrderedDict(model_config)
|
model_config = OrderedDict(model_config)
|
||||||
user_config = OrderedDict(user_config)
|
user_config = OrderedDict(user_config)
|
||||||
|
@@ -76,6 +76,7 @@ def list_model_elements():
         'no_flash_attn',
         'num_experts_per_token',
         'cache_8bit',
+        'autosplit',
         'threads',
         'threads_batch',
         'n_batch',
@@ -109,7 +109,7 @@ def create_chat_settings_ui():
         with gr.Row():
             with gr.Column():
                 with gr.Row():
-                    shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Saved instruction templates', value='Select template to load...', elem_classes='slim-dropdown')
+                    shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Saved instruction templates', info="After selecting the template, click on \"Load\" to load and apply it.", value='None', elem_classes='slim-dropdown')
                     ui.create_refresh_button(shared.gradio['instruction_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
                     shared.gradio['load_template'] = gr.Button("Load", elem_classes='refresh-button')
                     shared.gradio['save_template'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
@@ -17,6 +17,7 @@ from modules.models import load_model, unload_model
 from modules.models_settings import (
     apply_model_settings_to_state,
     get_model_metadata,
+    save_instruction_template,
     save_model_settings,
     update_model_parameters
 )
@@ -131,6 +132,7 @@ def create_ui():
                         shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
                         shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
                         shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.')
+                        shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info='Automatically split the model tensors across the available GPUs.')
                        shared.gradio['no_flash_attn'] = gr.Checkbox(label="no_flash_attn", value=shared.args.no_flash_attn, info='Force flash-attention to not be used.')
                        shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Necessary to use CFG with this loader.')
                        shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
@@ -143,17 +145,35 @@ def create_ui():
                     shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
                     shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.')
                     shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
-                    shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.")
+                    shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to place your GGUF in a subfolder of models/ with the necessary tokenizer files.\n\nYou can use the \"llamacpp_HF creator\" menu to do that automatically.")
 
             with gr.Column():
                 with gr.Row():
                     shared.gradio['autoload_model'] = gr.Checkbox(value=shared.settings['autoload_model'], label='Autoload the model', info='Whether to load the model as soon as it is selected in the Model dropdown.', interactive=not mu)
 
-                shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
-                shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
-                with gr.Row():
-                    shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
-                    shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
+                with gr.Tab("Download"):
+                    shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
+                    shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
+                    with gr.Row():
+                        shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
+                        shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
+
+                with gr.Tab("llamacpp_HF creator"):
+                    with gr.Row():
+                        shared.gradio['gguf_menu'] = gr.Dropdown(choices=utils.get_available_ggufs(), value=lambda: shared.model_name, label='Choose your GGUF', elem_classes='slim-dropdown', interactive=not mu)
+                        ui.create_refresh_button(shared.gradio['gguf_menu'], lambda: None, lambda: {'choices': utils.get_available_ggufs()}, 'refresh-button', interactive=not mu)
+
+                    shared.gradio['unquantized_url'] = gr.Textbox(label="Enter the URL for the original (unquantized) model", info="Example: https://huggingface.co/lmsys/vicuna-13b-v1.5", max_lines=1)
+                    shared.gradio['create_llamacpp_hf_button'] = gr.Button("Submit", variant="primary", interactive=not mu)
+                    gr.Markdown("This will move your gguf file into a subfolder of `models` along with the necessary tokenizer files.")
+
+                with gr.Tab("Customize instruction template"):
+                    with gr.Row():
+                        shared.gradio['customized_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), value='None', label='Select the desired instruction template', elem_classes='slim-dropdown')
+                        ui.create_refresh_button(shared.gradio['customized_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
+
+                    shared.gradio['customized_template_submit'] = gr.Button("Submit", variant="primary", interactive=not mu)
+                    gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's medatada, which sometimes is wrong.")
 
                 with gr.Row():
                     shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
@@ -203,6 +223,8 @@ def create_event_handlers():
     shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
     shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
     shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), gradio('autoload_model'), gradio('load_model'))
+    shared.gradio['create_llamacpp_hf_button'].click(create_llamacpp_hf, gradio('gguf_menu', 'unquantized_url'), gradio('model_status'), show_progress=True)
+    shared.gradio['customized_template_submit'].click(save_instruction_template, gradio('model_menu', 'customized_template'), gradio('model_status'), show_progress=True)
 
 
 def load_model_wrapper(selected_model, loader, autoload=False):
@@ -244,27 +266,58 @@ def load_lora_wrapper(selected_loras):
 
 def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):
     try:
-        progress(0.0)
         downloader = importlib.import_module("download-model").ModelDownloader()
+
+        progress(0.0)
         model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)
+
         yield ("Getting the download links from Hugging Face")
         links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
         if return_links:
-            yield '\n\n'.join([f"`{Path(link).name}`" for link in links])
+            output = "```\n"
+            for link in links:
+                output += f"{Path(link).name}" + "\n"
+
+            output += "```"
+            yield output
             return
 
         yield ("Getting the output folder")
-        base_folder = shared.args.lora_dir if is_lora else shared.args.model_dir
-        output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=base_folder)
+        output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)
         if check:
             progress(0.5)
             yield ("Checking previously downloaded files")
             downloader.check_model_files(model, branch, links, sha256, output_folder)
             progress(1.0)
         else:
             yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
             downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
-            yield ("Done!")
+
+            yield (f"Model successfully saved to `{output_folder}/`.")
+    except:
+        progress(1.0)
+        yield traceback.format_exc().replace('\n', '\n\n')
+
+
+def create_llamacpp_hf(gguf_name, unquantized_url, progress=gr.Progress()):
+    try:
+        downloader = importlib.import_module("download-model").ModelDownloader()
+
+        progress(0.0)
+        model, branch = downloader.sanitize_model_and_branch_names(unquantized_url, None)
+
+        yield ("Getting the tokenizer files links from Hugging Face")
+        links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=True)
+        output_folder = Path(shared.args.model_dir) / (re.sub(r'(?i)\.gguf$', '', gguf_name) + "-HF")
+
+        yield (f"Downloading tokenizer to `{output_folder}`")
+        downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=False)
+
+        # Move the GGUF
+        (Path(shared.args.model_dir) / gguf_name).rename(output_folder / gguf_name)
+
+        yield (f"Model saved to `{output_folder}/`.\n\nYou can now load it using llamacpp_HF.")
     except:
         progress(1.0)
         yield traceback.format_exc().replace('\n', '\n\n')
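create_llamacpp_hf() above downloads only the tokenizer-side files of the original model (text_only=True) into a '<gguf name>-HF' folder and then moves the GGUF next to them, which is the folder layout llamacpp_HF expects. A hedged sketch of the same filesystem step using local copies instead of a download (make_llamacpp_hf_folder and its arguments are illustrative):

import re
import shutil
from pathlib import Path


def make_llamacpp_hf_folder(model_dir, gguf_name, tokenizer_files):
    # e.g. "mistral-7b.Q4_K_M.gguf" -> "<model_dir>/mistral-7b.Q4_K_M-HF"
    output_folder = Path(model_dir) / (re.sub(r'(?i)\.gguf$', '', gguf_name) + "-HF")
    output_folder.mkdir(parents=True, exist_ok=True)

    # In the web UI this step is a download from the unquantized repo; here we copy local files.
    for f in tokenizer_files:
        shutil.copy(f, output_folder)

    # Move the GGUF into the new subfolder, mirroring the .rename() call in the diff.
    (Path(model_dir) / gguf_name).rename(output_folder / gguf_name)
    return output_folder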
@@ -76,7 +76,16 @@ def get_available_models():
     model_list = []
     for item in list(Path(f'{shared.args.model_dir}/').glob('*')):
         if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml', '.py')) and 'llama-tokenizer' not in item.name:
-            model_list.append(re.sub('.pth$', '', item.name))
+            model_list.append(item.name)
+
+    return ['None'] + sorted(model_list, key=natural_keys)
+
+
+def get_available_ggufs():
+    model_list = []
+    for item in Path(f'{shared.args.model_dir}/').glob('*'):
+        if item.is_file() and item.name.lower().endswith(".gguf"):
+            model_list.append(item.name)
 
     return ['None'] + sorted(model_list, key=natural_keys)
 
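The utils hunk above stops stripping a '.pth' suffix from model names and adds get_available_ggufs(), which lists bare *.gguf files sitting directly in the models folder (these feed the new "llamacpp_HF creator" dropdown). A simplified standalone version; the real function sorts with natural_keys, replaced here by a plain case-insensitive sort:

from pathlib import Path


def get_available_ggufs(model_dir='models'):
    # Bare .gguf files directly inside the models folder, same filter as the diff.
    names = [item.name for item in Path(model_dir).glob('*')
             if item.is_file() and item.name.lower().endswith('.gguf')]
    return ['None'] + sorted(names, key=str.lower)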
@@ -105,7 +114,7 @@ def get_available_instruction_templates():
     if os.path.exists(path):
         paths = (x for x in Path(path).iterdir() if x.suffix in ('.json', '.yaml', '.yml'))
 
-    return ['Select template to load...'] + sorted(set((k.stem for k in paths)), key=natural_keys)
+    return ['None'] + sorted(set((k.stem for k in paths)), key=natural_keys)
 
 
 def get_available_extensions():
@@ -24,37 +24,37 @@ tqdm
 wandb
 
 # bitsandbytes
-bitsandbytes==0.41.1; platform_system != "Windows"
+bitsandbytes==0.42.*; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # CUDA wheels
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
 https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
@@ -28,18 +28,18 @@ bitsandbytes==0.38.1; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.42+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.42+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.43+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.43+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
@@ -28,16 +28,16 @@ bitsandbytes==0.38.1; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"

 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

 # AMD wheels
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
@@ -24,14 +24,14 @@ tqdm
 wandb

 # bitsandbytes
-bitsandbytes==0.41.1; platform_system != "Windows"
+bitsandbytes==0.42.*; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"

 # Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl
@@ -24,16 +24,16 @@ tqdm
 wandb

 # bitsandbytes
-bitsandbytes==0.41.1; platform_system != "Windows"
+bitsandbytes==0.42.*; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"

 # Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.42-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.43-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl
@@ -24,11 +24,11 @@ tqdm
 wandb

 # bitsandbytes
-bitsandbytes==0.41.1; platform_system != "Windows"
+bitsandbytes==0.42.*; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"

 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
@@ -24,11 +24,11 @@ tqdm
 wandb

 # bitsandbytes
-bitsandbytes==0.41.1; platform_system != "Windows"
+bitsandbytes==0.42.*; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"

 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
@@ -24,37 +24,37 @@ tqdm
 wandb

 # bitsandbytes
-bitsandbytes==0.41.1; platform_system != "Windows"
+bitsandbytes==0.42.*; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"

 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.42+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.43+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"

 # llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.42+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.43+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

 # llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.42+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.43+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

 # CUDA wheels
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.1/exllamav2-0.0.13.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/exllamav2/releases/download/v0.0.13.2/exllamav2-0.0.13.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
 https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
@@ -24,5 +24,5 @@ tqdm
 wandb

 # bitsandbytes
-bitsandbytes==0.41.1; platform_system != "Windows"
+bitsandbytes==0.42.*; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
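
Every wheel URL in the requirements hunks above carries a PEP 508 environment marker after the semicolon; pip installs that line only when the marker evaluates to true for the interpreter and platform running the install, which is how one requirements file can list wheels for several OS and Python combinations yet pull in only the matching one. As a minimal illustrative sketch (not part of this commit), the same markers can be checked with the packaging library, which pip vendors:

from packaging.markers import Marker, default_environment

# A marker copied verbatim from one of the wheel lines above.
marker = Marker('platform_system == "Windows" and python_version == "3.11"')

# Evaluate against the current interpreter and platform.
print(marker.evaluate())

# Evaluate against a hypothetical environment (Linux, CPython 3.10): prints False.
print(marker.evaluate({"platform_system": "Linux", "python_version": "3.10"}))

# default_environment() lists every variable these markers may reference.
print(default_environment())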