mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-01-26 12:22:08 +01:00
Add a "llamacpp_HF creator" menu (#5519)
This commit is contained in:
parent
b2b74c83a6
commit
44018c2f69
@ -156,9 +156,8 @@ class ModelDownloader:
|
||||
is_llamacpp = has_gguf and specific_file is not None
|
||||
return links, sha256, is_lora, is_llamacpp
|
||||
|
||||
def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, base_folder=None):
|
||||
if base_folder is None:
|
||||
base_folder = 'models' if not is_lora else 'loras'
|
||||
def get_output_folder(self, model, branch, is_lora, is_llamacpp=False):
|
||||
base_folder = 'models' if not is_lora else 'loras'
|
||||
|
||||
# If the model is of type GGUF, save directly in the base_folder
|
||||
if is_llamacpp:
|
||||
@ -303,7 +302,10 @@ if __name__ == '__main__':
|
||||
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=args.text_only, specific_file=specific_file)
|
||||
|
||||
# Get the output folder
|
||||
output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=args.output)
|
||||
if args.output:
|
||||
output_folder = Path(args.output)
|
||||
else:
|
||||
output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)
|
||||
|
||||
if args.check:
|
||||
# Check previously downloaded files
|
||||
|
@ -143,17 +143,27 @@ def create_ui():
|
||||
shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
|
||||
shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.')
|
||||
shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
|
||||
shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.")
|
||||
shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to place your GGUF in a subfolder of models/ with the necessary tokenizer files.\n\nYou can use the \"llamacpp_HF creator\" menu to do that automatically.")
|
||||
|
||||
with gr.Column():
|
||||
with gr.Row():
|
||||
shared.gradio['autoload_model'] = gr.Checkbox(value=shared.settings['autoload_model'], label='Autoload the model', info='Whether to load the model as soon as it is selected in the Model dropdown.', interactive=not mu)
|
||||
|
||||
shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
|
||||
shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
|
||||
with gr.Row():
|
||||
shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
|
||||
shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
|
||||
with gr.Tab("Download"):
|
||||
shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
|
||||
shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
|
||||
with gr.Row():
|
||||
shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
|
||||
shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
|
||||
|
||||
with gr.Tab("llamacpp_HF creator"):
|
||||
with gr.Row():
|
||||
shared.gradio['gguf_menu'] = gr.Dropdown(choices=utils.get_available_ggufs(), value=lambda: shared.model_name, label='Choose your GGUF', elem_classes='slim-dropdown', interactive=not mu)
|
||||
ui.create_refresh_button(shared.gradio['gguf_menu'], lambda: None, lambda: {'choices': utils.get_available_ggufs()}, 'refresh-button', interactive=not mu)
|
||||
|
||||
shared.gradio['unquantized_url'] = gr.Textbox(label="Enter the URL for the original (unquantized) model", info="Example: https://huggingface.co/lmsys/vicuna-13b-v1.5", max_lines=1)
|
||||
shared.gradio['create_llamacpp_hf_button'] = gr.Button("Submit", variant="primary", interactive=not mu)
|
||||
gr.Markdown("This will move your gguf file into a subfolder of `models` along with the necessary tokenizer files.")
|
||||
|
||||
with gr.Row():
|
||||
shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
|
||||
@ -203,6 +213,7 @@ def create_event_handlers():
|
||||
shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
|
||||
shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
|
||||
shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), gradio('autoload_model'), gradio('load_model'))
|
||||
shared.gradio['create_llamacpp_hf_button'].click(create_llamacpp_hf, gradio('gguf_menu', 'unquantized_url'), gradio('model_status'), show_progress=True)
|
||||
|
||||
|
||||
def load_model_wrapper(selected_model, loader, autoload=False):
|
||||
@ -244,27 +255,58 @@ def load_lora_wrapper(selected_loras):
|
||||
|
||||
def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):
|
||||
try:
|
||||
progress(0.0)
|
||||
downloader = importlib.import_module("download-model").ModelDownloader()
|
||||
|
||||
progress(0.0)
|
||||
model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)
|
||||
|
||||
yield ("Getting the download links from Hugging Face")
|
||||
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
|
||||
if return_links:
|
||||
yield '\n\n'.join([f"`{Path(link).name}`" for link in links])
|
||||
output = "```\n"
|
||||
for link in links:
|
||||
output += f"{Path(link).name}" + "\n"
|
||||
|
||||
output += "```"
|
||||
yield output
|
||||
return
|
||||
|
||||
yield ("Getting the output folder")
|
||||
base_folder = shared.args.lora_dir if is_lora else shared.args.model_dir
|
||||
output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=base_folder)
|
||||
output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)
|
||||
if check:
|
||||
progress(0.5)
|
||||
|
||||
yield ("Checking previously downloaded files")
|
||||
downloader.check_model_files(model, branch, links, sha256, output_folder)
|
||||
progress(1.0)
|
||||
else:
|
||||
yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
|
||||
downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
|
||||
yield ("Done!")
|
||||
|
||||
yield (f"Model successfully saved to `{output_folder}/`.")
|
||||
except:
|
||||
progress(1.0)
|
||||
yield traceback.format_exc().replace('\n', '\n\n')
|
||||
|
||||
|
||||
def create_llamacpp_hf(gguf_name, unquantized_url, progress=gr.Progress()):
    """
    Turn a standalone GGUF file into a folder loadable with llamacpp_HF.

    This is a generator meant to be used as a Gradio event handler: every
    yielded string is a status message shown to the user. On failure the
    formatted traceback is yielded instead of being raised.

    Steps performed:
      1. Resolve the repository name/branch from `unquantized_url`.
      2. Download that repository's files selected by `text_only=True`
         (presumably the tokenizer/config files -- see ModelDownloader) into
         `<model_dir>/<gguf_name without the .gguf suffix>-HF`.
      3. Move the GGUF from `<model_dir>` into that folder.

    Parameters:
        gguf_name: name of a GGUF file located directly inside the models
            directory (`shared.args.model_dir`).
        unquantized_url: Hugging Face URL of the original (unquantized)
            model, whose tokenizer files are downloaded.
        progress: Gradio progress tracker, forwarded to the downloader.

    Yields:
        str: human-readable status or error text.
    """
    try:
        # 'None' is the placeholder entry of the GGUF dropdown, not a real
        # file; reject it (and empty selections) before downloading anything.
        if not gguf_name or gguf_name == 'None':
            yield ("Please choose a GGUF file first.")
            return

        if not unquantized_url or not unquantized_url.strip():
            yield ("Please enter the URL for the original (unquantized) model.")
            return

        # Fail early if the GGUF is missing, instead of after the download.
        gguf_path = Path(shared.args.model_dir) / gguf_name
        if not gguf_path.is_file():
            yield (f"GGUF file not found: `{gguf_path}`")
            return

        downloader = importlib.import_module("download-model").ModelDownloader()

        progress(0.0)
        model, branch = downloader.sanitize_model_and_branch_names(unquantized_url, None)

        yield ("Getting the tokenizer files links from Hugging Face")
        links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=True)

        # Destination: the GGUF name without its extension, suffixed with "-HF".
        output_folder = Path(shared.args.model_dir) / (re.sub(r'(?i)\.gguf$', '', gguf_name) + "-HF")

        yield (f"Downloading tokenizer to `{output_folder}`")
        downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=False)

        # Move the GGUF. Make sure the destination exists even if the
        # downloader had nothing to write, otherwise rename() would fail.
        output_folder.mkdir(parents=True, exist_ok=True)
        gguf_path.rename(output_folder / gguf_name)

        yield (f"Model saved to `{output_folder}/`.\n\nYou can now load it using llamacpp_HF.")
    except Exception:
        # Only catch regular errors: a bare `except:` would also trap
        # GeneratorExit/KeyboardInterrupt, and yielding after GeneratorExit
        # makes Python raise "generator ignored GeneratorExit".
        progress(1.0)
        # Double the newlines so the traceback renders line by line in Markdown.
        yield traceback.format_exc().replace('\n', '\n\n')
|
||||
|
@ -76,7 +76,16 @@ def get_available_models():
|
||||
model_list = []
|
||||
for item in list(Path(f'{shared.args.model_dir}/').glob('*')):
|
||||
if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml', '.py')) and 'llama-tokenizer' not in item.name:
|
||||
model_list.append(re.sub('.pth$', '', item.name))
|
||||
model_list.append(item.name)
|
||||
|
||||
return ['None'] + sorted(model_list, key=natural_keys)
|
||||
|
||||
|
||||
def get_available_ggufs():
    """
    List the GGUF files sitting directly inside the models directory.

    Only regular files whose name ends in ".gguf" (case-insensitive) at the
    top level of `shared.args.model_dir` are considered; subfolders are not
    searched. The names are ordered with `natural_keys` and the result is
    prefixed with the 'None' placeholder entry.
    """
    models_dir = Path(f'{shared.args.model_dir}/')
    gguf_names = [
        entry.name
        for entry in models_dir.glob('*')
        if entry.is_file() and entry.name.lower().endswith(".gguf")
    ]
    gguf_names.sort(key=natural_keys)
    return ['None'] + gguf_names
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user