From 44018c2f69d34f6d59c4d8acc9ddc55bddcd2eb2 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 16 Feb 2024 12:43:24 -0300
Subject: [PATCH] Add a "llamacpp_HF creator" menu (#5519)

---
 download-model.py        | 10 ++++---
 modules/ui_model_menu.py | 64 +++++++++++++++++++++++++++++++++-------
 modules/utils.py         | 11 ++++++-
 3 files changed, 69 insertions(+), 16 deletions(-)

diff --git a/download-model.py b/download-model.py
index 82e956d6..09bc9a86 100644
--- a/download-model.py
+++ b/download-model.py
@@ -156,9 +156,8 @@ class ModelDownloader:
         is_llamacpp = has_gguf and specific_file is not None
         return links, sha256, is_lora, is_llamacpp

-    def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, base_folder=None):
-        if base_folder is None:
-            base_folder = 'models' if not is_lora else 'loras'
+    def get_output_folder(self, model, branch, is_lora, is_llamacpp=False):
+        base_folder = 'models' if not is_lora else 'loras'

         # If the model is of type GGUF, save directly in the base_folder
         if is_llamacpp:
@@ -303,7 +302,10 @@ if __name__ == '__main__':
     links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=args.text_only, specific_file=specific_file)

     # Get the output folder
-    output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=args.output)
+    if args.output:
+        output_folder = Path(args.output)
+    else:
+        output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)

     if args.check:
         # Check previously downloaded files
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 387915b1..ca0de873 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -143,17 +143,27 @@ def create_ui():
                             shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
                             shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.')
                             shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
-                            shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.")
+                            shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to place your GGUF in a subfolder of models/ with the necessary tokenizer files.\n\nYou can use the \"llamacpp_HF creator\" menu to do that automatically.")

             with gr.Column():
                 with gr.Row():
                     shared.gradio['autoload_model'] = gr.Checkbox(value=shared.settings['autoload_model'], label='Autoload the model', info='Whether to load the model as soon as it is selected in the Model dropdown.', interactive=not mu)

-                shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
-                shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
-                with gr.Row():
-                    shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
-                    shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
+                with gr.Tab("Download"):
+                    shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
+                    shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
+                    with gr.Row():
+                        shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
+                        shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
+
+                with gr.Tab("llamacpp_HF creator"):
+                    with gr.Row():
+                        shared.gradio['gguf_menu'] = gr.Dropdown(choices=utils.get_available_ggufs(), value=lambda: shared.model_name, label='Choose your GGUF', elem_classes='slim-dropdown', interactive=not mu)
+                        ui.create_refresh_button(shared.gradio['gguf_menu'], lambda: None, lambda: {'choices': utils.get_available_ggufs()}, 'refresh-button', interactive=not mu)
+
+                    shared.gradio['unquantized_url'] = gr.Textbox(label="Enter the URL for the original (unquantized) model", info="Example: https://huggingface.co/lmsys/vicuna-13b-v1.5", max_lines=1)
+                    shared.gradio['create_llamacpp_hf_button'] = gr.Button("Submit", variant="primary", interactive=not mu)
+                    gr.Markdown("This will move your gguf file into a subfolder of `models` along with the necessary tokenizer files.")

                 with gr.Row():
                     shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
@@ -203,6 +213,7 @@ def create_event_handlers():
     shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
     shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
     shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), gradio('autoload_model'), gradio('load_model'))
+    shared.gradio['create_llamacpp_hf_button'].click(create_llamacpp_hf, gradio('gguf_menu', 'unquantized_url'), gradio('model_status'), show_progress=True)


 def load_model_wrapper(selected_model, loader, autoload=False):
@@ -244,27 +255,58 @@ def load_lora_wrapper(selected_loras):

 def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):
     try:
-        progress(0.0)
         downloader = importlib.import_module("download-model").ModelDownloader()
+
+        progress(0.0)
         model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)
+
         yield ("Getting the download links from Hugging Face")
         links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)

         if return_links:
-            yield '\n\n'.join([f"`{Path(link).name}`" for link in links])
+            output = "```\n"
+            for link in links:
+                output += f"{Path(link).name}" + "\n"
+
+            output += "```"
+            yield output
             return

         yield ("Getting the output folder")
-        base_folder = shared.args.lora_dir if is_lora else shared.args.model_dir
-        output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=base_folder)
+        output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)
         if check:
             progress(0.5)
+            yield ("Checking previously downloaded files")
             downloader.check_model_files(model, branch, links, sha256, output_folder)
             progress(1.0)
         else:
             yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
             downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
-            yield ("Done!")
+
+            yield (f"Model successfully saved to `{output_folder}/`.")
+    except:
+        progress(1.0)
+        yield traceback.format_exc().replace('\n', '\n\n')
+
+
+def create_llamacpp_hf(gguf_name, unquantized_url, progress=gr.Progress()):
+    try:
+        downloader = importlib.import_module("download-model").ModelDownloader()
+
+        progress(0.0)
+        model, branch = downloader.sanitize_model_and_branch_names(unquantized_url, None)
+
+        yield ("Getting the tokenizer files links from Hugging Face")
+        links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=True)
+        output_folder = Path(shared.args.model_dir) / (re.sub(r'(?i)\.gguf$', '', gguf_name) + "-HF")
+
+        yield (f"Downloading tokenizer to `{output_folder}`")
+        downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=False)
+
+        # Move the GGUF
+        (Path(shared.args.model_dir) / gguf_name).rename(output_folder / gguf_name)
+
+        yield (f"Model saved to `{output_folder}/`.\n\nYou can now load it using llamacpp_HF.")
     except:
         progress(1.0)
         yield traceback.format_exc().replace('\n', '\n\n')
diff --git a/modules/utils.py b/modules/utils.py
index de6d32dc..be06ec34 100644
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -76,7 +76,16 @@ def get_available_models():
     model_list = []
     for item in list(Path(f'{shared.args.model_dir}/').glob('*')):
         if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml', '.py')) and 'llama-tokenizer' not in item.name:
-            model_list.append(re.sub('.pth$', '', item.name))
+            model_list.append(item.name)
+
+    return ['None'] + sorted(model_list, key=natural_keys)
+
+
+def get_available_ggufs():
+    model_list = []
+    for item in Path(f'{shared.args.model_dir}/').glob('*'):
+        if item.is_file() and item.name.lower().endswith(".gguf"):
+            model_list.append(item.name)

     return ['None'] + sorted(model_list, key=natural_keys)