From 44018c2f69d34f6d59c4d8acc9ddc55bddcd2eb2 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 16 Feb 2024 12:43:24 -0300
Subject: [PATCH] Add a "llamacpp_HF creator" menu (#5519)

---
 download-model.py        | 10 ++++---
 modules/ui_model_menu.py | 64 +++++++++++++++++++++++++++++++++-------
 modules/utils.py         | 11 ++++++-
 3 files changed, 69 insertions(+), 16 deletions(-)

diff --git a/download-model.py b/download-model.py
index 82e956d6..09bc9a86 100644
--- a/download-model.py
+++ b/download-model.py
@@ -156,9 +156,8 @@ class ModelDownloader:
         is_llamacpp = has_gguf and specific_file is not None
         return links, sha256, is_lora, is_llamacpp

-    def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, base_folder=None):
-        if base_folder is None:
-            base_folder = 'models' if not is_lora else 'loras'
+    def get_output_folder(self, model, branch, is_lora, is_llamacpp=False):
+        base_folder = 'models' if not is_lora else 'loras'

         # If the model is of type GGUF, save directly in the base_folder
         if is_llamacpp:
@@ -303,7 +302,10 @@ if __name__ == '__main__':
     links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=args.text_only, specific_file=specific_file)

     # Get the output folder
-    output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=args.output)
+    if args.output:
+        output_folder = Path(args.output)
+    else:
+        output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)

     if args.check:
         # Check previously downloaded files
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 387915b1..ca0de873 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -143,17 +143,27 @@ def create_ui():
                             shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
                             shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.')
                             shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
-                            shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.")
+                            shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to place your GGUF in a subfolder of models/ with the necessary tokenizer files.\n\nYou can use the \"llamacpp_HF creator\" menu to do that automatically.")

             with gr.Column():
                 with gr.Row():
                     shared.gradio['autoload_model'] = gr.Checkbox(value=shared.settings['autoload_model'], label='Autoload the model', info='Whether to load the model as soon as it is selected in the Model dropdown.', interactive=not mu)

-                shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
-                shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
-                with gr.Row():
-                    shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
-                    shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
+                with gr.Tab("Download"):
+                    shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
+                    shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
+                    with gr.Row():
+                        shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
+                        shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
+
+                with gr.Tab("llamacpp_HF creator"):
+                    with gr.Row():
+                        shared.gradio['gguf_menu'] = gr.Dropdown(choices=utils.get_available_ggufs(), value=lambda: shared.model_name, label='Choose your GGUF', elem_classes='slim-dropdown', interactive=not mu)
+                        ui.create_refresh_button(shared.gradio['gguf_menu'], lambda: None, lambda: {'choices': utils.get_available_ggufs()}, 'refresh-button', interactive=not mu)
+
+                    shared.gradio['unquantized_url'] = gr.Textbox(label="Enter the URL for the original (unquantized) model", info="Example: https://huggingface.co/lmsys/vicuna-13b-v1.5", max_lines=1)
+                    shared.gradio['create_llamacpp_hf_button'] = gr.Button("Submit", variant="primary", interactive=not mu)
+                    gr.Markdown("This will move your gguf file into a subfolder of `models` along with the necessary tokenizer files.")

                 with gr.Row():
                     shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
@@ -203,6 +213,7 @@ def create_event_handlers():
     shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
     shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
     shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), gradio('autoload_model'), gradio('load_model'))
+    shared.gradio['create_llamacpp_hf_button'].click(create_llamacpp_hf, gradio('gguf_menu', 'unquantized_url'), gradio('model_status'), show_progress=True)


 def load_model_wrapper(selected_model, loader, autoload=False):
@@ -244,27 +255,58 @@ def load_lora_wrapper(selected_loras):

 def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):
     try:
-        progress(0.0)
         downloader = importlib.import_module("download-model").ModelDownloader()
+
+        progress(0.0)
         model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)
+
         yield ("Getting the download links from Hugging Face")
         links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)

         if return_links:
-            yield '\n\n'.join([f"`{Path(link).name}`" for link in links])
+            output = "```\n"
+            for link in links:
+                output += f"{Path(link).name}" + "\n"
+
+            output += "```"
+            yield output
             return

         yield ("Getting the output folder")
-        base_folder = shared.args.lora_dir if is_lora else shared.args.model_dir
-        output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=base_folder)
+        output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)
         if check:
             progress(0.5)
+            yield ("Checking previously downloaded files")
             downloader.check_model_files(model, branch, links, sha256, output_folder)
             progress(1.0)
         else:
             yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
             downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
-            yield ("Done!")
+
+            yield (f"Model successfully saved to `{output_folder}/`.")
+    except:
+        progress(1.0)
+        yield traceback.format_exc().replace('\n', '\n\n')
+
+
+def create_llamacpp_hf(gguf_name, unquantized_url, progress=gr.Progress()):
+    try:
+        downloader = importlib.import_module("download-model").ModelDownloader()
+
+        progress(0.0)
+        model, branch = downloader.sanitize_model_and_branch_names(unquantized_url, None)
+
+        yield ("Getting the tokenizer files links from Hugging Face")
+        links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=True)
+        output_folder = Path(shared.args.model_dir) / (re.sub(r'(?i)\.gguf$', '', gguf_name) + "-HF")
+
+        yield (f"Downloading tokenizer to `{output_folder}`")
+        downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=False)
+
+        # Move the GGUF
+        (Path(shared.args.model_dir) / gguf_name).rename(output_folder / gguf_name)
+
+        yield (f"Model saved to `{output_folder}/`.\n\nYou can now load it using llamacpp_HF.")
     except:
         progress(1.0)
         yield traceback.format_exc().replace('\n', '\n\n')
diff --git a/modules/utils.py b/modules/utils.py
index de6d32dc..be06ec34 100644
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -76,7 +76,16 @@ def get_available_models():
     model_list = []
     for item in list(Path(f'{shared.args.model_dir}/').glob('*')):
         if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml', '.py')) and 'llama-tokenizer' not in item.name:
-            model_list.append(re.sub('.pth$', '', item.name))
+            model_list.append(item.name)
+
+    return ['None'] + sorted(model_list, key=natural_keys)
+
+
+def get_available_ggufs():
+    model_list = []
+    for item in Path(f'{shared.args.model_dir}/').glob('*'):
+        if item.is_file() and item.name.lower().endswith(".gguf"):
+            model_list.append(item.name)

     return ['None'] + sorted(model_list, key=natural_keys)