Add a "llamacpp_HF creator" menu (#5519)

This commit is contained in:
oobabooga 2024-02-16 12:43:24 -03:00 committed by GitHub
parent b2b74c83a6
commit 44018c2f69
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 69 additions and 16 deletions

View File

@ -156,9 +156,8 @@ class ModelDownloader:
is_llamacpp = has_gguf and specific_file is not None is_llamacpp = has_gguf and specific_file is not None
return links, sha256, is_lora, is_llamacpp return links, sha256, is_lora, is_llamacpp
def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, base_folder=None): def get_output_folder(self, model, branch, is_lora, is_llamacpp=False):
if base_folder is None: base_folder = 'models' if not is_lora else 'loras'
base_folder = 'models' if not is_lora else 'loras'
# If the model is of type GGUF, save directly in the base_folder # If the model is of type GGUF, save directly in the base_folder
if is_llamacpp: if is_llamacpp:
@ -303,7 +302,10 @@ if __name__ == '__main__':
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=args.text_only, specific_file=specific_file) links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=args.text_only, specific_file=specific_file)
# Get the output folder # Get the output folder
output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=args.output) if args.output:
output_folder = Path(args.output)
else:
output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)
if args.check: if args.check:
# Check previously downloaded files # Check previously downloaded files

View File

@ -143,17 +143,27 @@ def create_ui():
shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.') shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.') shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.')
shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.") shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.") shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to place your GGUF in a subfolder of models/ with the necessary tokenizer files.\n\nYou can use the \"llamacpp_HF creator\" menu to do that automatically.")
with gr.Column(): with gr.Column():
with gr.Row(): with gr.Row():
shared.gradio['autoload_model'] = gr.Checkbox(value=shared.settings['autoload_model'], label='Autoload the model', info='Whether to load the model as soon as it is selected in the Model dropdown.', interactive=not mu) shared.gradio['autoload_model'] = gr.Checkbox(value=shared.settings['autoload_model'], label='Autoload the model', info='Whether to load the model as soon as it is selected in the Model dropdown.', interactive=not mu)
shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu) with gr.Tab("Download"):
shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu) shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
with gr.Row(): shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu) with gr.Row():
shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu) shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
with gr.Tab("llamacpp_HF creator"):
with gr.Row():
shared.gradio['gguf_menu'] = gr.Dropdown(choices=utils.get_available_ggufs(), value=lambda: shared.model_name, label='Choose your GGUF', elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['gguf_menu'], lambda: None, lambda: {'choices': utils.get_available_ggufs()}, 'refresh-button', interactive=not mu)
shared.gradio['unquantized_url'] = gr.Textbox(label="Enter the URL for the original (unquantized) model", info="Example: https://huggingface.co/lmsys/vicuna-13b-v1.5", max_lines=1)
shared.gradio['create_llamacpp_hf_button'] = gr.Button("Submit", variant="primary", interactive=not mu)
gr.Markdown("This will move your gguf file into a subfolder of `models` along with the necessary tokenizer files.")
with gr.Row(): with gr.Row():
shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready') shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
@ -203,6 +213,7 @@ def create_event_handlers():
shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True) shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True) shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), gradio('autoload_model'), gradio('load_model')) shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), gradio('autoload_model'), gradio('load_model'))
shared.gradio['create_llamacpp_hf_button'].click(create_llamacpp_hf, gradio('gguf_menu', 'unquantized_url'), gradio('model_status'), show_progress=True)
def load_model_wrapper(selected_model, loader, autoload=False): def load_model_wrapper(selected_model, loader, autoload=False):
@ -244,27 +255,58 @@ def load_lora_wrapper(selected_loras):
def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False): def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):
try: try:
progress(0.0)
downloader = importlib.import_module("download-model").ModelDownloader() downloader = importlib.import_module("download-model").ModelDownloader()
progress(0.0)
model, branch = downloader.sanitize_model_and_branch_names(repo_id, None) model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)
yield ("Getting the download links from Hugging Face") yield ("Getting the download links from Hugging Face")
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file) links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
if return_links: if return_links:
yield '\n\n'.join([f"`{Path(link).name}`" for link in links]) output = "```\n"
for link in links:
output += f"{Path(link).name}" + "\n"
output += "```"
yield output
return return
yield ("Getting the output folder") yield ("Getting the output folder")
base_folder = shared.args.lora_dir if is_lora else shared.args.model_dir output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp)
output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=base_folder)
if check: if check:
progress(0.5) progress(0.5)
yield ("Checking previously downloaded files") yield ("Checking previously downloaded files")
downloader.check_model_files(model, branch, links, sha256, output_folder) downloader.check_model_files(model, branch, links, sha256, output_folder)
progress(1.0) progress(1.0)
else: else:
yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`") yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp) downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
yield ("Done!")
yield (f"Model successfully saved to `{output_folder}/`.")
except:
progress(1.0)
yield traceback.format_exc().replace('\n', '\n\n')
def create_llamacpp_hf(gguf_name, unquantized_url, progress=gr.Progress()):
try:
downloader = importlib.import_module("download-model").ModelDownloader()
progress(0.0)
model, branch = downloader.sanitize_model_and_branch_names(unquantized_url, None)
yield ("Getting the tokenizer files links from Hugging Face")
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=True)
output_folder = Path(shared.args.model_dir) / (re.sub(r'(?i)\.gguf$', '', gguf_name) + "-HF")
yield (f"Downloading tokenizer to `{output_folder}`")
downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=False)
# Move the GGUF
(Path(shared.args.model_dir) / gguf_name).rename(output_folder / gguf_name)
yield (f"Model saved to `{output_folder}/`.\n\nYou can now load it using llamacpp_HF.")
except: except:
progress(1.0) progress(1.0)
yield traceback.format_exc().replace('\n', '\n\n') yield traceback.format_exc().replace('\n', '\n\n')

View File

@ -76,7 +76,16 @@ def get_available_models():
model_list = [] model_list = []
for item in list(Path(f'{shared.args.model_dir}/').glob('*')): for item in list(Path(f'{shared.args.model_dir}/').glob('*')):
if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml', '.py')) and 'llama-tokenizer' not in item.name: if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml', '.py')) and 'llama-tokenizer' not in item.name:
model_list.append(re.sub('.pth$', '', item.name)) model_list.append(item.name)
return ['None'] + sorted(model_list, key=natural_keys)
def get_available_ggufs():
model_list = []
for item in Path(f'{shared.args.model_dir}/').glob('*'):
if item.is_file() and item.name.lower().endswith(".gguf"):
model_list.append(item.name)
return ['None'] + sorted(model_list, key=natural_keys) return ['None'] + sorted(model_list, key=natural_keys)