diff --git a/modules/models.py b/modules/models.py
index 038669f3..d8f1a9f8 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -254,26 +254,17 @@ def llamacpp_loader(model_name):
 def llamacpp_HF_loader(model_name):
     from modules.llamacpp_hf import LlamacppHF
 
-    for fname in [model_name, "oobabooga_llama-tokenizer", "llama-tokenizer"]:
-        path = Path(f'{shared.args.model_dir}/{fname}')
-        if all((path / file).exists() for file in ['tokenizer_config.json', 'special_tokens_map.json', 'tokenizer.model']):
-            logger.info(f'Using tokenizer from: \"{path}\"')
-            break
+    path = Path(f'{shared.args.model_dir}/{model_name}')
+
+    # Check if a HF tokenizer is available for the model
+    if all((path / file).exists() for file in ['tokenizer.model', 'tokenizer_config.json']):
+        logger.info(f'Using tokenizer from: \"{path}\"')
     else:
-        logger.error("Could not load the model because a tokenizer in transformers format was not found. Please download oobabooga/llama-tokenizer.")
+        logger.error("Could not load the model because a tokenizer in Transformers format was not found.")
         return None, None
 
-    if shared.args.no_use_fast:
-        logger.info('Loading the tokenizer with use_fast=False.')
-
-    tokenizer = AutoTokenizer.from_pretrained(
-        path,
-        trust_remote_code=shared.args.trust_remote_code,
-        use_fast=not shared.args.no_use_fast
-    )
-
     model = LlamacppHF.from_pretrained(model_name)
-    return model, tokenizer
+    return model
 
 
 def ctransformers_loader(model_name):
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 23679097..09d4276c 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -143,7 +143,7 @@ def create_ui():
                             shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
                             shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.')
                             shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
-                            shared.gradio['llamacpp_HF_info'] = gr.Markdown('llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to download a tokenizer.\n\nOption 1 (recommended): place your .gguf in a subfolder of models/ along with these 4 files: special_tokens_map.json, tokenizer_config.json, tokenizer.json, tokenizer.model.\n\nOption 2: download `oobabooga/llama-tokenizer` under "Download model or LoRA". That\'s a default Llama tokenizer that will work for some (but not all) models.')
+                            shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.")
 
             with gr.Column():
                 with gr.Row():