llamacpp_HF: do not use oobabooga/llama-tokenizer (#5499)

This commit is contained in:
oobabooga 2024-02-14 00:28:51 -03:00 committed by GitHub
parent 3a9ce3cfa6
commit d47182d9d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 8 additions and 17 deletions

View File

@ -254,26 +254,17 @@ def llamacpp_loader(model_name):
def llamacpp_HF_loader(model_name):
from modules.llamacpp_hf import LlamacppHF
for fname in [model_name, "oobabooga_llama-tokenizer", "llama-tokenizer"]:
path = Path(f'{shared.args.model_dir}/{fname}')
if all((path / file).exists() for file in ['tokenizer_config.json', 'special_tokens_map.json', 'tokenizer.model']):
path = Path(f'{shared.args.model_dir}/{model_name}')
# Check if a HF tokenizer is available for the model
if all((path / file).exists() for file in ['tokenizer.model', 'tokenizer_config.json']):
logger.info(f'Using tokenizer from: \"{path}\"')
break
else:
logger.error("Could not load the model because a tokenizer in transformers format was not found. Please download oobabooga/llama-tokenizer.")
logger.error("Could not load the model because a tokenizer in Transformers format was not found.")
return None, None
if shared.args.no_use_fast:
logger.info('Loading the tokenizer with use_fast=False.')
tokenizer = AutoTokenizer.from_pretrained(
path,
trust_remote_code=shared.args.trust_remote_code,
use_fast=not shared.args.no_use_fast
)
model = LlamacppHF.from_pretrained(model_name)
return model, tokenizer
return model
def ctransformers_loader(model_name):

View File

@ -143,7 +143,7 @@ def create_ui():
shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.')
shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
shared.gradio['llamacpp_HF_info'] = gr.Markdown('llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to download a tokenizer.\n\nOption 1 (recommended): place your .gguf in a subfolder of models/ along with these 4 files: special_tokens_map.json, tokenizer_config.json, tokenizer.json, tokenizer.model.\n\nOption 2: download `oobabooga/llama-tokenizer` under "Download model or LoRA". That\'s a default Llama tokenizer that will work for some (but not all) models.')
+ shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.")
with gr.Column():
with gr.Row():