mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-12-26 14:20:40 +01:00
llamacpp_HF: do not use oobabooga/llama-tokenizer (#5499)
This commit is contained in:
parent
3a9ce3cfa6
commit
d47182d9d1
@ -254,26 +254,17 @@ def llamacpp_loader(model_name):
|
||||
def llamacpp_HF_loader(model_name):
|
||||
from modules.llamacpp_hf import LlamacppHF
|
||||
|
||||
for fname in [model_name, "oobabooga_llama-tokenizer", "llama-tokenizer"]:
|
||||
path = Path(f'{shared.args.model_dir}/{fname}')
|
||||
if all((path / file).exists() for file in ['tokenizer_config.json', 'special_tokens_map.json', 'tokenizer.model']):
|
||||
logger.info(f'Using tokenizer from: \"{path}\"')
|
||||
break
|
||||
path = Path(f'{shared.args.model_dir}/{model_name}')
|
||||
|
||||
# Check if a HF tokenizer is available for the model
|
||||
if all((path / file).exists() for file in ['tokenizer.model', 'tokenizer_config.json']):
|
||||
logger.info(f'Using tokenizer from: \"{path}\"')
|
||||
else:
|
||||
logger.error("Could not load the model because a tokenizer in transformers format was not found. Please download oobabooga/llama-tokenizer.")
|
||||
logger.error("Could not load the model because a tokenizer in Transformers format was not found.")
|
||||
return None, None
|
||||
|
||||
if shared.args.no_use_fast:
|
||||
logger.info('Loading the tokenizer with use_fast=False.')
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
path,
|
||||
trust_remote_code=shared.args.trust_remote_code,
|
||||
use_fast=not shared.args.no_use_fast
|
||||
)
|
||||
|
||||
model = LlamacppHF.from_pretrained(model_name)
|
||||
return model, tokenizer
|
||||
return model
|
||||
|
||||
|
||||
def ctransformers_loader(model_name):
|
||||
|
@ -143,7 +143,7 @@ def create_ui():
|
||||
shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
|
||||
shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.')
|
||||
shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
|
||||
shared.gradio['llamacpp_HF_info'] = gr.Markdown('llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to download a tokenizer.\n\nOption 1 (recommended): place your .gguf in a subfolder of models/ along with these 4 files: special_tokens_map.json, tokenizer_config.json, tokenizer.json, tokenizer.model.\n\nOption 2: download `oobabooga/llama-tokenizer` under "Download model or LoRA". That\'s a default Llama tokenizer that will work for some (but not all) models.')
|
||||
+ shared.gradio['llamacpp_HF_info'] = gr.Markdown("llamacpp_HF loads llama.cpp as a Transformers model. To use it, download a tokenizer in HF format for your GGUF:\n\n1. Create a folder inside models/\n2. Place your GGUF in the new folder.\n3. Add the original model's tokenizer files there: `tokenizer.model`, `tokenizer_config.json`, `tokenizer.json`, and `special_tokens_map.json`.")
|
||||
|
||||
with gr.Column():
|
||||
with gr.Row():
|
||||
|
Loading…
Reference in New Issue
Block a user