Fix GGUFs with no BOS token present, mainly qwen2 models. (#6119)

---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
This commit is contained in:
Forkoz 2024-06-14 11:51:01 -05:00 committed by GitHub
parent fdd8fab9cf
commit 1576227f16
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -64,7 +64,11 @@ def get_model_metadata(model):
if 'tokenizer.chat_template' in metadata: if 'tokenizer.chat_template' in metadata:
template = metadata['tokenizer.chat_template'] template = metadata['tokenizer.chat_template']
eos_token = metadata['tokenizer.ggml.tokens'][metadata['tokenizer.ggml.eos_token_id']] eos_token = metadata['tokenizer.ggml.tokens'][metadata['tokenizer.ggml.eos_token_id']]
if 'tokenizer.ggml.bos_token_id' in metadata:
bos_token = metadata['tokenizer.ggml.tokens'][metadata['tokenizer.ggml.bos_token_id']] bos_token = metadata['tokenizer.ggml.tokens'][metadata['tokenizer.ggml.bos_token_id']]
else:
bos_token = ""
template = template.replace('eos_token', "'{}'".format(eos_token)) template = template.replace('eos_token', "'{}'".format(eos_token))
template = template.replace('bos_token', "'{}'".format(bos_token)) template = template.replace('bos_token', "'{}'".format(bos_token))