From c5ba5efda2a3b3b9ee3d42d908dde2b92dc80bc1 Mon Sep 17 00:00:00 2001 From: klosax <131523366+klosax@users.noreply.github.com> Date: Wed, 2 Aug 2023 11:26:07 +0200 Subject: [PATCH] convert-llama-h5-to-gguf.py : special tokens --- convert-llama-h5-to-gguf.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/convert-llama-h5-to-gguf.py b/convert-llama-h5-to-gguf.py index 7a5dae77a..67b3c55d4 100644 --- a/convert-llama-h5-to-gguf.py +++ b/convert-llama-h5-to-gguf.py @@ -128,27 +128,27 @@ if Path(dir_model + "/tokenizer.json").is_file(): if "bos_token" in tokenizer_config and tokenizer_config["bos_token"] != None: for key in tokenizer["added_tokens"]: - if key["content"] == tokenizer_config["bos_token"] or key["content"] == tokenizer_config["bos_token"]["content"]: + if key["content"] == tokenizer_config["bos_token"]["content"]: gguf_writer.add_bos_token_id(key["id"]) if "eos_token" in tokenizer_config and tokenizer_config["eos_token"] != None: for key in tokenizer["added_tokens"]: - if key["content"] == tokenizer_config["eos_token"] or key["content"] == tokenizer_config["eos_token"]["content"]: + if key["content"] == tokenizer_config["eos_token"]["content"]: gguf_writer.add_eos_token_id(key["id"]) if "unk_token" in tokenizer_config and tokenizer_config["unk_token"] != None: for key in tokenizer["added_tokens"]: - if key["content"] == tokenizer_config["unk_token"] or key["content"] == tokenizer_config["unk_token"]["content"]: + if key["content"] == tokenizer_config["unk_token"]["content"]: gguf_writer.add_unk_token_id(key["id"]) if "sep_token" in tokenizer_config and tokenizer_config["sep_token"] != None: for key in tokenizer["added_tokens"]: - if key["content"] == tokenizer_config["sep_token"] or key["content"] == tokenizer_config["sep_token"]["content"]: + if key["content"] == tokenizer_config["sep_token"]["content"]: gguf_writer.add_sep_token_id(key["id"]) if "pad_token" in tokenizer_config and tokenizer_config["pad_token"] != None: for key in tokenizer["added_tokens"]: - if key["content"] == tokenizer_config["pad_token"] or key["content"] == tokenizer_config["pad_token"]["content"]: + if key["content"] == tokenizer_config["pad_token"]["content"]: gguf_writer.add_pad_token_id(key["id"])