From 8960fe86ae075c846c5df8848230d1904ba8877f Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 22 Apr 2024 15:41:11 +0300
Subject: [PATCH] llama : fix typo in <|im_end|> token text (#6745)

---
 llama.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 7440c740f..a25d115c1 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4340,7 +4340,7 @@ static void llm_load_vocab(
         }
     }
 
-    // find EOT token: "<|eot_id|>", "<|im_emd|>", "<end_of_turn>", etc.
+    // find EOT token: "<|eot_id|>", "<|im_end|>", "<end_of_turn>", etc.
     //
     // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID
     //       for now, we apply this workaround to find the EOT token based on its text
@@ -4351,7 +4351,7 @@ static void llm_load_vocab(
                     // need to fix convert script
                     //vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
                     (t.first == "<|eot_id|>" ||
-                     t.first == "<|im_emd|>" ||
+                     t.first == "<|im_end|>" ||
                      t.first == "<end_of_turn>"
                     )
                ) {