llama : fix typo in <|im_end|> token text (#6745)

This commit is contained in:
Georgi Gerganov 2024-04-22 15:41:11 +03:00
parent c0956b09ba
commit 8960fe86ae
No known key found for this signature in database
GPG Key ID: BF970631944C16B7

View File

@@ -4340,7 +4340,7 @@ static void llm_load_vocab(
} }
} }
// find EOT token: "<|eot_id|>", "<|im_emd|>", "<end_of_turn>", etc. // find EOT token: "<|eot_id|>", "<|im_end|>", "<end_of_turn>", etc.
// //
// TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID
// for now, we apply this workaround to find the EOT token based on its text // for now, we apply this workaround to find the EOT token based on its text
@@ -4351,7 +4351,7 @@ static void llm_load_vocab(
// need to fix convert script // need to fix convert script
//vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL && //vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
(t.first == "<|eot_id|>" || (t.first == "<|eot_id|>" ||
t.first == "<|im_emd|>" || t.first == "<|im_end|>" ||
t.first == "<end_of_turn>" t.first == "<end_of_turn>"
) )
) { ) {