llama : fix CodeLlama FIM token checks (#8144)

* account for space prefix character

* use find instead
This commit is contained in:
Sigbjørn Skjæret 2024-06-27 09:46:41 +02:00 committed by GitHub
parent ac146628e4
commit 911e35bb8b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -5152,10 +5152,10 @@ static void llm_load_vocab(
if (gen_name.find("code") != std::string::npos) { if (gen_name.find("code") != std::string::npos) {
if (model.arch == LLM_ARCH_LLAMA if (model.arch == LLM_ARCH_LLAMA
&& 32010 < vocab.id_to_token.size() && 32010 < vocab.id_to_token.size()
&& vocab.id_to_token[32007].text == "<PRE>" && vocab.id_to_token[32007].text.find("<PRE>") != std::string::npos
&& vocab.id_to_token[32008].text == "<SUF>" && vocab.id_to_token[32008].text.find("<SUF>") != std::string::npos
&& vocab.id_to_token[32009].text == "<MID>" && vocab.id_to_token[32009].text.find("<MID>") != std::string::npos
&& vocab.id_to_token[32010].text == "<EOT>") { && vocab.id_to_token[32010].text.find("<EOT>") != std::string::npos) {
vocab.special_prefix_id = 32007; vocab.special_prefix_id = 32007;
vocab.special_suffix_id = 32008; vocab.special_suffix_id = 32008;
vocab.special_middle_id = 32009; vocab.special_middle_id = 32009;