mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 22:59:24 +01:00
llama : set attrs of mislabelled EOT/EOM tokens (#9348)
This commit is contained in:
parent
a5b5d9a101
commit
fbb7fcffbc
@ -6399,6 +6399,11 @@ static void llm_load_vocab(
|
|||||||
)
|
)
|
||||||
) {
|
) {
|
||||||
vocab.special_eot_id = t.second;
|
vocab.special_eot_id = t.second;
|
||||||
|
if ((vocab.id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
||||||
|
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||||
|
__func__, t.first.c_str());
|
||||||
|
vocab.id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_CONTROL;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -6412,6 +6417,11 @@ static void llm_load_vocab(
|
|||||||
const auto & t = vocab.token_to_id.find("<|eom_id|>");
|
const auto & t = vocab.token_to_id.find("<|eom_id|>");
|
||||||
if (t != vocab.token_to_id.end()) {
|
if (t != vocab.token_to_id.end()) {
|
||||||
vocab.special_eom_id = t->second;
|
vocab.special_eom_id = t->second;
|
||||||
|
if ((vocab.id_to_token[t->second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
|
||||||
|
LLAMA_LOG_WARN("%s: control-looking token: '%s' was not control-type; this is probably a bug in the model. its type will be overridden\n",
|
||||||
|
__func__, t->first.c_str());
|
||||||
|
vocab.id_to_token[t->second].attr = LLAMA_TOKEN_ATTR_CONTROL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user