server : enable special tokens during tokenization by default
commit c544faed74 (parent b94b10914c)
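For context: the ::llama_tokenize helper used in the server code below takes a fourth boolean that decides whether special tokens such as <|im_start|> and <|im_end|> are parsed as single control tokens or treated as plain text. A minimal sketch of the difference this commit turns on, assuming a loaded llama_context * and the four-argument helper form used in the diff (tokenize_demo is a hypothetical name, not part of the commit):

#include <cstdio>
#include <string>
#include <vector>
#include "common.h" // provides ::llama_tokenize(ctx, text, add_bos, special)

// Hypothetical demo of the `special` flag that this commit forces to true in the server.
static void tokenize_demo(llama_context * ctx) {
    const std::string prompt = "<|im_start|>user\nHello<|im_end|>\n";

    // special = true  -> the ChatML markers map to their dedicated special token ids
    std::vector<llama_token> with_special    = ::llama_tokenize(ctx, prompt, /*add_bos=*/true, /*special=*/true);

    // special = false -> the same markers are split into ordinary text sub-tokens
    std::vector<llama_token> without_special = ::llama_tokenize(ctx, prompt, /*add_bos=*/true, /*special=*/false);

    printf("with special: %zu tokens, without: %zu tokens\n", with_special.size(), without_special.size());
}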
@@ -618,6 +618,11 @@ struct llama_server_context
 
     std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
     {
+        // TODO: currently, we tokenize using special tokens by default
+        //       this is not always correct (see https://github.com/ggerganov/llama.cpp/pull/4160#issuecomment-1824826216)
+        //       but it's better compared to completely ignoring ChatML and other chat templates
+        const bool TMP_FORCE_SPECIAL = true;
+
         // If `add_bos` is true, we only add BOS, when json_prompt is a string,
         // or the first element of the json_prompt array is a string.
         std::vector<llama_token> prompt_tokens;
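The corner case flagged by the TODO above is that forcing special-token parsing also applies to user-supplied text: a marker typed literally by a user is interpreted as a control token rather than as plain characters. A hedged sketch of that effect, using the same helper and an assumed ctx:

// Assumption for illustration: the text below comes straight from a user message.
// With special parsing forced on, the literal "<|im_end|>" becomes the real
// end-of-turn control token, which can cut a templated chat short.
std::vector<llama_token> toks = ::llama_tokenize(ctx, "the user typed <|im_end|> literally", /*add_bos=*/false, /*special=*/true);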
@@ -633,12 +638,12 @@ struct llama_server_context
                     std::vector<llama_token> p;
                     if (first)
                     {
-                        p = ::llama_tokenize(ctx, s, add_bos);
+                        p = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
                         first = false;
                     }
                     else
                     {
-                        p = ::llama_tokenize(ctx, s, false);
+                        p = ::llama_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
                     }
                     prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
                 }
@@ -655,7 +660,7 @@ struct llama_server_context
         else
         {
             auto s = json_prompt.template get<std::string>();
-            prompt_tokens = ::llama_tokenize(ctx, s, add_bos);
+            prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
         }
 
         return prompt_tokens;
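Putting the hunks above together: tokenize() accepts the prompt either as one JSON string or as an array whose string elements are tokenized one by one, with BOS added only before the first element and the special flag now forced on everywhere. A hypothetical call, assuming an initialized llama_server_context named server_ctx and nlohmann::json as used by the server:

// Hypothetical usage of the function shown in the diff above.
json json_prompt = json::array({
    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n",
    "<|im_start|>user\nHello<|im_end|>\n",
});
// BOS is prepended only before the first element; every element is tokenized
// with TMP_FORCE_SPECIAL, so the ChatML markers become single special tokens.
std::vector<llama_token> prompt_tokens = server_ctx.tokenize(json_prompt, /*add_bos=*/true);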
@@ -2235,7 +2240,7 @@ std::string format_chatml(std::vector<json> messages)
 
     for (auto it = messages.begin(); it != messages.end(); ++it) {
         chatml_msgs << "<|im_start|>"
                     << json_value(*it, "role", std::string("user")) << '\n';
         chatml_msgs << json_value(*it, "content", std::string(""))
                     << "<|im_end|>\n";
     }
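For reference, the loop in format_chatml shown above emits one ChatML block per message; with the special flag now enabled, the markers it produces are later tokenized as real control tokens instead of literal text. A sketch of the string the visible loop builds, assuming nlohmann::json message objects:

// Hypothetical input to format_chatml and the string the loop above builds from it.
std::vector<json> messages = {
    { {"role", "system"}, {"content", "Be brief."} },
    { {"role", "user"},   {"content", "Hi"} },
};
// Expected result of the visible loop:
//   "<|im_start|>system\nBe brief.<|im_end|>\n<|im_start|>user\nHi<|im_end|>\n"
std::string prompt = format_chatml(messages);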