safer jinja llama_chat_templates struct

This commit is contained in:
Xuan Son Nguyen 2025-01-20 16:58:29 +01:00
parent cc50356470
commit c9e7cbb08b
3 changed files with 17 additions and 14 deletions

View File

@ -1855,13 +1855,12 @@ llama_chat_templates llama_chat_templates_from_model(const struct llama_model *
)"; )";
} }
} }
return { llama_chat_templates ret(default_template_src, bos_token, eos_token);
has_explicit_template, ret.has_explicit_template = has_explicit_template;
std::make_unique<minja::chat_template>(default_template_src, bos_token, eos_token), ret.tool_use_template.reset(tool_use_template_src.empty()
tool_use_template_src.empty()
? nullptr ? nullptr
: std::make_unique<minja::chat_template>(tool_use_template_src, bos_token, eos_token) : new minja::chat_template(tool_use_template_src, bos_token, eos_token));
}; return ret;
} }
// //

View File

@ -607,8 +607,13 @@ typedef minja::chat_template llama_chat_template;
struct llama_chat_templates { struct llama_chat_templates {
bool has_explicit_template; // Model had builtin template or template override was specified. bool has_explicit_template; // Model had builtin template or template override was specified.
std::unique_ptr<llama_chat_template> default_template; // always set (defaults to chatml) llama_chat_template default_template; // always set (defaults to chatml)
std::unique_ptr<llama_chat_template> tool_use_template; std::unique_ptr<llama_chat_template> tool_use_template;
llama_chat_templates(
const std::string & source,
const std::string & bos_token,
const std::string & eos_token) : default_template(source, bos_token, eos_token) {}
}; };
// CPP wrapper for llama_chat_apply_template // CPP wrapper for llama_chat_apply_template

View File

@ -1748,7 +1748,7 @@ struct server_context {
auto templates = llama_chat_templates_from_model(model, ""); auto templates = llama_chat_templates_from_model(model, "");
GGML_ASSERT(templates.default_template); GGML_ASSERT(templates.default_template);
try { try {
templates.default_template->apply({{ templates.default_template.apply({{
{"role", "user"}, {"role", "user"},
{"content", "test"}, {"content", "test"},
}}, json(), true); }}, json(), true);
@ -3632,7 +3632,6 @@ int main(int argc, char ** argv) {
std::lock_guard<std::mutex> lock(chat_templates_mutex); std::lock_guard<std::mutex> lock(chat_templates_mutex);
if (!chat_templates) { if (!chat_templates) {
chat_templates = llama_chat_templates_from_model(ctx_server.model, ctx_server.params_base.chat_template); chat_templates = llama_chat_templates_from_model(ctx_server.model, ctx_server.params_base.chat_template);
GGML_ASSERT(chat_templates->default_template);
} }
return *chat_templates; return *chat_templates;
}; };
@ -3644,7 +3643,7 @@ int main(int argc, char ** argv) {
{ "default_generation_settings", ctx_server.default_generation_settings_for_props }, { "default_generation_settings", ctx_server.default_generation_settings_for_props },
{ "total_slots", ctx_server.params_base.n_parallel }, { "total_slots", ctx_server.params_base.n_parallel },
{ "model_path", ctx_server.params_base.model }, { "model_path", ctx_server.params_base.model },
{ "chat_template", templates.default_template->source() }, { "chat_template", templates.default_template.source() },
{ "build_info", build_info }, { "build_info", build_info },
}; };
if (ctx_server.params_base.use_jinja && templates.tool_use_template) { if (ctx_server.params_base.use_jinja && templates.tool_use_template) {
@ -3871,7 +3870,7 @@ int main(int argc, char ** argv) {
auto body = json::parse(req.body); auto body = json::parse(req.body);
const auto & templates = get_chat_templates(); const auto & templates = get_chat_templates();
const auto & chat_template = body.contains("tools") && templates.tool_use_template ? *templates.tool_use_template : *templates.default_template; const auto & chat_template = body.contains("tools") && templates.tool_use_template ? *templates.tool_use_template : templates.default_template;
json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja); json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);
return handle_completions_impl( return handle_completions_impl(
@ -4290,8 +4289,8 @@ int main(int argc, char ** argv) {
// print sample chat example to make it clear which template is used // print sample chat example to make it clear which template is used
LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__, LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
get_chat_templates().default_template->source().c_str(), get_chat_templates().default_template.source().c_str(),
common_chat_format_example(*get_chat_templates().default_template, ctx_server.params_base.use_jinja).c_str()); common_chat_format_example(get_chat_templates().default_template, ctx_server.params_base.use_jinja).c_str());
ctx_server.queue_tasks.on_new_task(std::bind( ctx_server.queue_tasks.on_new_task(std::bind(
&server_context::process_single_task, &ctx_server, std::placeholders::_1)); &server_context::process_single_task, &ctx_server, std::placeholders::_1));