From 6796e7450c3c406a7d2f10498db7083f21f8a7bb Mon Sep 17 00:00:00 2001 From: vvhg1 Date: Fri, 6 Oct 2023 18:35:50 +0200 Subject: [PATCH] serverinfill tokens correction --- examples/server/server.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index c53a64867..fa5e52e82 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -344,9 +344,10 @@ struct llama_server_context void loadInfill() { - auto prefix_tokens = tokenize(params.input_prefix, true); // always add BOS - auto suffix_tokens = tokenize(params.input_suffix, true); // always add BOS + auto prefix_tokens = tokenize(params.input_prefix, false); + auto suffix_tokens = tokenize(params.input_suffix, false); prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(ctx)); + prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(ctx)); // always add BOS prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(ctx)); prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end()); prefix_tokens.push_back(llama_token_middle(ctx));