From 9d3ba0bacdb6705ec2c8ad1ce21e1dfb6f6a2db8 Mon Sep 17 00:00:00 2001
From: marcus
Date: Fri, 24 Nov 2023 17:27:18 -0800
Subject: [PATCH] improvement for the appended 0

---
 llama.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index ec23485fd..f2b5967d7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6425,8 +6425,8 @@ static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
     static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
     const char * pos = src;
     std::vector<uint32_t> code_points;
-    // common english strings have the same number of codepoints and bytes.
-    code_points.reserve(n_src);
+    // common english strings have the same number of codepoints and bytes. `+ 1` for the terminating 0.
+    code_points.reserve(n_src + 1);
     uint32_t value = partial_start.value;
     int n_remain = partial_start.n_remain;
 