Improvement for the appended 0: reserve one extra code point for the terminating 0

This commit is contained in:
marcus 2023-11-24 17:27:18 -08:00
parent a4b7b4c398
commit 9d3ba0bacd

View File

@@ -6425,8 +6425,8 @@ static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 }; static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
const char * pos = src; const char * pos = src;
std::vector<uint32_t> code_points; std::vector<uint32_t> code_points;
// common english strings have the same number of codepoints and bytes. // common english strings have the same number of codepoints and bytes. `+ 1` for the terminating 0.
code_points.reserve(n_src); code_points.reserve(n_src + 1);
uint32_t value = partial_start.value; uint32_t value = partial_start.value;
int n_remain = partial_start.n_remain; int n_remain = partial_start.n_remain;