From d1a1b614cdd690c79579c5628af1998dcd01e502 Mon Sep 17 00:00:00 2001
From: Jared Van Bortel
Date: Thu, 4 Apr 2024 17:54:46 -0400
Subject: [PATCH] spm : fix special_add_bos default

When this is not set in HF `tokenizer_config.json`, it should default to
true.
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama.cpp b/llama.cpp
index 9af370658..850cfb91b 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11562,7 +11562,7 @@ static std::vector llama_tokenize_internal(const llama_vocab &
     // tokenizer.encode('', add_special_tokens=True) returns [1]
     // tokenizer.encode('', add_special_tokens=False) returns []
 
-    if (add_special && vocab.special_add_bos == 1) {
+    if (add_special && vocab.special_add_bos != 0) {
         GGML_ASSERT(vocab.special_bos_id != -1);
         output.push_back(vocab.special_bos_id);
     }