spm : fix special_add_bos default

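When the add-BOS setting is not set in the HF `tokenizer_config.json`,
`special_add_bos` should default to true, so BOS is now added unless the
value is explicitly 0 (false) rather than only when it is explicitly 1.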
This commit is contained in:
Jared Van Bortel 2024-04-04 17:54:46 -04:00
parent 45983e3a47
commit d1a1b614cd

View File

@@ -11562,7 +11562,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
// tokenizer.encode('', add_special_tokens=True) returns [1]
// tokenizer.encode('', add_special_tokens=False) returns []
-if (add_special && vocab.special_add_bos == 1) {
+if (add_special && vocab.special_add_bos != 0) {
GGML_ASSERT(vocab.special_bos_id != -1);
output.push_back(vocab.special_bos_id);
}