Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-02-06 00:20:34 +01:00)
vocab : correctly identify LF token for GPT-2 style BPE tokenizer (#11496)
commit ffd0821c57
parent 4314e56c4f
@@ -1692,7 +1692,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
         linefeed_id = ids[0];
     } else {
-        const std::vector<int> ids = tokenize("\xC4\x8A", false); // U+010A
+        const std::vector<int> ids = tokenize("\n", false);

         //GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
         if (ids.empty()) {
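For background (not part of the commit): GPT-2 style byte-level BPE tokenizers remap every input byte to a printable Unicode codepoint before vocabulary lookup, and the LF byte 0x0A lands on U+010A ("Ċ", UTF-8 bytes C4 8A). That mapping is why the old lookup string was "\xC4\x8A"; the change tokenizes the raw "\n" instead and lets the tokenizer apply the byte mapping itself. The sketch below reimplements OpenAI's bytes_to_unicode() table purely for illustration; it is not llama.cpp code and the helper name is only borrowed from the GPT-2 reference encoder.

// Illustrative sketch, assuming the standard GPT-2 byte-to-unicode table.
#include <cstdio>
#include <map>

// Build the byte -> codepoint table: printable Latin-1 bytes map to
// themselves, every other byte is shifted into the range starting at U+0100.
static std::map<int, int> bytes_to_unicode() {
    std::map<int, int> m;
    auto is_printable = [](int b) {
        return (b >= 0x21 && b <= 0x7E) || (b >= 0xA1 && b <= 0xAC) || (b >= 0xAE && b <= 0xFF);
    };
    int n = 0;
    for (int b = 0; b < 256; ++b) {
        if (is_printable(b)) {
            m[b] = b;          // printable byte maps to itself
        } else {
            m[b] = 256 + n++;  // non-printable bytes get 0x100, 0x101, ...
        }
    }
    return m;
}

int main() {
    const auto m = bytes_to_unicode();
    // LF (0x0A) is the 11th non-printable byte, so it maps to 0x100 + 10 = U+010A,
    // whose UTF-8 encoding is 0xC4 0x8A -- the "\xC4\x8A" string in the old code.
    printf("0x0A -> U+%04X\n", (unsigned) m.at(0x0A)); // prints: 0x0A -> U+010A
    return 0;
}

Because llama.cpp's tokenize() applies this byte mapping to its input, passing the already-mapped "\xC4\x8A" can fail to resolve to the LF token for GPT-2 style vocabularies, whereas passing "\n" resolves correctly.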