Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-15 06:40:45 +01:00)
fix eos tokens for glm4
commit 8c5f1b2b6c
parent 1fc5bf5bcb
@@ -2728,6 +2728,8 @@ class ChatGLMModel(Model):
         tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
         vocab_size = hparams.get("padded_vocab_size", len(tokenizer.get_vocab()))
         assert max(tokenizer.get_vocab().values()) < vocab_size
+        role_special_tokens = ["<|system|>", "<|user|>", "<|assistant|>", "<|observation|>"]
+        special_tokens = ["[MASK]", "[gMASK]", "[sMASK]", "sop", "eop"] + role_special_tokens
         print(vocab_size)
         print(max(tokenizer.get_vocab().values()))
         for token_id in range(vocab_size):
@@ -2750,6 +2752,11 @@ class ChatGLMModel(Model):
                 text = f"[PAD{token_id}]".encode("utf-8")

             if token_id >= tokenizer.tokenizer.sp_model.vocab_size():
+                if piece in special_tokens:
+                    # show special tokens in prompt
+                    toktype = SentencePieceTokenTypes.USER_DEFINED
+                else:
+                    print(f"unknow token: {piece}")
                 toktype = SentencePieceTokenTypes.UNKNOWN
                 tokens.append(text)
                 scores.append(score)
@@ -2856,7 +2863,7 @@ class ChatGLMModel(Model):
         special_vocab.chat_template = "ChatGLM4"
         special_vocab.merges = merges
         # only add special tokens when they were not already loaded from config.json
-        if len(special_vocab.special_token_ids) == 0:
+        # if len(special_vocab.special_token_ids) == 0:
         special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["<|endoftext|>"])
         special_vocab._set_special_token("eos", tokenizer.get_added_vocab()["<|endoftext|>"])
         # this one is usually not in config.json anyway
@@ -2955,7 +2962,7 @@ def parse_args() -> argparse.Namespace:
         help="model is executed on big endian machine",
     )
     parser.add_argument(
-        "model", type=Path,
+        "--model", type=Path,
         help="directory containing model file",
     )
     parser.add_argument(
llama.cpp (35 changes)
@@ -1802,9 +1802,11 @@ enum e_model {
     MODEL_2_8B,
     MODEL_3B,
     MODEL_4B,
+    MODEL_6B,
     MODEL_6_9B,
     MODEL_7B,
     MODEL_8B,
+    MODEL_9B,
     MODEL_12B,
     MODEL_13B,
     MODEL_14B,
@@ -3918,9 +3920,11 @@ static const char * llama_model_type_name(e_model type) {
         case MODEL_2_8B:  return "2.8B";
         case MODEL_3B:    return "3B";
         case MODEL_4B:    return "4B";
+        case MODEL_6B:    return "6B";
         case MODEL_6_9B:  return "6.9B";
         case MODEL_7B:    return "7B";
         case MODEL_8B:    return "8B";
+        case MODEL_9B:    return "9B";
         case MODEL_12B:   return "12B";
         case MODEL_13B:   return "13B";
         case MODEL_14B:   return "14B";
@@ -4507,8 +4511,8 @@ static void llm_load_hparams(
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 switch (hparams.n_layer) {
-                    case 28: model.type = e_model::MODEL_7B; break;
-                    case 40: model.type = e_model::MODEL_8B; break;
+                    case 28: model.type = e_model::MODEL_6B; break;
+                    case 40: model.type = e_model::MODEL_9B; break;
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
@@ -18362,6 +18366,19 @@ llama_token_type llama_token_get_type(const struct llama_model * model, llama_token token) {
 }

 bool llama_token_is_eog(const struct llama_model * model, llama_token token) {
+    auto arch_name = llama_model_arch_name(model->arch);
+    auto vocab_type = model->vocab.type;
+    if (strcmp(arch_name, "chatglm") == 0) {
+        if (LLAMA_VOCAB_TYPE_BPE == vocab_type) { // glm4
+            return token != -1 && (
+                token == llama_token_eos(model) ||
+                token == llama_token_eot(model) ||
+                token == 151329 ||
+                token == 151336 ||
+                token == 151338
+            );
+        }
+    }
     return token != -1 && (
         token == llama_token_eos(model) ||
         token == llama_token_eot(model)
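With this hunk, llama_token_is_eog() reports end of generation for the GLM4 stop/role token ids (151329, 151336, 151338) in addition to the regular eos/eot tokens. A minimal caller-side sketch of how this is typically consumed; sample_next_token() is a hypothetical stand-in for the application's sampling code, not a llama.cpp API:

// Hedged sketch: stop a generation loop on any end-of-generation token.
// `ctx` and `model` are assumed to be an already-loaded llama_context / llama_model.
while (true) {
    const llama_token tok = sample_next_token(ctx);   // placeholder for the caller's sampler
    if (llama_token_is_eog(model, tok)) {
        break; // eos, eot, or one of the glm4 ids 151329/151336/151338
    }
    // ... feed tok back into the context and emit it ...
}

Note that the glm4 ids are hard-coded in this change: they match the stock GLM4 tokenizer rather than being read from the model's metadata.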
@@ -18424,8 +18441,18 @@ int32_t llama_tokenize(
         int32_t n_tokens_max,
         bool add_special,
         bool parse_special) {
-    auto res = llama_tokenize_internal(model->vocab, std::string(text, text_len), add_special, parse_special);
+    auto arch_name = llama_model_arch_name(model->arch);
+    auto prompt = std::move(std::string(text, text_len));
+    auto vocab_type = model->vocab.type;
+    if (strcmp(arch_name, "chatglm") == 0) {
+        // chatglm3
+        if (LLAMA_VOCAB_TYPE_SPM == vocab_type) {
+            prompt = "[gMASK]sop<|user|>\n" + prompt + "<|assistant|>";
+        } else if (LLAMA_VOCAB_TYPE_BPE == vocab_type) { // glm4
+            prompt = "[gMASK]<sop><|user|>\n" + prompt + "<|assistant|>";
+        }
+    }
+    auto res = llama_tokenize_internal(model->vocab, prompt, add_special, parse_special);
     if (n_tokens_max < (int) res.size()) {
         // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
         return -((int) res.size());
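After this hunk, tokenizing raw text on a chatglm model implicitly wraps it in the chat template ("[gMASK]sop..." for chatglm3 / SPM, "[gMASK]<sop>..." for glm4 / BPE) before llama_tokenize_internal() is called. A hedged sketch of a plain public-API call, assuming a loaded glm4 model:

// Sketch: the caller still passes the raw user text; the chat-template wrapping
// now happens inside llama_tokenize() for chatglm models.
const char * text = "Hello";
std::vector<llama_token> toks(256);
int32_t n = llama_tokenize(model, text, (int32_t) strlen(text),
                           toks.data(), (int32_t) toks.size(),
                           /*add_special=*/true, /*parse_special=*/true);
if (n < 0) {
    // a negative return means the buffer was too small; -n is the required size
    toks.resize(-n);
    n = llama_tokenize(model, text, (int32_t) strlen(text),
                       toks.data(), (int32_t) toks.size(), true, true);
}
toks.resize(n);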