From d54f53ca51a35af80bc6bf15849042c717587038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=2E=20Yusuf=20Sar=C4=B1g=C3=B6z?= Date: Sat, 29 Jul 2023 12:04:45 +0300 Subject: [PATCH] gguf : add tokenization constants --- constants.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/constants.py b/constants.py index 34880bb20..024bf7b03 100644 --- a/constants.py +++ b/constants.py @@ -31,3 +31,16 @@ KEY_ATTENTION_CLAMP_KQV = "{llm}.attention.clamp_kqv" # RoPE KEY_ROPE_DIMENSION_COUNT = "{llm}.rope.dimension_count" KEY_ROPE_SCALE = "{llm}.rope.scale" + +# tokenization +KEY_TOKENIZER_MODEL = "tokenizer.ggml.model" +KEY_TOKENIZER_LIST = "tokenizer.ggml.tokens" +KEY_TOKENIZER_SCORES = "tokenizer.ggml.scores" +KEY_TOKENIZER_MERGES = "tokenizer.ggml.merges" +KEY_TOKENIZER_BOS_ID = "tokenizer.ggml.bos_token_id" +KEY_TOKENIZER_EOS_ID = "tokenizer.ggml.eos_token_id" +KEY_TOKENIZER_UNK_ID = "tokenizer.ggml.unknown_token_id" +KEY_TOKENIZER_SEP_ID = "tokenizer.ggml.seperator_token_id" +KEY_TOKENIZER_PAD_ID = "tokenizer.ggml.padding_token_id" +KEY_TOKENIZER_HF_JSON = "tokenizer.huggingface.json" +KEY_TOKENIZER_RWKV = "tokenizer.rwkv.world"