diff --git a/Templates-supported-by-llama_chat_apply_template.md b/Templates-supported-by-llama_chat_apply_template.md
index bf61d6b..7f392a1 100644
--- a/Templates-supported-by-llama_chat_apply_template.md
+++ b/Templates-supported-by-llama_chat_apply_template.md
@@ -19,6 +19,7 @@ VARIANTS_TO_TEST = [
     'bofenghuang/vigogne-2-70b-chat',
     'mlabonne/AlphaMonarch-7B',
     'google/gemma-7b-it',
+    'OrionStarAI/Orion-14B-Chat',
 ]
 
 HISTORY = [
@@ -34,7 +35,7 @@ for variant in VARIANTS_TO_TEST:
     if 'Mistral' in variant or 'gemma' in variant:
         history.pop(0) # no system prompt for mistral and gemma
     if 'gemma' in variant:
-        # GemmaTokenizer is not yet support by the time this code is written
+        # GemmaTokenizer is quite buggy, let's hard code the template here
         GEMMA_TMLP = "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}"
         print('Gemma')
         output = AutoTokenizer.from_pretrained(VARIANTS_TO_TEST[0]).apply_chat_template(history, tokenize=False, chat_template=GEMMA_TMLP)
@@ -131,6 +132,15 @@ again<|endoftext|>
 response<|endoftext|>
 ```
 
+```
+Usage: ./server -m ... --chat-template orion
+Human: hello
+
+Assistant: </s>response</s>Human: again
+
+Assistant: </s>response</s>
+```
+
 ## Custom chat templates
 
 Currently, it's not possible to use your own chat template with llama.cpp server's `/chat/completions`
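
For quick local verification of the newly added variant, a minimal standalone sketch in the same spirit as the wiki's test script is shown below. It assumes the `OrionStarAI/Orion-14B-Chat` tokenizer is reachable on Hugging Face and ships its own `chat_template`; the `trust_remote_code` flag is only an assumption made because Orion publishes a custom tokenizer class and may not be strictly required.

```python
# Minimal sketch, not part of the patch above. Assumes network access to
# Hugging Face and that OrionStarAI/Orion-14B-Chat provides a chat_template
# alongside its tokenizer.
from transformers import AutoTokenizer

history = [
    {'role': 'user', 'content': 'hello'},
    {'role': 'assistant', 'content': 'response'},
    {'role': 'user', 'content': 'again'},
    {'role': 'assistant', 'content': 'response'},
]

# trust_remote_code is passed because Orion ships a custom tokenizer class;
# drop it if your transformers version resolves the tokenizer without it.
tokenizer = AutoTokenizer.from_pretrained('OrionStarAI/Orion-14B-Chat', trust_remote_code=True)
print(tokenizer.apply_chat_template(history, tokenize=False))
```

The printed prompt should line up with the `--chat-template orion` example block added in the last hunk.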