Fixing Mistral Templates

Hi there, this fixes the templates to match the ground truth as closely as possible. You can find more information about the templates here: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md

This still needs to be verified, so please don't merge yet!
This commit is contained in:
pandora 2024-09-21 18:48:28 +02:00 committed by GitHub
parent a50477ec85
commit b98635d823
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,15 +1,25 @@
instruction_template: |-
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- message['content'] -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'[INST] ' + message['content'].rstrip() + ' [/INST]'-}}
{%- else -%}
{{-'' + message['content'] + '</s>' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-''-}}
{%- endif -%}
{#- Mistral instruct chat template. An optional leading system message is split off here and merged into the first user turn further down. #}
{%- if messages[0]['role'] == 'system' %}
{%- set system_message = messages[0]['content'] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set loop_messages = messages %}
{%- endif %}
{{- bos_token }}
{%- for message in loop_messages %}
{#- Even positions (0, 2, ...) must be user turns and odd positions assistant turns. #}
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
{{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}
{%- endif %}
{%- if message['role'] == 'user' %}
{%- if loop.first and system_message is defined %}
{#- Use '\n\n' (two real newlines): a YAML literal block scalar performs no escape processing, so the JSON-style '\\n\\n' would render literal backslashes. #}
{{- ' [INST] ' + system_message + '\n\n' + message['content'] + ' [/INST]' }}
{%- else %}
{{- ' [INST] ' + message['content'] + ' [/INST]' }}
{%- endif %}
{%- elif message['role'] == 'assistant' %}
{{- ' ' + message['content'] + '</s>'}}
{%- else %}
{{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}
{%- endif %}
{%- endfor %}