Improve the UI tokenizer

This commit is contained in:
oobabooga 2023-09-15 19:30:44 -07:00
parent c3e4c9fdc2
commit ef04138bc0
4 changed files with 21 additions and 6 deletions

View File

@ -127,6 +127,10 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * {
height: calc(100dvh - 292px);
}
/* Utility class: render the element's text in the browser's monospace font. */
/* NOTE(review): presumably attached to a component wrapper via elem_classes; confirm the inner textarea actually inherits this font-family. */
.monospace {
font-family: monospace;
}
.textbox_default textarea,
.textbox_default_output textarea,
.textbox_logits textarea,

View File

@ -144,6 +144,17 @@ def get_encoded_length(prompt):
return len(encode(prompt)[0])
def get_token_ids(prompt):
    """Return a printable listing of the token IDs produced for *prompt*.

    The prompt is passed through ``encode`` and the first element of the
    result is taken as the token sequence (presumably a batch of size one;
    verify against ``encode``). Every token is decoded on its own with
    ``shared.tokenizer.decode`` so each ID can be shown next to its text.

    The returned string has one line per token, in the form
    ``"<id padded to 5 columns> - <decoded text>\n"``. It is an empty string
    when there are no tokens.
    """
    token_ids = encode(prompt)[0]

    # Decode everything up front so formatting only starts once every
    # token has been decoded successfully.
    pieces = [shared.tokenizer.decode(token_id) for token_id in token_ids]

    return ''.join(
        f"{str(int(token_id)).ljust(5)} - {piece}\n"
        for token_id, piece in zip(token_ids, pieces)
    )
def get_max_prompt_length(state):
    """Return how many tokens are left for the prompt.

    Computed as ``state['truncation_length']`` minus
    ``state['max_new_tokens']``. A ``KeyError`` propagates if either key is
    missing from *state*.
    """
    truncation_length = state['truncation_length']
    reserved_for_output = state['max_new_tokens']
    return truncation_length - reserved_for_output

View File

@ -3,8 +3,8 @@ import gradio as gr
from modules import logits, shared, ui, utils
from modules.prompts import count_tokens, load_prompt
from modules.text_generation import (
encode,
generate_reply_wrapper,
get_token_ids,
stop_everything_event
)
from modules.utils import gradio
@ -57,7 +57,7 @@ def create_ui():
with gr.Tab('Tokens'):
shared.gradio['get_tokens-default'] = gr.Button('Get token IDs for the input')
shared.gradio['tokens-default'] = gr.Textbox(lines=23, label='Tokens', elem_classes=['textbox_logits', 'add_scrollbar'])
shared.gradio['tokens-default'] = gr.Textbox(lines=23, label='Tokens', elem_classes=['textbox_logits', 'add_scrollbar', 'monospace'])
def create_event_handlers():
@ -100,4 +100,4 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
logits.get_next_logits, gradio('textbox-default', 'interface_state', 'use_samplers-default', 'logits-default'), gradio('logits-default', 'logits-default-previous'), show_progress=False)
shared.gradio['get_tokens-default'].click(lambda x : str(encode(x)[0].tolist()), gradio('textbox-default'), gradio('tokens-default'), show_progress=False)
shared.gradio['get_tokens-default'].click(get_token_ids, gradio('textbox-default'), gradio('tokens-default'), show_progress=False)

View File

@ -3,8 +3,8 @@ import gradio as gr
from modules import logits, shared, ui, utils
from modules.prompts import count_tokens, load_prompt
from modules.text_generation import (
encode,
generate_reply_wrapper,
get_token_ids,
stop_everything_event
)
from modules.utils import gradio
@ -43,7 +43,7 @@ def create_ui():
with gr.Tab('Tokens'):
shared.gradio['get_tokens-notebook'] = gr.Button('Get token IDs for the input')
shared.gradio['tokens-notebook'] = gr.Textbox(lines=23, label='Tokens', elem_classes=['textbox_logits_notebook', 'add_scrollbar'])
shared.gradio['tokens-notebook'] = gr.Textbox(lines=23, label='Tokens', elem_classes=['textbox_logits_notebook', 'add_scrollbar', 'monospace'])
with gr.Row():
shared.gradio['Generate-notebook'] = gr.Button('Generate', variant='primary', elem_classes='small-button')
@ -102,4 +102,4 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
logits.get_next_logits, gradio('textbox-notebook', 'interface_state', 'use_samplers-notebook', 'logits-notebook'), gradio('logits-notebook', 'logits-notebook-previous'), show_progress=False)
shared.gradio['get_tokens-notebook'].click(lambda x : str(encode(x)[0].tolist()), gradio('textbox-notebook'), gradio('tokens-notebook'), show_progress=False)
shared.gradio['get_tokens-notebook'].click(get_token_ids, gradio('textbox-notebook'), gradio('tokens-notebook'), show_progress=False)