Add probability dropdown to perplexity_colors extension (#3148)

Author: SeanScripts
Date: 2023-08-20 18:28:14 -05:00 (committed by GitHub)
Parent: 0dfd1a8b7d
Commit: 2c1fd0d72b


@@ -1,17 +1,25 @@
+import re
+import time
+
 import gradio
+import markdown
+import numpy as np
 import torch
 from transformers import LogitsProcessor
-import numpy as np

-from modules import shared
+from modules import html_generator, shared
+from modules.html_generator import replace_blockquote

 params = {
+    'active': True,
     'color_by_perplexity': False,
     'color_by_probability': False,
     'ppl_scale': 15.0, # No slider for this right now, because I don't think it really needs to be changed. Very large perplexity scores don't show up often.
-    #'probability_dropdown': False
+    'probability_dropdown': False,
+    'verbose': False # For debugging mostly
 }

 class PerplexityLogits(LogitsProcessor):
     def __init__(self, verbose=False):
         self.generated_token_ids = []
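For orientation, the transformers LogitsProcessor contract this class relies on: __call__ receives the running input_ids and the next-token logits at every generation step and must return the (possibly modified) logits. A minimal observer sketch (the NoopObserver name is invented for illustration):

    import torch
    from transformers import LogitsProcessor

    class NoopObserver(LogitsProcessor):
        def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
            # Inspect scores here; returning them unchanged leaves sampling
            # untouched, which is exactly how PerplexityLogits behaves.
            return scores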
@@ -23,8 +31,9 @@ class PerplexityLogits(LogitsProcessor):
         self.verbose = verbose

     def __call__(self, input_ids, scores):
+        # t0 = time.time()
         probs = torch.softmax(scores, dim=-1, dtype=torch.float)
-        log_probs = torch.nan_to_num(torch.log(probs))
+        log_probs = torch.nan_to_num(torch.log(probs)) # Note: This is to convert log(0) nan to 0, but probs*log_probs makes this 0 not affect the perplexity.
         entropy = -torch.sum(probs * log_probs)
         entropy = entropy.cpu().numpy()
         perplexity = round(float(np.exp(entropy)), 4)
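Sanity check on the math above: the per-step perplexity is exp of the entropy of the next-token distribution, so a uniform distribution over k tokens yields perplexity k. A self-contained example with made-up logits:

    import numpy as np
    import torch

    scores = torch.zeros(1, 4)  # equal logits -> uniform over 4 tokens
    probs = torch.softmax(scores, dim=-1, dtype=torch.float)
    log_probs = torch.nan_to_num(torch.log(probs))
    entropy = -torch.sum(probs * log_probs).cpu().numpy()
    print(round(float(np.exp(entropy)), 4))  # 4.0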
@@ -36,16 +45,16 @@ class PerplexityLogits(LogitsProcessor):
         if len(self.selected_probs) > 0:
             # Is the selected token in the top tokens?
             if self.verbose:
-                print(shared.tokenizer.decode(last_token_id))
-                print([shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1]])
-                print(self.top_probs_list[-1])
-            if last_token_id in self.top_token_ids_list[-1]:
-                idx = self.top_token_ids_list[-1].index(last_token_id)
-                self.selected_probs.append(self.top_probs_list[-1][idx])
+                print('Probs: Token after', shared.tokenizer.decode(last_token_id))
+                print('Probs:', [shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1][0]])
+                print('Probs:', [round(float(prob), 4) for prob in self.top_probs_list[-1][0]])
+            if last_token_id in self.top_token_ids_list[-1][0]:
+                idx = self.top_token_ids_list[-1][0].index(last_token_id)
+                self.selected_probs.append(self.top_probs_list[-1][0][idx])
             else:
-                self.top_token_ids_list[-1].append(last_token_id)
+                self.top_token_ids_list[-1][0].append(last_token_id)
                 last_prob = round(float(self.last_probs[last_token_id]), 4)
-                self.top_probs_list[-1].append(last_prob)
+                self.top_probs_list[-1][0].append(last_prob)
                 self.selected_probs.append(last_prob)
         else:
             self.selected_probs.append(1.0) # Placeholder for the last token of the prompt
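The new [0] index reflects that each stored entry is per batch; with a batch of one (an assumption consistent with the rest of this diff), element [0] is the id list for the single sequence:

    top_token_ids_list = [[[42, 7, 13]]]  # one generation step, batch of one
    last_token_id = 7
    step_ids = top_token_ids_list[-1][0]  # ids for the only sequence
    if last_token_id in step_ids:
        idx = step_ids.index(last_token_id)  # 1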
@@ -54,7 +63,7 @@ class PerplexityLogits(LogitsProcessor):
             pplbar = "-"
             if not np.isnan(perplexity):
                 pplbar = "*" * round(perplexity)
-            print(f"{last_token}\t{perplexity:.2f}\t{pplbar}")
+            print(f"PPL: Token after {shared.tokenizer.decode(last_token_id)}\t{perplexity:.2f}\t{pplbar}")

         # Get top 5 probabilities
         top_tokens_and_probs = torch.topk(probs, 5)
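torch.topk returns a (values, indices) pair sorted by descending value, which is where the top-5 probabilities and their token ids come from:

    import torch

    probs = torch.tensor([0.05, 0.60, 0.20, 0.10, 0.03, 0.02])
    top = torch.topk(probs, 5)
    print(top.values)   # tensor([0.6000, 0.2000, 0.1000, 0.0500, 0.0300])
    print(top.indices)  # tensor([1, 2, 3, 0, 4])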
@@ -67,76 +76,86 @@ class PerplexityLogits(LogitsProcessor):
         probs = probs.cpu().numpy().flatten()
         self.last_probs = probs # Need to keep this as a reference for top probs

+        # t1 = time.time()
+        # print(f"PPL Processor: {(t1-t0):.3f} s")
+        # About 1 ms, though occasionally up to around 100 ms, not sure why...
+
         # Doesn't actually modify the logits!
         return scores

 # Stores the perplexity and top probabilities
 ppl_logits_processor = None

 def logits_processor_modifier(logits_processor_list, input_ids):
     global ppl_logits_processor
-    ppl_logits_processor = PerplexityLogits()
-    logits_processor_list.append(ppl_logits_processor)
+    if params['active']:
+        ppl_logits_processor = PerplexityLogits(verbose=params['verbose'])
+        logits_processor_list.append(ppl_logits_processor)
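The webui calls logits_processor_modifier with its own list before each generation. Outside the webui, the equivalent wiring would look roughly like this (model setup omitted; a sketch, not the extension API):

    from transformers import LogitsProcessorList

    processor_list = LogitsProcessorList()
    logits_processor_modifier(processor_list, input_ids=None)
    # output_ids = model.generate(input_ids, logits_processor=processor_list)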
 def output_modifier(text):
     global ppl_logits_processor
+    # t0 = time.time()
+
+    if not params['active']:
+        return text

     # TODO: It's probably more efficient to do this above rather than modifying all these lists
     # Remove last element of perplexities_list, top_token_ids_list, top_tokens_list, top_probs_list since everything is off by one because this extension runs before generation
     perplexities = ppl_logits_processor.perplexities_list[:-1]
     top_token_ids_list = ppl_logits_processor.top_token_ids_list[:-1]
-    top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids] for top_token_ids in top_token_ids_list]
+    top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids[0]] for top_token_ids in top_token_ids_list]
     top_probs_list = ppl_logits_processor.top_probs_list[:-1]
     # Remove first element of generated_token_ids, generated_tokens, selected_probs because they are for the last token of the prompt
     gen_token_ids = ppl_logits_processor.generated_token_ids[1:]
     gen_tokens = [shared.tokenizer.decode(token_id) for token_id in gen_token_ids]
     sel_probs = ppl_logits_processor.selected_probs[1:]

-    end_part = '</span>' # Helps with finding the index after replacing part of the text.
+    end_part = '</div></div>' if params['probability_dropdown'] else '</span>' # Helps with finding the index after replacing part of the text.
     in_code = False # Since the <span> tags mess up code blocks, avoid coloring while inside a code block, based on finding tokens with '`' in them

-    if params['color_by_probability'] and params['color_by_perplexity']:
-        i = 0
-        for token, prob, ppl, top_tokens, top_probs in zip(gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list):
-            if '`' in token:
-                in_code = not in_code
-                continue
-            if in_code:
-                continue
-            color = probability_perplexity_color_scale(prob, ppl)
-            if token in text[i:]:
-                text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
-                i += text[i:].find(end_part) + len(end_part)
-    elif params['color_by_perplexity']:
-        i = 0
-        for token, ppl, top_tokens, top_probs in zip(gen_tokens, perplexities, top_tokens_list, top_probs_list):
-            if '`' in token:
-                in_code = not in_code
-                continue
-            if in_code:
-                continue
-            color = perplexity_color_scale(ppl)
-            if token in text[i:]:
-                text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
-                i += text[i:].find(end_part) + len(end_part)
-    elif params['color_by_probability']:
-        i = 0
-        for token, prob, top_tokens, top_probs in zip(gen_tokens, sel_probs, top_tokens_list, top_probs_list):
-            if '`' in token:
-                in_code = not in_code
-                continue
-            if in_code:
-                continue
-            color = probability_color_scale(prob)
-            if token in text[i:]:
-                text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
-                i += text[i:].find(end_part) + len(end_part)
+    i = 0
+    for token, prob, ppl, top_tokens, top_probs in zip(gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list):
+        if '`' in token and not params['probability_dropdown']:
+            in_code = not in_code
+            continue
+        if in_code:
+            continue
+        color = 'ffffff'
+        if params['color_by_probability'] and params['color_by_perplexity']:
+            color = probability_perplexity_color_scale(prob, ppl)
+        elif params['color_by_perplexity']:
+            color = perplexity_color_scale(ppl)
+        elif params['color_by_probability']:
+            color = probability_color_scale(prob)
+        if token in text[i:]:
+            if params['probability_dropdown']:
+                after_token_index = text[i:].find(token) + len(token)
+                whitespace = text[i:][after_token_index:(after_token_index + 1)]
+                if whitespace != ' ':
+                    whitespace = ''
+                text = text[:i] + text[i:].replace(token, add_dropdown_html(token, color, top_tokens, top_probs[0], whitespace, ppl), 1)
+            else:
+                text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
+            i += text[i:].find(end_part) + len(end_part)

-    print('Average perplexity:', round(np.mean(perplexities), 4))
+    # Use full perplexity list for calculating the average here.
+    print('Average perplexity:', round(np.mean(ppl_logits_processor.perplexities_list[:-1]), 4))

+    # Optional hacky workaround: Without this, spaces get added between every token. With this, there is a little extra whitespace at the top.
+    # This fixes the tokenization spaces, somehow. However, this also removes any paragraph breaks in the message.
+    # return '<p>' + text + '</p>'
+
+    # t1 = time.time()
+    # print(f"Modifier: {(t1-t0):.3f} s")
+    # About 50 ms
     return text
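To make the i bookkeeping concrete, the same walk in isolation (a toy sketch; the real end_part depends on params['probability_dropdown']):

    text = 'the cat sat'
    end_part = '</span>'
    i = 0
    for token in ['the', 'cat', 'sat']:
        if token in text[i:]:
            text = text[:i] + text[i:].replace(token, f'<span style="color: #ffffff">{token}</span>', 1)
            i += text[i:].find(end_part) + len(end_part)
    print(text)  # each token wrapped exactly once; i always points past the last insertion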
-# Green-yellow-red color scale
 def probability_color_scale(prob):
+    '''
+    Green-yellow-red color scale
+    '''
     rv = 0
     gv = 0
     if prob <= 0.5:
@@ -149,22 +168,32 @@ def probability_color_scale(prob):
         gv = 'ff'
         if len(rv) < 2:
             rv = '0' * (2 - len(rv)) + rv

     return rv + gv + '00'
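Spot-checking the endpoints against the UI description further down ("100% green, 50% yellow, 0% red"); the prob <= 0.5 branch body falls in the context gap between hunks, so the 0.0 value assumes it pads gv symmetrically:

    probability_color_scale(0.0)  # 'ff0000' red
    probability_color_scale(0.5)  # 'ffff00' yellow
    probability_color_scale(1.0)  # '00ff00' green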
-# Red component only, white for 0 perplexity (sorry if you're not in dark mode)
 def perplexity_color_scale(ppl):
+    '''
+    Red component only, white for 0 perplexity (sorry if you're not in dark mode)
+    '''
     value = hex(max(int(255.0 - params['ppl_scale'] * (float(ppl) - 1.0)), 0))[2:]
     if len(value) < 2:
         value = '0' * (2 - len(value)) + value

     return 'ff' + value + value
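With the default ppl_scale of 15 this channel runs from white at perplexity 1 to fully red at 18, per the formula above:

    perplexity_color_scale(1.0)   # 'ffffff' (255 - 15 * 0.0 = 255)
    perplexity_color_scale(9.5)   # 'ff7f7f' (255 - 15 * 8.5 = 127)
    perplexity_color_scale(18.0)  # 'ff0000' (clamped at 0)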
-# Green-yellow-red for probability and blue component for perplexity
 def probability_perplexity_color_scale(prob, ppl):
+    '''
+    Green-yellow-red for probability and blue component for perplexity
+    '''
     rv = 0
     gv = 0
     bv = hex(min(max(int(params['ppl_scale'] * (float(ppl) - 1.0)), 0), 255))[2:]
     if len(bv) < 2:
         bv = '0' * (2 - len(bv)) + bv

     if prob <= 0.5:
         rv = 'ff'
         gv = hex(int(255 * prob * 2))[2:]
@@ -175,41 +204,159 @@ def probability_perplexity_color_scale(prob, ppl):
         gv = 'ff'
         if len(rv) < 2:
             rv = '0' * (2 - len(rv)) + rv

     return rv + gv + bv

 def add_color_html(token, color):
     return f'<span style="color: #{color}">{token}</span>'
"""
# This is still very broken at the moment, needs CSS too but I'm not very good at CSS (and neither is GPT-4 apparently) so I still need to figure that out. # TODO: Major issue: Applying this to too many tokens will cause a permanent slowdown in generation speed until the messages are removed from the history.
def add_dropdown_html(token, color, top_tokens, top_probs): # I think the issue is from HTML elements taking up space in the visible history, and things like history deepcopy add latency proportional to the size of the history.
html = f'<span class="hoverable" style="color: #{color}">{token}<div class="dropdown"><table class="dropdown-content">' # Potential solution is maybe to modify the main generation code to send just the internal text and not the visible history, to avoid moving too much around.
for token, prob in zip(top_tokens, top_probs): # I wonder if we can also avoid using deepcopy here.
# TODO: Background color? Bold for selected token? # The whitespace fix here is not perfect -- it will remove whitespace of paragraph breaks and other particular cases.
# Bigger issue: Why is there a newline after the first token, and the dropdown fails there? def add_dropdown_html(token, color, top_tokens, top_probs, whitespace='', perplexity=0):
# The HTML ends up like <p><span>word</span></p><div>...</div>, if whitespace != '':
# even though for all other tokens it shows up correctly. whitespace = '&nbsp;'
html = f'<div class="hoverable"><span style="color: #{color}">{token}{whitespace}</span><div class="dropdown"><table class="dropdown-content"><tbody>'
for token_option, prob in zip(top_tokens, top_probs):
# TODO: Bold for selected token?
# Using divs prevented the problem of divs inside spans causing issues.
# Now the problem is that divs show the same whitespace of one space between every token.
# There is probably some way to fix this in CSS that I don't know about.
row_color = probability_color_scale(prob) row_color = probability_color_scale(prob)
html += f'<tr><td style="color: #{row_color}">{token}</td><td style="color: #{row_color}">{prob}</td></tr>' row_class = ' class="selected"' if token_option == token else ''
html += '</table></div></span>' html += f'<tr{row_class}><td style="color: #{row_color}">{token_option}</td><td style="color: #{row_color}">{prob:.4f}</td></tr>'
return html if perplexity != 0:
ppl_color = perplexity_color_scale(perplexity)
html += f'<tr><td>Perplexity:</td><td style="color: #{ppl_color}">{perplexity:.4f}</td></tr>'
html += '</tbody></table></div></div>\n' # The newline would normally be added by markdown.markdown() but this is faster.
return html # About 750 characters per token...
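For reference, roughly what a single token renders to (output abridged; colors elided):

    html = add_dropdown_html('cat', '00ff00', ['cat', 'dog'], [0.7, 0.2],
                             whitespace=' ', perplexity=1.5)
    # <div class="hoverable"><span style="color: #00ff00">cat&nbsp;</span><div class="dropdown">
    #   <table class="dropdown-content"><tbody>
    #     <tr class="selected"><td ...>cat</td><td ...>0.7000</td></tr>
    #     <tr><td ...>dog</td><td ...>0.2000</td></tr>
    #     <tr><td>Perplexity:</td><td ...>1.5000</td></tr>
    # </tbody></table></div></div>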
+def custom_css():
+    return """
+    .dropdown {
+        display: none;
+        position: absolute;
+        z-index: 50;
+        background-color: var(--block-background-fill);
+        box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
+        width: max-content;
+        overflow: visible;
+        padding: 5px;
+        border-radius: 10px;
+        border: 1px solid var(--border-color-primary);
+    }
+
+    .dropdown-content {
+        border: none;
+        z-index: 50;
+    }
+
+    .dropdown-content tr.selected {
+        background-color: var(--block-label-background-fill);
+    }
+
+    .dropdown-content td {
+        color: var(--body-text-color);
+    }
+
+    .hoverable {
+        color: var(--body-text-color);
+        position: relative;
+        display: inline-block;
+        overflow: visible;
+        font-size: 15px;
+        line-height: 1.75;
+        margin: 0;
+        padding: 0;
+        margin-right: -4px;
+    }
+
+    .hoverable:hover .dropdown {
+        display: block;
+    }
+
+    /* TODO: This makes the hover menus extend outside the bounds of the chat area, which is good.
+       However, it also makes the scrollbar disappear, which is bad.
+       The scroll bar needs to still be present. So for now, we can't see dropdowns that extend past the edge of the chat area.
+    .chat {
+        overflow-y: auto;
+    }
+    */
+    """
+# Monkeypatch applied to html_generator.py
+# This fixes an issue where the markdown conversion was causing a large slowdown in generation speeds if too many tokens had probability dropdowns added.
+# I'd rather have a more long-term solution, since this really shouldn't be called on all messages for each token, but this works for now.
+def convert_to_markdown(string):
+    # t0 = time.time()
+
+    # Blockquote
+    pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)
+    string = pattern.sub(replace_blockquote, string)
+
+    # Code
+    string = string.replace('\\begin{code}', '```')
+    string = string.replace('\\end{code}', '```')
+    string = re.sub(r"(.)```", r"\1\n```", string)
+
+    result = ''
+    is_code = False
+    for line in string.split('\n'):
+        if line.lstrip(' ').startswith('```'):
+            is_code = not is_code
+
+        result += line
+        if is_code or line.startswith('|'): # Don't add an extra \n for tables or code
+            result += '\n'
+        else:
+            result += '\n\n'
+
+    if is_code:
+        result = result + '```' # Unfinished code block
+
+    string = result.strip()
+    # t1 = time.time()
+    # print(len(string))
+    # print(f"Pre markdown: {(t1-t0):.3f} s")
+    if params['probability_dropdown'] and '<div class="hoverable">' in string:
+        # Prevents all latency introduced by trying to convert the HTML to markdown when it's not even necessary
+        # print('Monkeypatched')
+        return string
+    else:
+        # t0 = time.time()
+        return markdown.markdown(string, extensions=['fenced_code', 'tables'])
+        # t1 = time.time()
+        # print(f"Markdown: {(t1-t0):.3f} s for string of length {len(string)}")
+
+html_generator.convert_to_markdown = convert_to_markdown
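That last assignment is the whole monkeypatch: anything that resolves the function through the module object now gets this version. One general-Python caveat (not specific to this diff): callers that did `from modules.html_generator import convert_to_markdown` keep the original reference. A restorable variant:

    from modules import html_generator

    original = html_generator.convert_to_markdown  # keep a handle
    html_generator.convert_to_markdown = convert_to_markdown
    # html_generator.convert_to_markdown = original  # hypothetical rollback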
 def ui():
+    def update_active_check(x):
+        params.update({'active': x})
+
-    color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.")
     def update_color_by_ppl_check(x):
         params.update({'color_by_perplexity': x})
-    color_by_ppl_check.change(update_color_by_ppl_check, color_by_ppl_check, None)

-    color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.")
     def update_color_by_prob_check(x):
         params.update({'color_by_probability': x})
-    color_by_prob_check.change(update_color_by_prob_check, color_by_prob_check, None)

-    # Doesn't work yet...
-    """
-    prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown")
     def update_prob_dropdown_check(x):
         params.update({'probability_dropdown': x})
+
+    active_check = gradio.Checkbox(value=True, label="Compute probabilities and perplexity scores", info="Activate this extension. Note that this extension currently does not work with exllama or llama.cpp.")
+    color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.")
+    color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.")
+    prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown", info="Hover over a token to show a dropdown of top token probabilities. Currently slightly buggy with whitespace between tokens.")
+
+    active_check.change(update_active_check, active_check, None)
+    color_by_ppl_check.change(update_color_by_ppl_check, color_by_ppl_check, None)
+    color_by_prob_check.change(update_color_by_prob_check, color_by_prob_check, None)
     prob_dropdown_check.change(update_prob_dropdown_check, prob_dropdown_check, None)
-    """