text-generation-webui/modules/html_generator.py

323 lines
10 KiB
Python
Raw Normal View History

2024-03-17 16:29:03 +01:00
import functools
2023-08-16 18:23:29 +02:00
import html
import os
2023-01-07 03:14:08 +01:00
import re
import time
from pathlib import Path
2023-01-07 03:14:08 +01:00
2023-03-15 16:33:26 +01:00
import markdown
2023-04-05 03:52:15 +02:00
from PIL import Image, ImageOps
2024-01-04 04:27:26 +01:00
from modules import shared
2024-01-22 12:25:55 +01:00
from modules.utils import get_available_chat_styles
# Maps a profile picture path to [mtime, thumbnail path]; filled in by
# get_image_cache() so thumbnails are only rebuilt when the source changes.
image_cache = {}

# Built-in stylesheets, read once at import time. The paths are resolved
# relative to this module's directory rather than the working directory.
_module_dir = Path(__file__).resolve().parent

with open(_module_dir / '../css/html_readable_style.css', 'r') as readable_file:
    readable_css = readable_file.read()

with open(_module_dir / '../css/html_4chan_style.css', 'r') as chan_file:
    _4chan_css = chan_file.read()

with open(_module_dir / '../css/html_instruct_style.css', 'r') as instruct_file:
    instruct_css = instruct_file.read()
2023-03-15 16:33:26 +01:00
# Custom chat styles: one stylesheet per name returned by
# get_available_chat_styles(), read from css/chat_style-<name>.css.
# Note that this path is relative to the current working directory.
chat_styles = {}
for k in get_available_chat_styles():
    # A context manager closes each file handle deterministically instead of
    # leaving it open until the garbage collector gets to it.
    with open(Path(f'css/chat_style-{k}.css'), 'r') as style_file:
        chat_styles[k] = style_file.read()

# Handle styles that derive from other styles: when the first line of a
# stylesheet mentions another chat_style-<name>.css file, that first line is
# dropped and the referenced style's CSS is prepended to the remaining lines.
for k in chat_styles:
    lines = chat_styles[k].split('\n')
    input_string = lines[0]
    match = re.search(r'chat_style-([a-z\-]*)\.css', input_string)
    if match:
        style = match.group(1)
        # A parent style that was not loaded contributes an empty string
        # rather than raising KeyError.
        chat_styles[k] = chat_styles.get(style, '') + '\n\n' + '\n'.join(lines[1:])
2023-03-17 20:06:11 +01:00
def fix_newlines(string):
    '''
    Turn every run of newlines into exactly one blank line.

    Each newline is doubled first, runs of three or more newlines are then
    collapsed to two, and surrounding whitespace is stripped from the result.
    '''
    doubled = string.replace('\n', '\n\n')
    collapsed = re.sub(r"\n{3,}", "\n\n", doubled)
    return collapsed.strip()
2023-04-17 02:26:19 +02:00
2023-04-16 23:00:12 +02:00
def replace_blockquote(m):
    '''
    Substitution callback that rewrites a LaTeX-style blockquote as markdown.

    Takes the full text of the regex match `m`, prefixes every line after the
    first with "> ", and removes the \\begin{blockquote} / \\end{blockquote}
    markers.
    '''
    quoted = m.group().replace('\n', '\n> ')
    for marker in ('\\begin{blockquote}', '\\end{blockquote}'):
        quoted = quoted.replace(marker, '')

    return quoted
2023-04-17 02:26:19 +02:00
@functools.lru_cache(maxsize=4096)
def convert_to_markdown(string):
    '''
    Render a message string to HTML with the markdown library.

    Before rendering, LaTeX-style blockquote/code markers are rewritten to
    markdown, lines are separated by blank lines (except inside code fences
    and after table rows), and an unterminated code fence or trailing list
    marker is patched up so that partial text still renders sensibly.
    Results are memoized in an LRU cache holding up to 4096 entries.
    '''
    # Blockquote
    # NOTE(review): as written, this substitution replaces '>' with '>' and
    # therefore leaves the text unchanged; presumably it was meant to restore
    # an HTML-escaped '>' at the start of a line -- confirm against upstream.
    string = re.sub(r'(^|[\n])>', r'\1>', string)
    blockquote_re = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)
    string = blockquote_re.sub(replace_blockquote, string)

    # Code
    string = string.replace('\\begin{code}', '```').replace('\\end{code}', '```')
    # Make sure every fence starts on a line of its own.
    string = re.sub(r"(.)```", r"\1\n```", string)

    pieces = []
    inside_code = False
    for line in string.split('\n'):
        if line.lstrip(' ').startswith('```'):
            inside_code = not inside_code

        # Don't add an extra \n for tables or code
        separator = '\n' if inside_code or line.startswith('|') else '\n\n'
        pieces.append(line + separator)

    result = ''.join(pieces).strip()
    if inside_code:
        result += '\n```'  # Unfinished code block

    # Unfinished list, like "\n1.". A |delete| string is added and then
    # removed to force a <ol> or <ul> to be generated instead of a <p>.
    delete_str = '|delete|'
    has_unfinished_list = re.search(r'(\n\d+\.?|\n\*\s*)$', result) is not None
    if has_unfinished_list:
        if re.search(r'(\d+\.?)$', result) and not result.endswith('.'):
            result += '.'

        result = re.sub(r'(\n\d+\.?|\n\*\s*)$', r'\g<1> ' + delete_str, result)

    html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'])

    if has_unfinished_list:
        # Strip the last occurrence of the placeholder from the rendered HTML.
        pos = html_output.rfind(delete_str)
        if pos > -1:
            html_output = html_output[:pos] + html_output[pos + len(delete_str):]

    # Unescape code blocks
    code_re = re.compile(r'<code[^>]*>(.*?)</code>', re.DOTALL)
    html_output = code_re.sub(lambda found: html.unescape(found.group()), html_output)

    return html_output
2023-03-17 20:06:11 +01:00
def convert_to_markdown_wrapped(string, use_cache=True):
    '''
    Convert text to HTML, optionally bypassing the lru_cache.

    With use_cache=False the undecorated function is reached through
    __wrapped__, which avoids caching convert_to_markdown calls during
    streaming.
    '''
    converter = convert_to_markdown if use_cache else convert_to_markdown.__wrapped__
    return converter(string)
2023-03-17 20:06:11 +01:00
def generate_basic_html(string):
    '''
    Render text as markdown and wrap it in the "readable" page markup.

    The result embeds the readable stylesheet in a <style> tag followed by a
    div with class "readable-container" holding the rendered HTML.
    '''
    body = convert_to_markdown(string)
    return f'<style>{readable_css}</style><div class="readable-container">{body}</div>'
2023-01-15 20:43:31 +01:00
2023-01-07 03:14:08 +01:00
def process_post(post, c):
    '''
    Convert one raw post into its HTML fragment.

    The first line of `post` is the header; the token after its first space
    is used as the post number (IndexError is raised if the header contains
    no space). The remaining lines form the body: '>' is escaped, '>>123'
    style references are wrapped in a "quote" span, and line breaks become
    <br>. The argument `c` is accepted but not used.
    '''
    header, *body_lines = post.split('\n')
    number = header.split(' ')[1]

    src = '\n'.join(body_lines)
    src = src.replace('>', '&gt;')
    src = re.sub('(&gt;&gt;[0-9]*)', '<span class="quote">\\1</span>', src)
    src = src.replace('\n', '<br>\n')

    return (
        f'<span class="name">Anonymous </span> <span class="number">No.{number}</span>\n'
        f'<blockquote class="message_4chan">{src}\n'
    )
2023-01-11 05:10:11 +01:00
def generate_4chan_html(f):
    '''
    Render a 4chan-style text dump as HTML.

    Lines equal to '-----' are skipped and every line starting with '--- '
    begins a new post. Each post is converted with process_post(); the first
    one is wrapped in an "op" div and the others in "reply" divs. Finally,
    output lines that start with an escaped '>' are wrapped in a "greentext"
    span.
    '''
    rendered_posts = []
    current = ''
    c = -2
    for raw_line in f.splitlines():
        line = raw_line + "\n"
        if line == '-----\n':
            continue

        if line.startswith('--- '):
            # A new header closes whatever post was being accumulated.
            c += 1
            if current != '':
                rendered_posts.append(process_post(current, c))

            current = line
        else:
            current += line

    if current != '':
        rendered_posts.append(process_post(current, c))

    wrapped_posts = []
    for index, rendered in enumerate(rendered_posts):
        if index == 0:
            wrapped_posts.append(f'<div class="op">{rendered}</div>\n')
        else:
            wrapped_posts.append(f'<div class="reply">{rendered}</div>\n')

    output = (
        f'<style>{_4chan_css}</style><div id="parent"><div id="container">'
        + ''.join(wrapped_posts)
        + '</div></div>'
    )

    # Greentext is detected line by line, both on bare lines and on the
    # first line of a post body (right after the opening blockquote tag).
    html_lines = []
    for html_line in output.split('\n'):
        html_line = re.sub(r'^(&gt;(.*?)(<br>|</div>))', r'<span class="greentext">\1</span>', html_line)
        html_line = re.sub(r'^<blockquote class="message_4chan">(&gt;(.*?)(<br>|</div>))', r'<blockquote class="message_4chan"><span class="greentext">\1</span>', html_line)
        html_lines.append(html_line)

    return '\n'.join(html_lines)
2023-04-05 04:03:58 +02:00
def make_thumbnail(image):
    '''
    Return a thumbnail of `image` that is 350 pixels wide.

    The image is resized to a width of 350 while keeping its aspect ratio.
    If the resized image is taller than 470 pixels, it is then fitted to
    350x470 with ImageOps.fit. The same LANCZOS filter is used for both steps.
    '''
    # Use the Image.Resampling enum for both calls instead of mixing it with
    # the legacy Image.LANCZOS alias.
    resample = Image.Resampling.LANCZOS

    width, height = image.size
    image = image.resize((350, round(height / width * 350)), resample)

    if image.size[1] > 470:
        image = ImageOps.fit(image, (350, 470), resample)

    return image
def get_image_cache(path):
    '''
    Return the POSIX path of the cached PNG thumbnail for the image at `path`.

    The thumbnail is (re)generated when `path` has no entry in image_cache or
    when the file's modification time differs from the cached one. Thumbnails
    are written to shared.args.disk_cache_dir as cache_<name>.png.

    `path` must expose a `.name` attribute (e.g. a pathlib.Path).
    '''
    cache_folder = Path(shared.args.disk_cache_dir)
    # parents/exist_ok make this work for nested cache directories and avoid
    # failing if the folder appears between a check and the creation.
    cache_folder.mkdir(parents=True, exist_ok=True)

    mtime = os.stat(path).st_mtime
    cached = image_cache.get(path)
    if cached is None or cached[0] != mtime:
        # Close the source file as soon as the thumbnail has been produced;
        # resize() returns a new image, so it does not depend on the open file.
        with Image.open(path) as source_image:
            img = make_thumbnail(source_image)

        old_p = Path(f'{cache_folder}/{path.name}_cache.png')
        p = Path(f'{cache_folder}/cache_{path.name}.png')
        if old_p.exists():
            # Migrate files that use the old naming scheme. replace()
            # overwrites an existing target, whereas rename() raises on
            # Windows when the target already exists.
            old_p.replace(p)

        output_file = p
        img.convert('RGB').save(output_file, format='PNG')
        image_cache[path] = [mtime, output_file.as_posix()]

    return image_cache[path][1]
2023-04-05 16:49:59 +02:00
def generate_instruct_html(history):
    '''
    Build the HTML for a chat history in instruct mode.

    `history` is a sequence of rows; entry 0 of a row is the user message and
    entry 1 the assistant message. Every entry is rendered with
    convert_to_markdown_wrapped(); the last row is rendered without the cache.
    Empty user messages are omitted.
    '''
    parts = [f'<style>{instruct_css}</style><div class="chat" id="chat"><div class="messages">']
    last_index = len(history) - 1

    for i, _row in enumerate(history):
        row = [convert_to_markdown_wrapped(entry, use_cache=i != last_index) for entry in _row]

        if row[0]:  # don't display empty user messages
            parts.append(
                '\n'
                '<div class="user-message">\n'
                '<div class="text">\n'
                '<div class="message-body">\n'
                f'{row[0]}\n'
                '</div>\n'
                '</div>\n'
                '</div>\n'
            )

        parts.append(
            '\n'
            '<div class="assistant-message">\n'
            '<div class="text">\n'
            '<div class="message-body">\n'
            f'{row[1]}\n'
            '</div>\n'
            '</div>\n'
            '</div>\n'
        )

    parts.append("</div></div>")
    return ''.join(parts)
2023-12-04 02:45:50 +01:00
def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False):
    '''
    Build the HTML for a chat history using one of the custom chat styles.

    `style` selects the stylesheet from chat_styles. `name1` and `name2` are
    shown as the usernames of the user and the bot. Profile pictures are
    included only when the corresponding files exist under cache/. Every
    entry is rendered with convert_to_markdown_wrapped(); the last row is
    rendered without the cache. Empty user messages are omitted.
    '''
    parts = [f'<style>{chat_styles[style]}</style><div class="chat" id="chat"><div class="messages">']

    # We use ?character and ?time.time() to force the browser to reset caches
    img_bot = ''
    if Path("cache/pfp_character_thumb.png").exists():
        img_bot = f'<img src="file/cache/pfp_character_thumb.png?{character}" class="pfp_character">'

    img_me = ''
    if Path("cache/pfp_me.png").exists():
        cache_buster = time.time() if reset_cache else ""
        img_me = f'<img src="file/cache/pfp_me.png?{cache_buster}">'

    last_index = len(history) - 1
    for i, _row in enumerate(history):
        row = [convert_to_markdown_wrapped(entry, use_cache=i != last_index) for entry in _row]

        if row[0]:  # don't display empty user messages
            parts.append(
                '\n'
                '<div class="message">\n'
                '<div class="circle-you">\n'
                f'{img_me}\n'
                '</div>\n'
                '<div class="text">\n'
                '<div class="username">\n'
                f'{name1}\n'
                '</div>\n'
                '<div class="message-body">\n'
                f'{row[0]}\n'
                '</div>\n'
                '</div>\n'
                '</div>\n'
            )

        parts.append(
            '\n'
            '<div class="message">\n'
            '<div class="circle-bot">\n'
            f'{img_bot}\n'
            '</div>\n'
            '<div class="text">\n'
            '<div class="username">\n'
            f'{name2}\n'
            '</div>\n'
            '<div class="message-body">\n'
            f'{row[1]}\n'
            '</div>\n'
            '</div>\n'
            '</div>\n'
        )

    parts.append("</div></div>")
    return ''.join(parts)
2023-04-05 16:49:59 +02:00
2023-04-16 21:44:50 +02:00
def generate_chat_html(history, name1, name2, reset_cache=False):
    '''
    Build the HTML for a chat history using the "wpp" chat style.

    Entry 0 of each row is rendered in a "text-you" bubble and entry 1 in a
    "text-bot" bubble. Every entry is rendered with
    convert_to_markdown_wrapped(); the last row is rendered without the cache.
    Empty user messages are omitted. `name1`, `name2` and `reset_cache` are
    accepted but not used by this function.
    '''
    parts = [f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">']
    last_index = len(history) - 1

    for i, _row in enumerate(history):
        row = [convert_to_markdown_wrapped(entry, use_cache=i != last_index) for entry in _row]

        if row[0]:  # don't display empty user messages
            parts.append(
                '\n'
                '<div class="message">\n'
                '<div class="text-you">\n'
                '<div class="message-body">\n'
                f'{row[0]}\n'
                '</div>\n'
                '</div>\n'
                '</div>\n'
            )

        parts.append(
            '\n'
            '<div class="message">\n'
            '<div class="text-bot">\n'
            '<div class="message-body">\n'
            f'{row[1]}\n'
            '</div>\n'
            '</div>\n'
            '</div>\n'
        )

    parts.append("</div></div>")
    return ''.join(parts)
2023-04-05 16:49:59 +02:00
2023-12-04 02:45:50 +01:00
def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False):
    '''
    Pick the HTML generator for the visible chat history.

    Instruct mode takes precedence over the style; otherwise the "wpp" style
    uses generate_chat_html() and every other style uses
    generate_cai_chat_html(). Only the latter receives `reset_cache`.
    '''
    visible = history['visible']

    if mode == 'instruct':
        return generate_instruct_html(visible)

    if style == 'wpp':
        return generate_chat_html(visible, name1, name2)

    return generate_cai_chat_html(visible, name1, name2, style, character, reset_cache)