From 4b3384e353b9630bdc16efe946c02daa78b33f48 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:10:57 -0700 Subject: [PATCH] Handle unfinished lists during markdown streaming --- download-model.py | 12 ++++++------ modules/html_generator.py | 22 ++++++++++++++++++++-- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/download-model.py b/download-model.py index 0f650516..e1afa9ef 100644 --- a/download-model.py +++ b/download-model.py @@ -75,12 +75,12 @@ class ModelDownloader: if not is_lora and fname.endswith(('adapter_config.json', 'adapter_model.bin')): is_lora = True - is_pytorch = re.match("(pytorch|adapter|gptq)_model.*\.bin", fname) - is_safetensors = re.match(".*\.safetensors", fname) - is_pt = re.match(".*\.pt", fname) - is_ggml = re.match(".*ggml.*\.bin", fname) - is_tokenizer = re.match("(tokenizer|ice|spiece).*\.model", fname) - is_text = re.match(".*\.(txt|json|py|md)", fname) or is_tokenizer + is_pytorch = re.match(r"(pytorch|adapter|gptq)_model.*\.bin", fname) + is_safetensors = re.match(r".*\.safetensors", fname) + is_pt = re.match(r".*\.pt", fname) + is_ggml = re.match(r".*ggml.*\.bin", fname) + is_tokenizer = re.match(r"(tokenizer|ice|spiece).*\.model", fname) + is_text = re.match(r".*\.(txt|json|py|md)", fname) or is_tokenizer if any((is_pytorch, is_safetensors, is_pt, is_ggml, is_tokenizer, is_text)): if 'lfs' in dict[i]: sha256.append([fname, dict[i]['lfs']['oid']]) diff --git a/modules/html_generator.py b/modules/html_generator.py index ab0aeab0..c6ca13b6 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -61,8 +61,26 @@ def convert_to_markdown(string): if is_code: result = result + '```' # Unfinished code block - string = result.strip() - return markdown.markdown(string, extensions=['fenced_code', 'tables']) + result = result.strip() + + # Unfinished list, like "\n1.". A |delete| string is added and then + # removed to force a <ol>. + if re.search(r'(\d+\.?)$', result): + delete_str = '|delete|' + + if not result.endswith('.'): + result += '.' + + result = re.sub(r'(\d+\.)$', r'\g<1> ' + delete_str, result) + + html = markdown.markdown(result, extensions=['fenced_code', 'tables']) + pos = html.rfind(delete_str) + if pos > -1: + html = html[:pos] + html[pos + len(delete_str):] + else: + html = markdown.markdown(result, extensions=['fenced_code', 'tables']) + + return html def generate_basic_html(string):