From 4b3384e353b9630bdc16efe946c02daa78b33f48 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 3 Aug 2023 17:10:57 -0700
Subject: [PATCH] Handle unfinished lists during markdown streaming

---
 download-model.py         | 12 ++++++------
 modules/html_generator.py | 22 ++++++++++++++++++++--
 2 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/download-model.py b/download-model.py
index 0f650516..e1afa9ef 100644
--- a/download-model.py
+++ b/download-model.py
@@ -75,12 +75,12 @@ class ModelDownloader:
                 if not is_lora and fname.endswith(('adapter_config.json', 'adapter_model.bin')):
                     is_lora = True
 
-                is_pytorch = re.match("(pytorch|adapter|gptq)_model.*\.bin", fname)
-                is_safetensors = re.match(".*\.safetensors", fname)
-                is_pt = re.match(".*\.pt", fname)
-                is_ggml = re.match(".*ggml.*\.bin", fname)
-                is_tokenizer = re.match("(tokenizer|ice|spiece).*\.model", fname)
-                is_text = re.match(".*\.(txt|json|py|md)", fname) or is_tokenizer
+                is_pytorch = re.match(r"(pytorch|adapter|gptq)_model.*\.bin", fname)
+                is_safetensors = re.match(r".*\.safetensors", fname)
+                is_pt = re.match(r".*\.pt", fname)
+                is_ggml = re.match(r".*ggml.*\.bin", fname)
+                is_tokenizer = re.match(r"(tokenizer|ice|spiece).*\.model", fname)
+                is_text = re.match(r".*\.(txt|json|py|md)", fname) or is_tokenizer
                 if any((is_pytorch, is_safetensors, is_pt, is_ggml, is_tokenizer, is_text)):
                     if 'lfs' in dict[i]:
                         sha256.append([fname, dict[i]['lfs']['oid']])
diff --git a/modules/html_generator.py b/modules/html_generator.py
index ab0aeab0..c6ca13b6 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -61,8 +61,26 @@ def convert_to_markdown(string):
     if is_code:
         result = result + '```'  # Unfinished code block
 
-    string = result.strip()
-    return markdown.markdown(string, extensions=['fenced_code', 'tables'])
+    result = result.strip()
+
+    # Unfinished list, like "\n1.". A |delete| string is added and then
+    # removed to force a <ol> to be generated instead of a <p>.
+    if re.search(r'(\d+\.?)$', result):
+        delete_str = '|delete|'
+
+        if not result.endswith('.'):
+            result += '.'
+
+        result = re.sub(r'(\d+\.)$', r'\g<1> ' + delete_str, result)
+
+        html = markdown.markdown(result, extensions=['fenced_code', 'tables'])
+        pos = html.rfind(delete_str)
+        if pos > -1:
+            html = html[:pos] + html[pos + len(delete_str):]
+    else:
+        html = markdown.markdown(result, extensions=['fenced_code', 'tables'])
+
+    return html
 
 
 def generate_basic_html(string):