diff --git a/modules/html_generator.py b/modules/html_generator.py
index 8160f8b6..e61fc558 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -9,6 +9,7 @@ import markdown
from PIL import Image, ImageOps
from modules import shared
+from modules.sane_markdown_lists import SaneListExtension
from modules.utils import get_available_chat_styles
# This is to store the paths to the thumbnails of the profile pictures
@@ -174,7 +175,7 @@ def convert_to_markdown(string):
result += '\n'
# Also don't add an extra \n for lists
elif stripped_line.startswith('-') or stripped_line.startswith('*') or stripped_line.startswith('+') or stripped_line.startswith('>') or re.match(r'\d+\.', stripped_line):
- result += '\n'
+ result += ' \n'
else:
result += ' \n'
@@ -195,7 +196,7 @@ def convert_to_markdown(string):
result = re.sub(list_item_pattern, r'\g<1> ' + delete_str, result)
# Convert to HTML using markdown
- html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'], tab_length=2)
+ html_output = markdown.markdown(result, extensions=['fenced_code', 'tables', SaneListExtension()])
# Remove the delete string from the HTML output
pos = html_output.rfind(delete_str)
@@ -203,7 +204,7 @@ def convert_to_markdown(string):
html_output = html_output[:pos] + html_output[pos + len(delete_str):]
else:
# Convert to HTML using markdown
- html_output = markdown.markdown(result, extensions=['fenced_code', 'tables'], tab_length=2)
+ html_output = markdown.markdown(result, extensions=['fenced_code', 'tables', SaneListExtension()])
# Unescape code blocks
pattern = re.compile(r']*>(.*?)
', re.DOTALL)
diff --git a/modules/sane_markdown_lists.py b/modules/sane_markdown_lists.py
new file mode 100644
index 00000000..1e1d76fd
--- /dev/null
+++ b/modules/sane_markdown_lists.py
@@ -0,0 +1,336 @@
+# Code based on the Sane List Extension for Python-Markdown
+# =======================================
+
+# Modify the behavior of Lists in Python-Markdown to act in a sane manner.
+
+# See https://Python-Markdown.github.io/extensions/sane_lists
+# for documentation.
+
+# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com)
+
+# All changes Copyright 2011-2014 The Python Markdown Project
+
+# License: [BSD](https://opensource.org/licenses/bsd-license.php)
+
+"""
+Modify the behavior of Lists in Python-Markdown to act in a sane manner.
+"""
+
+from __future__ import annotations
+
+import re
+import xml.etree.ElementTree as etree
+from typing import TYPE_CHECKING
+
+from markdown import Extension
+from markdown.blockparser import BlockParser
+from markdown.blockprocessors import (
+ ListIndentProcessor,
+ OListProcessor,
+ ParagraphProcessor
+)
+
+if TYPE_CHECKING: # pragma: no cover
+ from markdown import blockparser
+
+
+# The minimum number of additional leading spaces needed to start a nested list.
+# The processors below also use it as the divisor when turning a run of leading
+# spaces into an indent level and, as `MIN_NESTED_LIST_INDENT - 1`, as the
+# upper bound on leading spaces for same-level items in the `CHILD_RE` patterns.
+MIN_NESTED_LIST_INDENT = 2
+# Import-time sanity check on the constant above.
+assert MIN_NESTED_LIST_INDENT > 1, "'MIN_NESTED_LIST_INDENT' must be > 1"
+
+
+class SaneListIndentProcessor(ListIndentProcessor):
+ """ Process children of list items.
+
+ Example
+
+ * a list item
+ process this part
+
+ or this part
+
+ """
+
+ def __init__(self, *args):
+ super().__init__(*args)
+ self.INDENT_RE = re.compile(r'^(([ ])+)')
+
+ def test(self, parent: etree.Element, block: str) -> bool:
+ return block.startswith(' ' * MIN_NESTED_LIST_INDENT) and \
+ not self.parser.state.isstate('detabbed') and \
+ (parent.tag in self.ITEM_TYPES or
+ (len(parent) and parent[-1] is not None and
+ (parent[-1].tag in self.LIST_TYPES)))
+
+ def get_level(self, parent: etree.Element, block: str) -> tuple[int, etree.Element]:
+ """ Get level of indentation based on list level. """
+ # Get indent level
+ m = self.INDENT_RE.match(block)
+ if m:
+ indent_level = len(m.group(1)) / MIN_NESTED_LIST_INDENT
+ else:
+ indent_level = 0
+ if self.parser.state.isstate('list'):
+ # We're in a tight-list - so we already are at correct parent.
+ level = 1
+ else:
+ # We're in a loose-list - so we need to find parent.
+ level = 0
+ # Step through children of tree to find matching indent level.
+ while indent_level > level:
+ child = self.lastChild(parent)
+ if (child is not None and
+ (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES)):
+ if child.tag in self.LIST_TYPES:
+ level += 1
+ parent = child
+ else:
+ # No more child levels. If we're short of `indent_level`,
+ # we have a code block. So we stop here.
+ break
+ return level, parent
+
+ def detab(self, text: str, length: int | None = None) -> tuple[str, str]:
+ """ Remove a tab from the front of each line of the given text. """
+ if length is None:
+ length = MIN_NESTED_LIST_INDENT
+ newtext = []
+ lines = text.split('\n')
+ for line in lines:
+ if line.startswith(' ' * length):
+ newtext.append(line[length:])
+ elif not line.strip():
+ newtext.append('')
+ else:
+ break
+ return '\n'.join(newtext), '\n'.join(lines[len(newtext):])
+
+ def looseDetab(self, text: str, level: int = 1) -> str:
+ """ Remove indentation from front of lines but allowing dedented lines. """
+ lines = text.split('\n')
+ for i in range(len(lines)):
+ if lines[i].startswith(' ' * MIN_NESTED_LIST_INDENT * level):
+ lines[i] = lines[i][MIN_NESTED_LIST_INDENT * level:]
+ return '\n'.join(lines)
+
+
+class SaneOListProcessor(OListProcessor):
+ """ Process ordered list blocks.
+
+ Overrides `SIBLING_TAGS` to not include `ul` and sets `LAZY_OL` to `False`.
+ Lines indented by at least `MIN_NESTED_LIST_INDENT` spaces are treated as
+ nested content of the preceding item.
+ """
+
+ SIBLING_TAGS = ['ol']
+ """ Exclude `ul` from list of siblings. """
+ LAZY_OL = False
+ """ Disable lazy list behavior. """
+
+ def __init__(self, parser: blockparser.BlockParser):
+ """ Run the base setup, then install the item-detection patterns. """
+ super().__init__(parser)
+ # This restriction stems from the 'CodeBlockProcessor' class,
+ # which automatically matches blocks with an indent = self.tab_length
+ max_list_start_indent = self.tab_length - 1
+ # Detect an item (e.g., `1. item`), optionally preceded by up to two
+ # `*` or `_` characters (e.g., `**1. item`).
+ self.RE = re.compile(r'^[ ]{0,%d}[\*_]{0,2}\d+\.[ ]+(.*)' % max_list_start_indent)
+ # Detect a same-level ordered item on any line of the block: at most
+ # `MIN_NESTED_LIST_INDENT - 1` leading spaces. Group 1 holds the optional
+ # leading `*`/`_` characters, group 2 the `N.` marker, group 4 the item text.
+ self.CHILD_RE = re.compile(r'^[ ]{0,%d}([\*_]{0,2})((\d+\.))[ ]+(.*)' % (MIN_NESTED_LIST_INDENT - 1))
+ # Detect indented (nested) items of either type: between
+ # `MIN_NESTED_LIST_INDENT` and `tab_length * 2 - 1` leading spaces,
+ # followed by an `N.` marker or one of `*`, `+`, `-`.
+ self.INDENT_RE = re.compile(r'^[ ]{%d,%d}[\*_]{0,2}((\d+\.)|[*+-])[ ]+.*' %
+ (MIN_NESTED_LIST_INDENT, self.tab_length * 2 - 1))
+
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
+ """ Consume the first block of `blocks` and add its items to a list element.
+
+ Three cases are handled:
+
+ * the last child of `parent` is a sibling list: existing item text is
+ wrapped in a `p` and the first new item is parsed under the
+ `looselist` state;
+ * `parent` is itself an `ol`/`ul`: items are added to it directly;
+ * otherwise a new `self.TAG` element is created under `parent`.
+
+ The remaining items are parsed under the `list` state.
+ """
+ # Check for multiple items in one block.
+ items = self.get_items(blocks.pop(0))
+ sibling = self.lastChild(parent)
+
+ if sibling is not None and sibling.tag in self.SIBLING_TAGS:
+ # Previous block was a list item, so set that as parent
+ lst = sibling
+ # make sure previous item is in a `p` - if the item has text,
+ # then it isn't in a `p`
+ if lst[-1].text:
+ # since it's possible there are other children for this
+ # sibling, we can't just `SubElement` the `p`, we need to
+ # insert it as the first item.
+ p = etree.Element('p')
+ p.text = lst[-1].text
+ lst[-1].text = ''
+ lst[-1].insert(0, p)
+ # if the last item has a tail, then the tail needs to be put in a `p`
+ # likely only when a header is not followed by a blank line
+ lch = self.lastChild(lst[-1])
+ if lch is not None and lch.tail:
+ p = etree.SubElement(lst[-1], 'p')
+ p.text = lch.tail.lstrip()
+ lch.tail = ''
+
+ # parse first block differently as it gets wrapped in a `p`.
+ li = etree.SubElement(lst, 'li')
+ self.parser.state.set('looselist')
+ firstitem = items.pop(0)
+ self.parser.parseBlocks(li, [firstitem])
+ self.parser.state.reset()
+ elif parent.tag in ['ol', 'ul']:
+ # this catches the edge case of a multi-item indented list whose
+ # first item is in a blank parent-list item:
+ # * * subitem1
+ # * subitem2
+ # see also `ListIndentProcessor`
+ lst = parent
+ else:
+ # This is a new list so create parent with appropriate tag.
+ lst = etree.SubElement(parent, self.TAG)
+ # Check if a custom start integer is set
+ # (`STARTSWITH` is assigned by `get_items` from the first item's number).
+ if not self.LAZY_OL and self.STARTSWITH != '1':
+ lst.attrib['start'] = self.STARTSWITH
+
+ # Every `state.set` in this method is paired with a `state.reset`.
+ self.parser.state.set('list')
+ # Loop through items in block, recursively parsing each with the
+ # appropriate parent.
+ for item in items:
+ if item.startswith(" " * MIN_NESTED_LIST_INDENT):
+ # Item is indented. Parse with last item as parent
+ self.parser.parseBlocks(lst[-1], [item])
+ else:
+ # New item. Create `li` and parse with it as parent
+ li = etree.SubElement(lst, 'li')
+ self.parser.parseBlocks(li, [item])
+ self.parser.state.reset()
+
+ def looseDetab(self, text: str, indent_length: int, level: int = 1) -> str:
+ """ Remove indentation from front of lines but allowing dedented lines.
+
+ Each line that starts with `indent_length * level` spaces loses exactly
+ that many characters; every other line is kept unchanged.
+ """
+ lines = text.split('\n')
+ for i in range(len(lines)):
+ if lines[i].startswith(' ' * indent_length * level):
+ lines[i] = lines[i][indent_length * level:]
+ return '\n'.join(lines)
+
+ def get_items(self, block: str) -> list[str]:
+ """ Break a block into list items.
+
+ Same-level items are returned without their `N.` marker (any leading
+ `*`/`_` characters are kept in front of the text). Indented lines keep
+ their indentation and either start a new entry or are appended to the
+ previous one. As a side effect, `self.STARTSWITH` is set to the number
+ of the first item.
+ """
+ # If first level of list is indented, remove that indentation
+ if (indent_len := len(block) - len(block.lstrip())) > 0:
+ block = self.looseDetab(block, indent_len)
+ items = []
+ for line in block.split('\n'):
+ m = self.CHILD_RE.match(line)
+ if m:
+ # This is a new list item
+ # Check first item for the start index
+ if not items:
+ # Detect the integer value of first list item
+ INTEGER_RE = re.compile(r'(\d+)')
+ self.STARTSWITH = INTEGER_RE.match(m.group(2)).group()
+ # Append to the list
+ items.append(m.group(1) + m.group(4))
+ elif self.INDENT_RE.match(line):
+ # This is an indented (possibly nested) item.
+ # NOTE: `items[-1]` here and in the final branch relies on the
+ # first line of the block having matched `CHILD_RE`.
+ if items[-1].startswith(' ' * MIN_NESTED_LIST_INDENT):
+ # Previous item was indented. Append to that item.
+ items[-1] = '{}\n{}'.format(items[-1], line)
+ else:
+ items.append(line)
+ else:
+ # This is another line of previous item. Append to that item.
+ items[-1] = '{}\n{}'.format(items[-1], line)
+ return items
+
+
+class SaneUListProcessor(SaneOListProcessor):
+ """ Override `SIBLING_TAGS` to not include `ol`. """
+
+ TAG: str = 'ul'
+ SIBLING_TAGS = ['ul']
+ """ Exclude `ol` from list of siblings. """
+
+ def __init__(self, parser: blockparser.BlockParser):
+ super().__init__(parser)
+ # Detect an item (e.g., `- item` or `+ item` or `* item`).
+ max_list_start_indent = self.tab_length - 1
+ self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % max_list_start_indent)
+ self.CHILD_RE = re.compile(r'^[ ]{0,%d}(([*+-]))[ ]+(.*)' % (MIN_NESTED_LIST_INDENT - 1))
+
+ def get_items(self, block: str) -> list[str]:
+ """ Break a block into list items. """
+ # If first level of list is indented, remove that indentation
+ if (indent_len := len(block) - len(block.lstrip())) > 0:
+ block = self.looseDetab(block, indent_len)
+ items = []
+ for line in block.split('\n'):
+ m = self.CHILD_RE.match(line)
+ if m:
+ # Append to the list
+ items.append(m.group(3))
+ elif self.INDENT_RE.match(line):
+ # This is an indented (possibly nested) item.
+ if items[-1].startswith(' ' * MIN_NESTED_LIST_INDENT):
+ # Previous item was indented. Append to that item.
+ items[-1] = '{}\n{}'.format(items[-1], line)
+ else:
+ items.append(line)
+ else:
+ # This is another line of previous item. Append to that item.
+ items[-1] = '{}\n{}'.format(items[-1], line)
+ return items
+
+
+class SaneParagraphProcessor(ParagraphProcessor):
+ """ Process Paragraph blocks.
+
+ Outside of a tight list, a paragraph that contains a line ending in two
+ whitespace characters followed by a line that looks like the start of a
+ list is split at that point; the remainder is parsed separately with the
+ new `p` element as its parent.
+ """
+
+ def __init__(self, parser: BlockParser):
+ """ Run the base setup, then install the pattern that finds an embedded list. """
+ super().__init__(parser)
+ max_list_start_indent = self.tab_length - 1
+ # Two whitespace characters and a newline, then (group 1) up to
+ # `tab_length - 1` whitespace characters and a single digit, `+`, `*` or `-`.
+ # NOTE(review): only the first character of the next line is checked, so a
+ # line such as `-5 degrees` or `2023 ...` also matches - confirm intended.
+ self.LIST_RE = re.compile(r"\s{2}\n(\s{0,%d}[\d+*-])" % max_list_start_indent)
+
+ def run(self, parent: etree.Element, blocks: list[str]) -> None:
+ """ Consume the first block of `blocks` and attach its text to `parent`.
+
+ Blank blocks are discarded. In the `list` state the text is appended to
+ `parent.text` or to the tail of the last child; otherwise a `p` element
+ is created.
+ """
+ block = blocks.pop(0)
+ if block.strip():
+ # Not a blank block. Add to parent, otherwise throw it away.
+ if self.parser.state.isstate('list'):
+ # The parent is a tight-list.
+ #
+ # Check for any children. This will likely only happen in a
+ # tight-list when a header isn't followed by a blank line.
+ # For example:
+ #
+ # * # Header
+ # Line 2 of list item - not part of header.
+ sibling = self.lastChild(parent)
+ if sibling is not None:
+ # Insert after sibling.
+ if sibling.tail:
+ sibling.tail = '{}\n{}'.format(sibling.tail, block)
+ else:
+ sibling.tail = '\n%s' % block
+ else:
+ # Append to parent.text
+ if parent.text:
+ parent.text = '{}\n{}'.format(parent.text, block)
+ else:
+ parent.text = block.lstrip()
+ else:
+ # Check if paragraph contains a list
+ next_list_block = None
+ if list_match := self.LIST_RE.search(block):
+ # Split just before group 1, i.e. at the start of the line
+ # that looks like a list item.
+ list_start = list_match.end() - len(list_match.group(1))
+ next_list_block = block[list_start:]
+ block = block[:list_start]
+
+ # Create a regular paragraph
+ p = etree.SubElement(parent, 'p')
+ p.text = block.lstrip()
+
+ # If a list was found, parse its block separately with the paragraph as the parent
+ if next_list_block:
+ self.parser.parseBlocks(p, [next_list_block])
+
+
+class SaneListExtension(Extension):
+ """ Add sane lists to Markdown. """
+
+ def extendMarkdown(self, md):
+ """ Override existing Processors. """
+ md.parser.blockprocessors.register(SaneListIndentProcessor(md.parser), 'indent', 90)
+ md.parser.blockprocessors.register(SaneOListProcessor(md.parser), 'olist', 40)
+ md.parser.blockprocessors.register(SaneUListProcessor(md.parser), 'ulist', 30)
+ md.parser.blockprocessors.register(SaneParagraphProcessor(md.parser), 'paragraph', 10)
+
+
+def makeExtension(**kwargs): # pragma: no cover
+ return SaneListExtension(**kwargs)