Allow lists indented with tabs
Clean up custom Markdown overrides
This commit is contained in:
parent
70f02e24ac
commit
a32795fbe6
@ -15,7 +15,7 @@
|
|||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import markdown
|
import markdown
|
||||||
import markdown.extensions.extra, markdown.extensions.footnotes, markdown.extensions.attr_list
|
import markdown.extensions.admonition, markdown.extensions.extra, markdown.extensions.footnotes, markdown.extensions.attr_list
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
@ -40,6 +40,8 @@ class WNMarkdown(markdown.Markdown):
|
|||||||
# Override default Markdown processors
|
# Override default Markdown processors
|
||||||
self.preprocessors.register(NormalizeWhitespace(self), 'normalize_whitespace', 30)
|
self.preprocessors.register(NormalizeWhitespace(self), 'normalize_whitespace', 30)
|
||||||
self.parser.blockprocessors.register(HashHeaderProcessor(self.parser), 'hashheader', 70)
|
self.parser.blockprocessors.register(HashHeaderProcessor(self.parser), 'hashheader', 70)
|
||||||
|
self.parser.blockprocessors.register(ListIndentProcessor(self.parser), 'indent', 90)
|
||||||
|
self.parser.blockprocessors.register(UListProcessor(self.parser), 'ulist', 30)
|
||||||
self.treeprocessors.register(AttrListTreeprocessor(self), 'attr_list', 8)
|
self.treeprocessors.register(AttrListTreeprocessor(self), 'attr_list', 8)
|
||||||
|
|
||||||
# Our own processors
|
# Our own processors
|
||||||
@ -153,6 +155,8 @@ class DirectiveProcessor(markdown.blockprocessors.BlockProcessor):
|
|||||||
if b.startswith('\t'):
|
if b.startswith('\t'):
|
||||||
blocks.pop(0)
|
blocks.pop(0)
|
||||||
content += b
|
content += b
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
content, theRest = self.parser.md.detab(content)
|
content, theRest = self.parser.md.detab(content)
|
||||||
|
|
||||||
@ -202,104 +206,154 @@ class WrapSectionProcessor(markdown.treeprocessors.Treeprocessor):
|
|||||||
|
|
||||||
# Adapted from Python-Markdown
|
# Adapted from Python-Markdown
|
||||||
# Allow tabs
|
# Allow tabs
|
||||||
class AdmonitionProcessor(markdown.blockprocessors.BlockProcessor):
|
class AdmonitionProcessor(markdown.extensions.admonition.AdmonitionProcessor):
|
||||||
CLASSNAME = 'admonition'
|
|
||||||
CLASSNAME_TITLE = 'admonition-title'
|
|
||||||
RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)')
|
|
||||||
RE_SPACES = re.compile(' +|\t+')
|
|
||||||
|
|
||||||
def test(self, parent, block):
|
def test(self, parent, block):
|
||||||
sibling = self.lastChild(parent)
|
sibling = self.lastChild(parent)
|
||||||
return self.RE.search(block) or \
|
return self.RE.search(block) or \
|
||||||
((block.startswith(' ' * self.tab_length) or block.startswith('\t')) and sibling is not None and
|
((block.startswith(' ' * self.tab_length) or block.startswith('\t')) and sibling is not None and
|
||||||
sibling.get('class', '').find(self.CLASSNAME) != -1)
|
sibling.get('class', '').find(self.CLASSNAME) != -1)
|
||||||
|
|
||||||
def run(self, parent, blocks):
|
def detab(self, text):
|
||||||
sibling = self.lastChild(parent)
|
return self.parser.md.detab(text)
|
||||||
block = blocks.pop(0)
|
|
||||||
m = self.RE.search(block)
|
# Adapted from Python-Markdown
|
||||||
|
# Allow tabs
|
||||||
if m:
|
class ListIndentProcessor(markdown.blockprocessors.ListIndentProcessor):
|
||||||
block = block[m.end():] # removes the first line
|
def __init__(self, parser):
|
||||||
|
super().__init__(parser)
|
||||||
block, theRest = self.parser.md.detab(block)
|
# Allow tabs
|
||||||
|
self.INDENT_RE = re.compile(r'^(([ ]{%s}|\t)+)' % self.tab_length)
|
||||||
if m:
|
|
||||||
klass, title = self.get_class_and_title(m)
|
|
||||||
div = ET.SubElement(parent, 'div')
|
|
||||||
div.set('class', '{} {}'.format(self.CLASSNAME, klass))
|
|
||||||
if title:
|
|
||||||
p = ET.SubElement(div, 'p')
|
|
||||||
p.text = title
|
|
||||||
p.set('class', self.CLASSNAME_TITLE)
|
|
||||||
else:
|
|
||||||
div = sibling
|
|
||||||
|
|
||||||
self.parser.parseChunk(div, block)
|
|
||||||
|
|
||||||
if theRest:
|
|
||||||
# This block contained unindented line(s) after the first indented
|
|
||||||
# line. Insert these lines as the first block of the master blocks
|
|
||||||
# list for future processing.
|
|
||||||
blocks.insert(0, theRest)
|
|
||||||
|
|
||||||
def get_class_and_title(self, match):
|
def test(self, parent, block):
|
||||||
klass, title = match.group(1).lower(), match.group(2)
|
# Allow tabs
|
||||||
klass = self.RE_SPACES.sub(' ', klass)
|
return (block.startswith(' '*self.tab_length) or block.startswith('\t')) and not self.parser.state.isstate('detabbed') and (parent.tag in self.ITEM_TYPES or (len(parent) and parent[-1] is not None and (parent[-1].tag in self.LIST_TYPES)))
|
||||||
if title is None:
|
|
||||||
# no title was provided, use the capitalized classname as title
|
def get_level(self, parent, block):
|
||||||
# e.g.: `!!! note` will render
|
m = self.INDENT_RE.match(block)
|
||||||
# `<p class="admonition-title">Note</p>`
|
if m:
|
||||||
title = klass.split(' ', 1)[0].capitalize()
|
# Allow tabs
|
||||||
elif title == '':
|
if m.group(1).startswith('\t'):
|
||||||
# an explicit blank title should not be rendered
|
indent_level = len(m.group(1))
|
||||||
# e.g.: `!!! warning ""` will *not* render `p` with a title
|
else:
|
||||||
title = None
|
indent_level = len(m.group(1))/self.tab_length
|
||||||
return klass, title
|
else:
|
||||||
|
indent_level = 0
|
||||||
|
if self.parser.state.isstate('list'):
|
||||||
|
level = 1
|
||||||
|
else:
|
||||||
|
level = 0
|
||||||
|
while indent_level > level:
|
||||||
|
child = self.lastChild(parent)
|
||||||
|
if (child is not None and (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES)):
|
||||||
|
if child.tag in self.LIST_TYPES:
|
||||||
|
level += 1
|
||||||
|
parent = child
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
return level, parent
|
||||||
|
|
||||||
|
def looseDetab(self, text, level=1):
    """Strip ``level`` indentation levels from the front of each line.

    One indentation level is either a single tab character or
    ``self.tab_length`` spaces; the two forms may be mixed on one line.
    A line that does not begin with at least ``level`` such units is
    returned untouched, so dedented lines pass through unchanged.

    Args:
        text: The block to process, as newline-separated lines.
        level: How many indentation levels to remove (default 1).
            Values below 1 leave the text unchanged.

    Returns:
        The processed lines joined back together with newlines.
    """
    if level < 1:
        return text
    # Allow tabs: match exactly `level` leading units in one pass so that a
    # line is never stripped twice (spaces first, then a following tab) and
    # tab-indented lines honour `level` just like space-indented ones.
    indent_re = re.compile(r'^(?:[ ]{%d}|\t){%d}' % (self.tab_length, level))
    return '\n'.join(indent_re.sub('', line) for line in text.split('\n'))
|
||||||
|
|
||||||
|
class OListProcessor(markdown.blockprocessors.OListProcessor):
    """Ordered-list block processor that also accepts tab-indented items.

    Adapted from Python-Markdown. Attributes used below but not defined in
    this class (TAG, STARTSWITH, LAZY_OL, SIBLING_TAGS, CHILD_RE, tab_length,
    lastChild) come from the base class.
    """

    def __init__(self, parser):
        """Set up the base processor, then widen INDENT_RE to accept a tab."""
        super().__init__(parser)
        # Allow tabs: a nested item starts with one tab, or with between
        # tab_length and (2 * tab_length - 1) spaces, followed by a marker.
        fewest = self.tab_length
        most = self.tab_length * 2 - 1
        self.INDENT_RE = re.compile(r'^(?:[ ]{%d,%d}|\t)((\d+\.)|[*+-])[ ]+.*' % (fewest, most))

    def run(self, parent, blocks):
        """Consume the first block and build list items from it under parent.

        The block is split with get_items(); the items are attached either to
        an existing sibling list, to ``parent`` itself when it is already an
        ``ol``/``ul``, or to a newly created list element.
        """
        items = self.get_items(blocks.pop(0))
        previous = self.lastChild(parent)

        if previous is not None and previous.tag in self.SIBLING_TAGS:
            # Continue the preceding list.
            lst = previous
            last_item = lst[-1]
            if last_item.text:
                # Move bare text of the last item into a leading <p>.
                wrapper = ET.Element('p')
                wrapper.text = last_item.text
                last_item.text = ''
                last_item.insert(0, wrapper)
            trailing = self.lastChild(last_item)
            if trailing is not None and trailing.tail:
                # Move tail text of the last child into its own <p>.
                tail_par = ET.SubElement(last_item, 'p')
                tail_par.text = trailing.tail.lstrip()
                trailing.tail = ''
            # The first new item is parsed under the 'looselist' state.
            li = ET.SubElement(lst, 'li')
            self.parser.state.set('looselist')
            self.parser.parseBlocks(li, [items.pop(0)])
            self.parser.state.reset()
        elif parent.tag in ['ol', 'ul']:
            # Parent is already a list element; add items to it directly.
            lst = parent
        else:
            # Start a new list of this processor's type.
            lst = ET.SubElement(parent, self.TAG)
            if not self.LAZY_OL and self.STARTSWITH != '1':
                lst.attrib['start'] = self.STARTSWITH

        self.parser.state.set('list')
        # Allow tabs: an item is "indented" if it starts with a full
        # tab_length run of spaces or with a tab.
        indent_prefixes = (' ' * self.tab_length, '\t')
        for item in items:
            if item.startswith(indent_prefixes):
                # Indented item: parse it inside the most recent list item.
                target = lst[-1]
            else:
                # Regular item: give it a fresh <li>.
                target = ET.SubElement(lst, 'li')
            self.parser.parseBlocks(target, [item])
        self.parser.state.reset()

    def get_items(self, block):
        """Split a block into a list of item strings, one per list entry.

        Lines matching CHILD_RE start a new item (group 3 is kept). Lines
        matching INDENT_RE start a new indented item unless the previous item
        is itself indented, in which case they are appended to it. Any other
        line is appended to the previous item.
        """
        items = []
        # Allow tabs
        indent_prefixes = (' ' * self.tab_length, '\t')
        for line in block.split('\n'):
            marker = self.CHILD_RE.match(line)
            if marker is not None:
                if self.TAG == 'ol' and not items:
                    # Remember the number the first item starts with.
                    self.STARTSWITH = re.match(r'(\d+)', marker.group(1)).group()
                items.append(marker.group(3))
                continue
            if self.INDENT_RE.match(line) and not items[-1].startswith(indent_prefixes):
                items.append(line)
            else:
                items[-1] = '{}\n{}'.format(items[-1], line)
        return items
|
||||||
|
|
||||||
|
class UListProcessor(OListProcessor):
    """Unordered-list variant of the tab-aware OListProcessor."""

    TAG = 'ul'

    def __init__(self, parser):
        """Initialise as OListProcessor, then swap RE for a bullet matcher."""
        super().__init__(parser)
        # A bullet item: up to (tab_length - 1) leading spaces, one of
        # `*`, `+` or `-`, at least one space, then the item text (group 1).
        max_lead = self.tab_length - 1
        self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % (max_lead))
|
||||||
|
|
||||||
# Adapted from Python-Markdown
|
# Adapted from Python-Markdown
|
||||||
# Fix for tables
|
# Fix for tables
|
||||||
class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor):
|
class AttrListTreeprocessor(markdown.extensions.attr_list.AttrListTreeprocessor):
|
||||||
BASE_RE = r'\{\:?([^\}\n]*)\}'
|
|
||||||
HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
|
|
||||||
BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
|
|
||||||
INLINE_RE = re.compile(r'^%s' % BASE_RE)
|
|
||||||
NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
|
|
||||||
r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
|
|
||||||
r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
|
|
||||||
r'\uf900-\ufdcf\ufdf0-\ufffd'
|
|
||||||
r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')
|
|
||||||
|
|
||||||
def run(self, doc):
|
def run(self, doc):
|
||||||
for elem in doc.iter():
|
for elem in doc.iter():
|
||||||
if self.md.is_block_level(elem.tag):
|
if self.md.is_block_level(elem.tag):
|
||||||
# Block level: check for attrs on last line of text
|
|
||||||
RE = self.BLOCK_RE
|
RE = self.BLOCK_RE
|
||||||
if markdown.extensions.attr_list.isheader(elem) or elem.tag == 'dt':
|
if markdown.extensions.attr_list.isheader(elem) or elem.tag == 'dt':
|
||||||
# header or def-term: check for attrs at end of line
|
|
||||||
RE = self.HEADER_RE
|
RE = self.HEADER_RE
|
||||||
if len(elem) and elem.tag == 'li':
|
if len(elem) and elem.tag == 'li':
|
||||||
# special case list items. children may include a ul or ol.
|
|
||||||
pos = None
|
pos = None
|
||||||
# find the ul or ol position
|
|
||||||
for i, child in enumerate(elem):
|
for i, child in enumerate(elem):
|
||||||
if child.tag in ['ul', 'ol']:
|
if child.tag in ['ul', 'ol']:
|
||||||
pos = i
|
pos = i
|
||||||
break
|
break
|
||||||
if pos is None and elem[-1].tail:
|
if pos is None and elem[-1].tail:
|
||||||
# use tail of last child. no ul or ol.
|
|
||||||
m = RE.search(elem[-1].tail)
|
m = RE.search(elem[-1].tail)
|
||||||
if m:
|
if m:
|
||||||
self.assign_attrs(elem, m.group(1))
|
self.assign_attrs(elem, m.group(1))
|
||||||
elem[-1].tail = elem[-1].tail[:m.start()]
|
elem[-1].tail = elem[-1].tail[:m.start()]
|
||||||
elif pos is not None and pos > 0 and elem[pos-1].tail:
|
elif pos is not None and pos > 0 and elem[pos-1].tail:
|
||||||
# use tail of last child before ul or ol
|
|
||||||
m = RE.search(elem[pos-1].tail)
|
m = RE.search(elem[pos-1].tail)
|
||||||
if m:
|
if m:
|
||||||
self.assign_attrs(elem, m.group(1))
|
self.assign_attrs(elem, m.group(1))
|
||||||
elem[pos-1].tail = elem[pos-1].tail[:m.start()]
|
elem[pos-1].tail = elem[pos-1].tail[:m.start()]
|
||||||
elif elem.text:
|
elif elem.text:
|
||||||
# use text. ul is first child.
|
|
||||||
m = RE.search(elem.text)
|
m = RE.search(elem.text)
|
||||||
if m:
|
if m:
|
||||||
self.assign_attrs(elem, m.group(1))
|
self.assign_attrs(elem, m.group(1))
|
||||||
@ -313,16 +367,13 @@ class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor):
|
|||||||
# Remove last row
|
# Remove last row
|
||||||
elem[-1].remove(elem[-1][-1]) # tbody -> tr
|
elem[-1].remove(elem[-1][-1]) # tbody -> tr
|
||||||
elif len(elem) and elem[-1].tail:
|
elif len(elem) and elem[-1].tail:
|
||||||
# has children. Get from tail of last child
|
|
||||||
m = RE.search(elem[-1].tail)
|
m = RE.search(elem[-1].tail)
|
||||||
if m:
|
if m:
|
||||||
self.assign_attrs(elem, m.group(1))
|
self.assign_attrs(elem, m.group(1))
|
||||||
elem[-1].tail = elem[-1].tail[:m.start()]
|
elem[-1].tail = elem[-1].tail[:m.start()]
|
||||||
if markdown.extensions.attr_list.isheader(elem):
|
if markdown.extensions.attr_list.isheader(elem):
|
||||||
# clean up trailing #s
|
|
||||||
elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
|
elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
|
||||||
elif elem.text:
|
elif elem.text:
|
||||||
# no children. Get from text.
|
|
||||||
m = RE.search(elem.text)
|
m = RE.search(elem.text)
|
||||||
if not m and elem.tag == 'td':
|
if not m and elem.tag == 'td':
|
||||||
m = re.search(self.BASE_RE, elem.text)
|
m = re.search(self.BASE_RE, elem.text)
|
||||||
@ -330,36 +381,13 @@ class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor):
|
|||||||
self.assign_attrs(elem, m.group(1))
|
self.assign_attrs(elem, m.group(1))
|
||||||
elem.text = elem.text[:m.start()]
|
elem.text = elem.text[:m.start()]
|
||||||
if markdown.extensions.attr_list.isheader(elem):
|
if markdown.extensions.attr_list.isheader(elem):
|
||||||
# clean up trailing #s
|
|
||||||
elem.text = elem.text.rstrip('#').rstrip()
|
elem.text = elem.text.rstrip('#').rstrip()
|
||||||
else:
|
else:
|
||||||
# inline: check for attrs at start of tail
|
|
||||||
if elem.tail:
|
if elem.tail:
|
||||||
m = self.INLINE_RE.match(elem.tail)
|
m = self.INLINE_RE.match(elem.tail)
|
||||||
if m:
|
if m:
|
||||||
self.assign_attrs(elem, m.group(1))
|
self.assign_attrs(elem, m.group(1))
|
||||||
elem.tail = elem.tail[m.end():]
|
elem.tail = elem.tail[m.end():]
|
||||||
|
|
||||||
def assign_attrs(self, elem, attrs):
    """Apply the attributes parsed from ``attrs`` to ``elem``.

    ``attrs`` is handed to ``markdown.extensions.attr_list.get_attrs`` and
    the resulting (key, value) pairs are applied in order. A key of ``'.'``
    appends the value to the element's ``class`` attribute; any other key is
    passed through ``sanitize_name`` and set directly.
    """
    for key, value in markdown.extensions.attr_list.get_attrs(attrs):
        if key != '.':
            # Plain attribute: set it under its sanitized name.
            elem.set(self.sanitize_name(key), value)
            continue
        # Class shorthand: extend an existing class list, or start one.
        existing = elem.get('class')
        elem.set('class', '{} {}'.format(existing, value) if existing else value)
|
|
||||||
|
|
||||||
def sanitize_name(self, name):
    """
    Return ``name`` with every run of characters matched by ``NAME_RE``
    replaced by a single underscore.

    The intent is 'an XML Name, minus the ":"'.
    See https://www.w3.org/TR/REC-xml-names/#NT-NCName
    """
    invalid_chars = self.NAME_RE
    return invalid_chars.sub('_', name)
|
|
||||||
|
|
||||||
# Footnotes
|
# Footnotes
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user