diff --git a/wikinote/markup.py b/wikinote/markup.py index d9b9bd4..50d09ce 100644 --- a/wikinote/markup.py +++ b/wikinote/markup.py @@ -15,7 +15,7 @@ # along with this program. If not, see . import markdown -import markdown.extensions.extra, markdown.extensions.footnotes, markdown.extensions.attr_list +import markdown.extensions.admonition, markdown.extensions.extra, markdown.extensions.footnotes, markdown.extensions.attr_list import re import xml.etree.ElementTree as ET @@ -40,6 +40,8 @@ class WNMarkdown(markdown.Markdown): # Override default Markdown processors self.preprocessors.register(NormalizeWhitespace(self), 'normalize_whitespace', 30) self.parser.blockprocessors.register(HashHeaderProcessor(self.parser), 'hashheader', 70) + self.parser.blockprocessors.register(ListIndentProcessor(self.parser), 'indent', 90) + self.parser.blockprocessors.register(UListProcessor(self.parser), 'ulist', 30) self.treeprocessors.register(AttrListTreeprocessor(self), 'attr_list', 8) # Our own processors @@ -153,6 +155,8 @@ class DirectiveProcessor(markdown.blockprocessors.BlockProcessor): if b.startswith('\t'): blocks.pop(0) content += b + else: + break content, theRest = self.parser.md.detab(content) @@ -202,104 +206,154 @@ class WrapSectionProcessor(markdown.treeprocessors.Treeprocessor): # Adapted from Python-Markdown # Allow tabs -class AdmonitionProcessor(markdown.blockprocessors.BlockProcessor): - CLASSNAME = 'admonition' - CLASSNAME_TITLE = 'admonition-title' - RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? 
class AdmonitionProcessor(markdown.extensions.admonition.AdmonitionProcessor):
    """Admonition block processor that also accepts tab-indented bodies.

    Everything except `test` and `detab` is inherited from the stock
    Python-Markdown admonition processor.
    """

    def test(self, parent, block):
        """Return a truthy value when `block` starts or continues an admonition.

        A block qualifies when it contains the `!!!` marker matched by
        `self.RE`, or when it is indented (by `tab_length` spaces or by one
        tab) and the last child of `parent` already carries the admonition
        CSS class.
        """
        sibling = self.lastChild(parent)
        indented = block.startswith((' ' * self.tab_length, '\t'))
        return self.RE.search(block) or \
            (indented and sibling is not None and
             sibling.get('class', '').find(self.CLASSNAME) != -1)

    def detab(self, text):
        """Delegate de-indentation to the owning Markdown instance.

        NOTE(review): `self.parser.md.detab` is defined outside this view;
        presumably it is the tab-aware variant -- confirm.
        """
        return self.parser.md.detab(text)


# Adapted from Python-Markdown
# Allow tabs
class ListIndentProcessor(markdown.blockprocessors.ListIndentProcessor):
    """List-indent processor where one tab counts as one indent level."""

    def __init__(self, parser):
        super().__init__(parser)
        # Allow tabs: an indent unit is either `tab_length` spaces or one tab.
        self.INDENT_RE = re.compile(r'^(([ ]{%s}|\t)+)' % self.tab_length)

    def test(self, parent, block):
        """Return a truthy value when `block` is indented content of a list.

        The block must start with an indent unit, the parser must not be in
        the 'detabbed' state, and `parent` must be a list item or have a
        list as its last child.
        """
        # Allow tabs
        indented = block.startswith((' ' * self.tab_length, '\t'))
        return indented and \
            not self.parser.state.isstate('detabbed') and \
            (parent.tag in self.ITEM_TYPES or
             (len(parent) and parent[-1] is not None and
              (parent[-1].tag in self.LIST_TYPES)))

    def get_level(self, parent, block):
        """Return `(level, parent)` for the list nesting `block` belongs to.

        The indent level is the number of indent units at the start of
        `block`. Each tab and each run of `tab_length` spaces is one unit, so
        mixed prefixes such as a tab followed by spaces are counted correctly
        (previously the raw character count was used as soon as a tab was
        involved).
        """
        match = self.INDENT_RE.match(block)
        if match:
            # Allow tabs: INDENT_RE only matches whole units, so the spaces
            # in the prefix are always a multiple of tab_length.
            prefix = match.group(1)
            indent_level = (prefix.count('\t') +
                            prefix.count(' ') // self.tab_length)
        else:
            indent_level = 0
        # Inside a 'list' state the walk starts one level down.
        level = 1 if self.parser.state.isstate('list') else 0
        # Step down through the last children while the block is indented
        # deeper than the current level.
        while indent_level > level:
            child = self.lastChild(parent)
            if child is None:
                break
            if child.tag in self.LIST_TYPES:
                level += 1
            elif child.tag not in self.ITEM_TYPES:
                # Neither a list nor a list item: no more nested levels.
                break
            parent = child
        return level, parent

    def looseDetab(self, text, level=1):
        """Strip `level` indent units from lines that have them.

        Lines with fewer than `level` leading indent units are left
        untouched. An indent unit is `tab_length` spaces or one tab; exactly
        `level` units are removed, never more (the previous implementation
        could strip a space unit and then an additional tab from one line).
        """
        unit_re = re.compile(r'^(?:[ ]{%d}|\t){%d}' % (self.tab_length, level))
        return '\n'.join(unit_re.sub('', line, count=1)
                         for line in text.split('\n'))


class OListProcessor(markdown.blockprocessors.OListProcessor):
    """Ordered-list processor that accepts tab-indented nested items."""

    def __init__(self, parser):
        super().__init__(parser)
        # Allow tabs: a nested item is indented by tab_length..2*tab_length-1
        # spaces, or by a single tab.
        self.INDENT_RE = re.compile(
            r'^(?:[ ]{%d,%d}|\t)((\d+\.)|[*+-])[ ]+.*'
            % (self.tab_length, self.tab_length * 2 - 1))

    def run(self, parent, blocks):
        """Consume the first block of `blocks` and add its items to a list.

        The items are appended to the preceding sibling list, to `parent`
        when it is itself a list, or to a newly created list element.
        """
        # Check for multiple items in one block.
        items = self.get_items(blocks.pop(0))
        sibling = self.lastChild(parent)

        if sibling is not None and sibling.tag in self.SIBLING_TAGS:
            # Previous block was a list, so continue it.
            lst = sibling
            # Bare text of the previous item is wrapped in a <p>.
            if lst[-1].text:
                # The item may have other children, so insert the <p> first
                # instead of appending it.
                p = ET.Element('p')
                p.text = lst[-1].text
                lst[-1].text = ''
                lst[-1].insert(0, p)
            # A tail on the previous item's last child goes into a <p> too.
            lch = self.lastChild(lst[-1])
            if lch is not None and lch.tail:
                p = ET.SubElement(lst[-1], 'p')
                p.text = lch.tail.lstrip()
                lch.tail = ''

            # The first item is parsed in 'looselist' state.
            li = ET.SubElement(lst, 'li')
            self.parser.state.set('looselist')
            firstitem = items.pop(0)
            self.parser.parseBlocks(li, [firstitem])
            self.parser.state.reset()
        elif parent.tag in ['ol', 'ul']:
            # The parent is already a list: add the items to it directly.
            lst = parent
        else:
            # This is a new list, so create it with the appropriate tag.
            lst = ET.SubElement(parent, self.TAG)
            # Record a custom start number when one was detected.
            if not self.LAZY_OL and self.STARTSWITH != '1':
                lst.attrib['start'] = self.STARTSWITH

        self.parser.state.set('list')
        # Parse each item with the appropriate parent.
        for item in items:
            # Allow tabs
            if item.startswith((' ' * self.tab_length, '\t')):
                # Indented item: parse with the last <li> as parent.
                self.parser.parseBlocks(lst[-1], [item])
            else:
                # New item: create its <li> and parse into it.
                li = ET.SubElement(lst, 'li')
                self.parser.parseBlocks(li, [item])
        self.parser.state.reset()

    def get_items(self, block):
        """Split `block` into a list of item strings.

        A line matching `CHILD_RE` starts a new item, an indented list
        marker starts (or extends) a nested item, and any other line is
        appended to the current item.
        """
        integer_re = re.compile(r'(\d+)')
        indents = (' ' * self.tab_length, '\t')
        items = []
        for line in block.split('\n'):
            m = self.CHILD_RE.match(line)
            if m:
                # This is a new list item.
                if not items and self.TAG == 'ol':
                    # First item of an ordered list: remember its number.
                    self.STARTSWITH = integer_re.match(m.group(1)).group()
                items.append(m.group(3))
            elif self.INDENT_RE.match(line):
                # This is an indented (possibly nested) item.
                # Allow tabs
                if items[-1].startswith(indents):
                    # Previous item was indented too: extend it.
                    items[-1] = '{}\n{}'.format(items[-1], line)
                else:
                    items.append(line)
            else:
                # Any other line continues the current item.
                items[-1] = '{}\n{}'.format(items[-1], line)
        return items


class UListProcessor(OListProcessor):
    """Unordered-list variant of the tab-aware `OListProcessor`."""

    TAG = 'ul'

    def __init__(self, parser):
        super().__init__(parser)
        # A bullet may be preceded by at most tab_length - 1 spaces.
        self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % (self.tab_length - 1))
AttrListTreeprocessor(markdown.extensions.attr_list.AttrListTreeprocessor): def run(self, doc): for elem in doc.iter(): if self.md.is_block_level(elem.tag): - # Block level: check for attrs on last line of text RE = self.BLOCK_RE if markdown.extensions.attr_list.isheader(elem) or elem.tag == 'dt': - # header or def-term: check for attrs at end of line RE = self.HEADER_RE if len(elem) and elem.tag == 'li': - # special case list items. children may include a ul or ol. pos = None - # find the ul or ol position for i, child in enumerate(elem): if child.tag in ['ul', 'ol']: pos = i break if pos is None and elem[-1].tail: - # use tail of last child. no ul or ol. m = RE.search(elem[-1].tail) if m: self.assign_attrs(elem, m.group(1)) elem[-1].tail = elem[-1].tail[:m.start()] elif pos is not None and pos > 0 and elem[pos-1].tail: - # use tail of last child before ul or ol m = RE.search(elem[pos-1].tail) if m: self.assign_attrs(elem, m.group(1)) elem[pos-1].tail = elem[pos-1].tail[:m.start()] elif elem.text: - # use text. ul is first child. m = RE.search(elem.text) if m: self.assign_attrs(elem, m.group(1)) @@ -313,16 +367,13 @@ class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor): # Remove last row elem[-1].remove(elem[-1][-1]) # tbody -> tr elif len(elem) and elem[-1].tail: - # has children. Get from tail of last child m = RE.search(elem[-1].tail) if m: self.assign_attrs(elem, m.group(1)) elem[-1].tail = elem[-1].tail[:m.start()] if markdown.extensions.attr_list.isheader(elem): - # clean up trailing #s elem[-1].tail = elem[-1].tail.rstrip('#').rstrip() elif elem.text: - # no children. Get from text. 
m = RE.search(elem.text) if not m and elem.tag == 'td': m = re.search(self.BASE_RE, elem.text) @@ -330,36 +381,13 @@ class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor): self.assign_attrs(elem, m.group(1)) elem.text = elem.text[:m.start()] if markdown.extensions.attr_list.isheader(elem): - # clean up trailing #s elem.text = elem.text.rstrip('#').rstrip() else: - # inline: check for attrs at start of tail if elem.tail: m = self.INLINE_RE.match(elem.tail) if m: self.assign_attrs(elem, m.group(1)) elem.tail = elem.tail[m.end():] - - def assign_attrs(self, elem, attrs): - """ Assign attrs to element. """ - for k, v in markdown.extensions.attr_list.get_attrs(attrs): - if k == '.': - # add to class - cls = elem.get('class') - if cls: - elem.set('class', '{} {}'.format(cls, v)) - else: - elem.set('class', v) - else: - # assign attr k with v - elem.set(self.sanitize_name(k), v) - - def sanitize_name(self, name): - """ - Sanitize name as 'an XML Name, minus the ":"'. - See https://www.w3.org/TR/REC-xml-names/#NT-NCName - """ - return self.NAME_RE.sub('_', name) # Footnotes