#   WikiNote3
#   Copyright © 2020  Lee Yingtong Li (RunasSudo)
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""Customised Python-Markdown pipeline.

Defines ``WNMarkdown``, a ``markdown.Markdown`` subclass with overridden and
additional block, inline and tree processors, plus the module-level
``directives`` and ``roles`` registries that the directive and role
processors look names up in.
"""

import markdown
import markdown.extensions.attr_list
import markdown.extensions.extra
import markdown.extensions.footnotes

import re
import xml.etree.ElementTree as ET

from .mdx_urlize import UrlizeExtension

# Registries mapping a directive/role name to the class implementing it.
# They are populated at the bottom of this module.
directives = {}
roles = {}


class WNMarkdown(markdown.Markdown):
    """Markdown converter with the processors defined in this module."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self.registerExtensions([FootnoteExtension(), UrlizeExtension(), 'toc', 'tables'], {})
        self.meta = {}

        # Markdown in HTML
        self.preprocessors['html_block'].markdown_in_raw = True
        self.parser.blockprocessors.register(
            markdown.extensions.extra.MarkdownInHtmlProcessor(self.parser), 'markdown_block', 105)
        self.parser.blockprocessors.tag_counter = -1
        self.parser.blockprocessors.contain_span_tags = re.compile(
            r'^(p|h[1-6]|li|dd|dt|td|th|legend|address)$', re.IGNORECASE)

        # Override default Markdown processors
        self.preprocessors.register(NormalizeWhitespace(self), 'normalize_whitespace', 30)
        self.parser.blockprocessors.register(HashHeaderProcessor(self.parser), 'hashheader', 70)
        self.treeprocessors.register(AttrListTreeprocessor(self), 'attr_list', 8)

        # Our own processors
        self.parser.blockprocessors.register(DirectiveProcessor(self.parser), 'directive', 95)
        self.parser.blockprocessors.register(AdmonitionProcessor(self.parser), 'admonition', 105)
        # NOTE(review): the two inline processors below are constructed with
        # self.parser, so their ``md`` attribute is the block parser rather
        # than this Markdown instance (DirectiveProcessor, by contrast, hands
        # ``self.parser.md`` to directives). Left unchanged because role
        # classes are defined outside this file - confirm what they expect.
        self.inlinePatterns.register(BlueProcessor(self.parser), 'blue_em', 65)
        self.inlinePatterns.register(RoleProcessor(self.parser), 'role', 500)
        self.treeprocessors.register(WrapSectionProcessor(self), 'wrap_sections', 100)

    # Override
    def reset(self):
        """Reset the converter state, including ``meta``, and return self."""
        super().reset()
        self.meta = {}
        return self

    # Based on Markdown.convert
    def parse(self, source):
        """Run the preprocessors, block parser and tree processors.

        Returns the root element of the resulting tree, or ``''`` when
        *source* is empty or whitespace only.
        """
        if not source.strip():
            return ''

        self.lines = source.split('\n')
        for prep in self.preprocessors:
            self.lines = prep.run(self.lines)

        root = self.parser.parseDocument(self.lines).getroot()

        for treeprocessor in self.treeprocessors:
            newRoot = treeprocessor.run(root)
            if newRoot is not None:
                root = newRoot

        return root

    # Based on Markdown.convert
    def serialise(self, root):
        """Serialise the element tree *root* and run the postprocessors.

        Returns the resulting string with surrounding whitespace stripped.
        """
        output = self.serializer(root)

        if self.stripTopLevelTags:
            try:
                # Keep only what lies between the opening and closing doc tag
                start = output.index('<{}>'.format(self.doc_tag)) + len(self.doc_tag) + 2
                end = output.rindex('</{}>'.format(self.doc_tag))
                output = output[start:end].strip()
            except ValueError:
                # No open/close pair found: a self-closing doc tag means the
                # document is empty
                if output.strip().endswith('<{} />'.format(self.doc_tag)):
                    output = ''

        for pp in self.postprocessors:
            output = pp.run(output)

        return output.strip()

    # Put it together
    def convert(self, source):
        """Convert Markdown *source* to serialised output."""
        root = self.parse(source)
        if isinstance(root, str):
            # parse() returns '' for blank input; there is no tree to serialise
            return root
        return self.serialise(root)

    def detab(self, text):
        """Remove one level of indentation from the leading lines of *text*.

        A level is ``tab_length`` spaces or a single tab. Blank lines are
        kept as empty lines. Processing stops at the first non-blank line
        that is not indented.

        Returns a tuple ``(detabbed, rest)`` where *rest* is the unprocessed
        remainder of *text*.
        """
        newtext = []
        lines = text.split('\n')
        for line in lines:
            if line.startswith(' '*self.tab_length):
                newtext.append(line[self.tab_length:])
            elif line.startswith('\t'):
                newtext.append(line[1:])
            elif not line.strip():
                newtext.append('')
            else:
                break
        return '\n'.join(newtext), '\n'.join(lines[len(newtext):])


class HashHeaderProcessor(markdown.blockprocessors.HashHeaderProcessor):
    """Hash-header processor that emits headings one level lower."""

    # Override to add 1 to level
    def run(self, parent, blocks):
        """Consume the first block and append a heading element to *parent*."""
        block = blocks.pop(0)
        m = self.RE.search(block)

        before = block[:m.start()]
        after = block[m.end():]

        if before:
            # Text preceding the header is parsed as its own block(s)
            self.parser.parseBlocks(parent, [before])

        h = ET.SubElement(parent, 'h{}'.format(len(m.group('level')) + 1))  # Add 1 to level
        h.text = m.group('header').strip()

        if after:
            # Text following the header goes back on the queue
            blocks.insert(0, after)


class NormalizeWhitespace(markdown.preprocessors.Preprocessor):
    """Whitespace normaliser that leaves tab characters in place."""

    # Override to retain tabs
    def run(self, lines):
        """Return *lines* with markers removed and line endings normalised.

        Removes the STX/ETX marker characters, converts ``\\r\\n`` and
        ``\\r`` to ``\\n``, appends two newlines, and empties lines that
        consist only of spaces.
        """
        source = '\n'.join(lines)
        source = source.replace(markdown.util.STX, "").replace(markdown.util.ETX, "")
        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
        source = re.sub(r'(?<=\n) +\n', '\n', source)
        return source.split('\n')


class DirectiveProcessor(markdown.blockprocessors.BlockProcessor):
    """Block processor for ``.. name:: arg`` directives.

    The directive body is whatever follows the first line of the block, plus
    any immediately following blocks that start with a tab.
    """

    RE = re.compile(r'^\.\. +(?P<name>[a-zA-Z0-9_-]+?)::(?: +(?P<arg>.*?))?(?:\n|$)')

    def test(self, parent, block):
        return bool(self.RE.search(block))

    def run(self, parent, blocks):
        """Render the directive at the head of *blocks* into *parent*.

        Raises ``KeyError`` if the directive name is not in ``directives``.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)

        # Get directive content
        parts = []
        if '\n' in block:
            parts.append(block[block.index('\n') + 1:])

        # Absorb the run of tab-indented blocks that directly follows. Stop
        # at the first block that is not indented, so that only blocks from
        # the head of the queue are ever removed.
        for b in blocks[:]:
            if not b.startswith('\t'):
                break
            blocks.pop(0)
            parts.append(b)

        # Blocks were separated by blank lines; restore that separation
        content = '\n\n'.join(parts)
        content, theRest = self.parser.md.detab(content)

        directive = directives[m.group('name')](self.parser.md, arg=m.group('arg') or '', content=content)
        el = directive.render()
        el.directive = directive

        parent.append(el)

        if theRest:
            # Unindented trailing lines are re-queued for normal processing
            blocks.insert(0, theRest)


class RoleProcessor(markdown.inlinepatterns.InlineProcessor):
    """Inline processor for ``:name:`content``` roles."""

    def __init__(self, md):
        super().__init__(r':(?P<name>[^:]+?):`(?P<content>[^`]+?)`', md)

    def handleMatch(self, m, data):
        """Render the matched role.

        Raises ``KeyError`` if the role name is not in ``roles``.
        """
        role = roles[m.group('name')](self.md, m.group('content'))
        el = role.render()
        el.role = role
        return el, m.start(0), m.end(0)


class BlueProcessor(markdown.inlinepatterns.InlineProcessor):
    """Inline processor turning ``!!text!!`` into ``<span class="blue">``."""

    def __init__(self, md):
        super().__init__(r'!!(.+?)!!', md)

    def handleMatch(self, m, data):
        el = ET.Element('span')
        el.text = m.group(1)
        el.set('class', 'blue')
        return el, m.start(0), m.end(0)


class WrapSectionProcessor(markdown.treeprocessors.Treeprocessor):
    """Tree processor that groups non-heading content into sections.

    Each run of top-level elements between h1/h2/h3 headings is moved into a
    ``<section>`` element; the headings themselves stay at the top level.
    """

    def run(self, root):
        section = ET.Element('section')

        # Iterate over a snapshot because root is mutated inside the loop
        for child in list(root):
            if child.tag in ('h1', 'h2', 'h3'):
                if len(section) > 0:
                    # Place the finished section just before this heading
                    root.insert(list(root).index(child), section)
                    section = ET.Element('section')
            else:
                section.append(child)
                root.remove(child)

        if len(section) > 0:
            root.append(section)


# Adapted from Python-Markdown
# Allow tabs
class AdmonitionProcessor(markdown.blockprocessors.BlockProcessor):
    """Block processor for ``!!! type "title"`` admonitions.

    The body may be indented with ``tab_length`` spaces or with a tab.
    """

    CLASSNAME = 'admonition'
    CLASSNAME_TITLE = 'admonition-title'
    RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)')
    RE_SPACES = re.compile(' +|\t+')

    def test(self, parent, block):
        # Either a new admonition starts in this block, or this is an indented
        # block directly following an existing admonition element
        sibling = self.lastChild(parent)
        return self.RE.search(block) or \
            ((block.startswith(' ' * self.tab_length) or block.startswith('\t')) and sibling is not None and
                sibling.get('class', '').find(self.CLASSNAME) != -1)

    def run(self, parent, blocks):
        sibling = self.lastChild(parent)
        block = blocks.pop(0)
        m = self.RE.search(block)

        if m:
            block = block[m.end():]  # removes the first line

        block, theRest = self.parser.md.detab(block)

        if m:
            klass, title = self.get_class_and_title(m)
            div = ET.SubElement(parent, 'div')
            div.set('class', '{} {}'.format(self.CLASSNAME, klass))
            if title:
                p = ET.SubElement(div, 'p')
                p.text = title
                p.set('class', self.CLASSNAME_TITLE)
        else:
            # Continuation block: keep filling the previous admonition
            div = sibling

        self.parser.parseChunk(div, block)

        if theRest:
            # This block contained unindented line(s) after the first indented
            # line. Insert these lines as the first block of the master blocks
            # list for future processing.
            blocks.insert(0, theRest)

    def get_class_and_title(self, match):
        """Return ``(klass, title)`` for an admonition header *match*.

        *title* is ``None`` when an explicitly empty title was given.
        """
        klass, title = match.group(1).lower(), match.group(2)
        klass = self.RE_SPACES.sub(' ', klass)
        if title is None:
            # no title was provided, use the capitalized classname as title
            # e.g.: `!!! note` will render
            # `<p class="admonition-title">Note</p>`
            title = klass.split(' ', 1)[0].capitalize()
        elif title == '':
            # an explicit blank title should not be rendered
            # e.g.: `!!! warning ""` will *not* render `p` with a title
            title = None
        return klass, title


# Adapted from Python-Markdown
# Fix for tables
class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor):
    """Attribute-list tree processor with a special case for tables.

    An attribute list in the first cell of a table's last row is applied to
    the table itself, and that row is removed.
    """

    BASE_RE = r'\{\:?([^\}\n]*)\}'
    HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
    BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
    INLINE_RE = re.compile(r'^%s' % BASE_RE)
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc):
        for elem in doc.iter():
            if self.md.is_block_level(elem.tag):
                # Block level: check for attrs on last line of text
                RE = self.BLOCK_RE
                if markdown.extensions.attr_list.isheader(elem) or elem.tag == 'dt':
                    # header or def-term: check for attrs at end of line
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a ul or ol.
                    pos = None
                    # find the ul or ol position
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no ul or ol.
                        m = RE.search(elem[-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before ul or ol
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # use text. ul is first child.
                        m = RE.search(elem.text)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem.text = elem.text[:m.start()]
                elif len(elem) and elem.tag == 'table' and len(elem[-1]) and len(elem[-1][-1]) and elem[-1][-1][0].text:
                    # SPECIAL CASE table, use last row
                    RE = self.INLINE_RE
                    m = RE.search(elem[-1][-1][0].text)  # tbody -> tr -> td
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        # Remove last row
                        elem[-1].remove(elem[-1][-1])  # tbody -> tr
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem[-1].tail = elem[-1].tail[:m.start()]
                        if markdown.extensions.attr_list.isheader(elem):
                            # clean up trailing #s
                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if not m and elem.tag == 'td':
                        m = re.search(self.BASE_RE, elem.text)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.text = elem.text[:m.start()]
                        if markdown.extensions.attr_list.isheader(elem):
                            # clean up trailing #s
                            elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for attrs at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.tail = elem.tail[m.end():]

    def assign_attrs(self, elem, attrs):
        """ Assign attrs to element. """
        for k, v in markdown.extensions.attr_list.get_attrs(attrs):
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '{} {}'.format(cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attr k with v
                elem.set(self.sanitize_name(k), v)

    def sanitize_name(self, name):
        """
        Sanitize name as 'an XML Name, minus the ":"'.
        See https://www.w3.org/TR/REC-xml-names/#NT-NCName
        """
        return self.NAME_RE.sub('_', name)


# Footnotes
class FootnoteExtension(markdown.extensions.footnotes.FootnoteExtension):
    """Footnote extension whose footnote list has no backlinks."""

    # Override
    def extendMarkdown(self, md):
        md.registerExtension(self)
        self.parser = md.parser
        self.md = md

        md.preprocessors.register(markdown.extensions.footnotes.FootnotePreprocessor(self), 'footnote', 15)

        FOOTNOTE_RE = r'\[\^([^\]]*)\]'  # blah blah [^1] blah
        md.inlinePatterns.register(FootnoteInlineProcessor(FOOTNOTE_RE, self), 'footnote', 175)

        md.treeprocessors.register(markdown.extensions.footnotes.FootnoteTreeprocessor(self), 'footnote', 50)

    # Override to omit backlinks
    def makeFootnotesDiv(self, root):
        """Return a ``div.footnote`` listing all footnotes, or ``None``."""
        if not self.footnotes:
            return None

        div = ET.Element("div")
        div.set('class', 'footnote')
        ol = ET.SubElement(div, "ol")
        # Footnote text is parsed into a scratch element, then its children
        # are moved into the list item
        surrogate_parent = ET.Element("div")

        for footnote_id in self.footnotes:
            li = ET.SubElement(ol, "li")
            li.set("id", self.makeFootnoteId(footnote_id))
            self.parser.parseChunk(surrogate_parent, self.footnotes[footnote_id])
            for el in list(surrogate_parent):
                li.append(el)
                surrogate_parent.remove(el)

        return div


class FootnoteInlineProcessor(markdown.extensions.footnotes.FootnoteInlineProcessor):
    """Footnote reference processor accepting a trailing comma in the id."""

    # Override to handle commas
    def handleMatch(self, m, data):
        """Render ``[^id]`` or ``[^id,]`` as a superscript link.

        A trailing comma inside the brackets is not part of the id; it is
        emitted as text after the link. Unknown ids produce no match.
        """
        footnote_id = m.group(1).rstrip(',')
        if footnote_id not in self.footnotes.footnotes:
            return None, None, None

        sup = ET.Element("sup")
        sup.set('class', 'footnote-ref')
        a = ET.SubElement(sup, "a")
        sup.set('id', self.footnotes.makeFootnoteRefId(footnote_id, found=True))
        a.set('href', '#' + self.footnotes.makeFootnoteId(footnote_id))
        # The link text is the 1-based position of the footnote definition
        a.text = str(list(self.footnotes.footnotes).index(footnote_id) + 1)
        if m.group(1).endswith(','):
            a.tail = ','
        return sup, m.start(0), m.end(0)


# Custom directives and roles
from . import markup_custom
directives.update(markup_custom.directives)
roles.update(markup_custom.roles)

# markup_custom2 is optional
try:
    from . import markup_custom2
    directives.update(markup_custom2.directives)
    roles.update(markup_custom2.roles)
except ImportError:
    pass