2020-06-20 16:25:02 +10:00
|
|
|
# WikiNote3
|
|
|
|
# Copyright © 2020 Lee Yingtong Li (RunasSudo)
|
|
|
|
#
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Affero General Public License as published by
|
|
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU Affero General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
import markdown
|
|
|
|
import markdown.extensions.extra, markdown.extensions.footnotes, markdown.extensions.attr_list
|
|
|
|
|
|
|
|
import re
|
|
|
|
import xml.etree.ElementTree as ET
|
|
|
|
|
|
|
|
from .mdx_urlize import UrlizeExtension
|
|
|
|
|
|
|
|
# Registry of directive factories, keyed by the name written in a
# ``.. name::`` line. DirectiveProcessor looks entries up here; the dict is
# filled at the bottom of this module from markup_custom (and from
# markup_custom2 when that module can be imported).
directives = {}
# Registry of role factories, keyed by the name written in a
# ``:name:`content``` span. RoleProcessor looks entries up here; it is filled
# in the same place as ``directives``.
roles = {}
|
|
|
|
|
|
|
|
class WNMarkdown(markdown.Markdown):
    """``markdown.Markdown`` subclass with this module's processors installed.

    The constructor registers FootnoteExtension, UrlizeExtension, ``toc`` and
    ``tables``, enables Markdown inside raw HTML blocks, replaces three stock
    processors with the variants defined in this module, and adds the
    directive, admonition, role, ``!!...!!`` and section-wrapping processors.

    Conversion is split in two so that callers can work on the element tree
    in between: :meth:`parse` turns source text into a tree and
    :meth:`serialise` turns a tree into an HTML string. :meth:`convert`
    simply chains the two.
    """

    def __init__(self, *args, **kwargs):
        """Build the parser; all arguments are forwarded to ``markdown.Markdown``."""
        super().__init__(*args, **kwargs)

        self.registerExtensions([FootnoteExtension(), UrlizeExtension(), 'toc', 'tables'], {})

        # Free-form dict that reset() empties again; presumably filled by
        # directives/roles while rendering -- TODO confirm against callers.
        self.meta = {}

        # Markdown in HTML
        self.preprocessors['html_block'].markdown_in_raw = True
        self.parser.blockprocessors.register(markdown.extensions.extra.MarkdownInHtmlProcessor(self.parser), 'markdown_block', 105)
        self.parser.blockprocessors.tag_counter = -1
        self.parser.blockprocessors.contain_span_tags = re.compile(r'^(p|h[1-6]|li|dd|dt|td|th|legend|address)$', re.IGNORECASE)

        # Override default Markdown processors
        self.preprocessors.register(NormalizeWhitespace(self), 'normalize_whitespace', 30)
        self.parser.blockprocessors.register(HashHeaderProcessor(self.parser), 'hashheader', 70)
        self.treeprocessors.register(AttrListTreeprocessor(self), 'attr_list', 8)

        # Our own processors
        self.parser.blockprocessors.register(DirectiveProcessor(self.parser), 'directive', 95)
        self.parser.blockprocessors.register(AdmonitionProcessor(self.parser), 'admonition', 105)
        # NOTE(review): the two inline processors are handed self.parser, not
        # self, so their ``md`` attribute is the block parser -- confirm that
        # the role implementations expect this.
        self.inlinePatterns.register(BlueProcessor(self.parser), 'blue_em', 65)
        self.inlinePatterns.register(RoleProcessor(self.parser), 'role', 500)
        self.treeprocessors.register(WrapSectionProcessor(self), 'wrap_sections', 100)

    # Override
    def reset(self):
        """Reset the base converter state and empty ``self.meta``; returns ``self``."""
        super().reset()
        self.meta = {}
        return self

    # Based on Markdown.convert
    def parse(self, source):
        """Turn Markdown *source* into a processed element tree.

        Runs every preprocessor over the lines, parses them into a document
        and then applies every tree processor (a processor may replace the
        root by returning a new one).

        Returns the root element, or ``''`` when *source* is empty or
        whitespace-only.
        """
        if not source.strip():
            return ''

        self.lines = source.split('\n')
        for prep in self.preprocessors:
            self.lines = prep.run(self.lines)

        root = self.parser.parseDocument(self.lines).getroot()

        for treeprocessor in self.treeprocessors:
            newRoot = treeprocessor.run(root)
            if newRoot is not None:
                root = newRoot

        return root

    # Based on Markdown.convert
    def serialise(self, root):
        """Serialise the tree *root* (as returned by :meth:`parse`) to a string.

        The top-level document tag is stripped when ``stripTopLevelTags`` is
        set, then every postprocessor is applied. ``''`` -- the value
        :meth:`parse` returns for blank input -- is passed through as ``''``.
        """
        if root == '':
            # parse() yields '' instead of a tree for blank input; handing that
            # to the serializer would fail because a str is not an element.
            return ''

        output = self.serializer(root)

        if self.stripTopLevelTags:
            try:
                start = output.index('<{}>'.format(self.doc_tag)) + len(self.doc_tag) + 2
                end = output.rindex('</{}>'.format(self.doc_tag))
                output = output[start:end].strip()
            except ValueError:
                # No open/close pair found: a self-closed doc tag means the
                # document is empty.
                if output.strip().endswith('<{} />'.format(self.doc_tag)):
                    output = ''

        for pp in self.postprocessors:
            output = pp.run(output)

        return output.strip()

    # Put it together
    def convert(self, source):
        """Convert Markdown *source* to an HTML string (parse, then serialise)."""
        root = self.parse(source)
        return self.serialise(root)

    def detab(self, text):
        """Strip one level of indentation from the leading lines of *text*.

        A level is either ``tab_length`` spaces or a single tab. Blank lines
        are kept (as empty strings). Processing stops at the first non-blank
        line that is not indented.

        Returns a ``(dedented, rest)`` tuple of newline-joined strings, where
        *rest* holds the lines from the stopping point onwards.
        """
        newtext = []
        lines = text.split('\n')
        for line in lines:
            if line.startswith(' '*self.tab_length):
                newtext.append(line[self.tab_length:])
            elif line.startswith('\t'):
                newtext.append(line[1:])
            elif not line.strip():
                newtext.append('')
            else:
                break
        return '\n'.join(newtext), '\n'.join(lines[len(newtext):])
|
|
|
|
|
|
|
|
class HashHeaderProcessor(markdown.blockprocessors.HashHeaderProcessor):
    """Hash-header processor that emits headings one level deeper.

    The heading tag number is the length of the matched ``level`` group plus
    one.
    """

    def run(self, parent, blocks):
        """Consume the first block and append the shifted heading to *parent*.

        Text before the header line is parsed as its own block first; text
        after it is pushed back onto *blocks* for later processing.
        """
        text = blocks.pop(0)
        match = self.RE.search(text)
        leading, trailing = text[:match.start()], text[match.end():]

        if leading:
            self.parser.parseBlocks(parent, [leading])

        # Add 1 to level
        depth = len(match.group('level')) + 1
        heading = ET.SubElement(parent, 'h' + str(depth))
        heading.text = match.group('header').strip()

        if trailing:
            blocks.insert(0, trailing)
|
|
|
|
|
|
|
|
class NormalizeWhitespace(markdown.preprocessors.Preprocessor):
    """Whitespace-normalising preprocessor that leaves tab characters alone."""

    def run(self, lines):
        """Return *lines* with placeholders removed and line endings unified.

        STX/ETX marker characters are dropped, ``\\r\\n`` and ``\\r`` become
        ``\\n``, two newlines are appended, and lines made up only of spaces
        are emptied.
        """
        text = '\n'.join(lines)
        for marker in (markdown.util.STX, markdown.util.ETX):
            text = text.replace(marker, "")
        text = text.replace("\r\n", "\n").replace("\r", "\n")
        text += "\n\n"
        text = re.sub(r'(?<=\n) +\n', '\n', text)
        return text.split('\n')
|
|
|
|
|
|
|
|
class DirectiveProcessor(markdown.blockprocessors.BlockProcessor):
    """Block processor for ``.. name:: arg`` directive blocks.

    The directive line must open the block. Its content is the indented text
    that follows in the same block, plus any immediately following blocks
    that start with a tab. The factory registered under ``name`` in the
    module-level ``directives`` dict is called with the Markdown instance,
    ``arg`` and ``content``; the element returned by its ``render()`` is
    appended to the parent.
    """

    # The two leading dots are escaped so only a literal ".." opens a
    # directive; unescaped they would match any two characters.
    RE = re.compile(r'^\.\. +(?P<name>[a-zA-Z0-9_-]+?)::(?: +(?P<arg>.*?))?(?:\n|$)')

    def test(self, parent, block):
        """Return True when *block* begins with a directive line."""
        return bool(self.RE.search(block))

    def run(self, parent, blocks):
        """Render the directive at the head of *blocks* into *parent*.

        Raises:
            KeyError: if the directive name is not registered in ``directives``.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        detab = self.parser.md.detab

        # Get directive content: first whatever follows the directive line
        # inside the same block ...
        newline = block.find('\n')
        first = block[newline + 1:] if newline != -1 else ''
        body, theRest = detab(first)
        parts = [body] if body else []

        # ... then the directly following tab-indented blocks. Stop at the
        # first block that is not indented, and do not look ahead at all when
        # the directive's own block already ended in unindented text.
        if not theRest:
            while blocks and blocks[0].startswith('\t'):
                body, theRest = detab(blocks.pop(0))
                parts.append(body)
                if theRest:
                    break

        # Blocks were split on blank lines, so rejoin them with one to keep
        # paragraphs apart.
        content = '\n\n'.join(parts)

        directive = directives[m.group('name')](self.parser.md, arg=m.group('arg') or '', content=content)
        el = directive.render()
        el.directive = directive
        parent.append(el)

        if theRest:
            # Unindented remainder goes back to the front of the queue.
            blocks.insert(0, theRest)
|
|
|
|
|
|
|
|
class RoleProcessor(markdown.inlinepatterns.InlineProcessor):
    """Inline processor for ``:name:`content``` role spans.

    The factory registered under ``name`` in the module-level ``roles`` dict
    is called with ``self.md`` and the content; the element returned by its
    ``render()`` replaces the matched text.
    """

    def __init__(self, md):
        super().__init__(r':(?P<name>[^:]+?):`(?P<content>[^`]+?)`', md)

    def handleMatch(self, m, data):
        """Return ``(element, start, end)`` for a known role.

        The name pattern accepts almost any text between two colons, so
        ordinary prose followed by a code span can match. When the name is
        not a registered role, ``(None, None, None)`` is returned so the text
        is left for the other inline processors instead of raising KeyError.
        """
        factory = roles.get(m.group('name'))
        if factory is None:
            return None, None, None

        role = factory(self.md, m.group('content'))
        el = role.render()
        el.role = role
        return el, m.start(0), m.end(0)
|
|
|
|
|
|
|
|
class BlueProcessor(markdown.inlinepatterns.InlineProcessor):
    """Inline processor turning ``!!text!!`` into ``<span class="blue">``."""

    def __init__(self, md):
        pattern = r'!!(.+?)!!'
        super().__init__(pattern, md)

    def handleMatch(self, m, data):
        """Return the span element plus the start and end of the match."""
        span = ET.Element('span', {'class': 'blue'})
        span.text = m.group(1)
        return span, m.start(0), m.end(0)
|
|
|
|
|
|
|
|
class WrapSectionProcessor(markdown.treeprocessors.Treeprocessor):
    """Tree processor grouping content between headings into ``<section>``s."""

    def run(self, root):
        """Move every non-heading child of *root* into a ``<section>``.

        ``h1``/``h2``/``h3`` children stay direct children of *root*. Each
        run of other children is moved into a section placed where the run
        was. *root* is modified in place.
        """
        pending = ET.Element('section')

        for node in tuple(root):
            if node.tag not in ('h1', 'h2', 'h3'):
                root.remove(node)
                pending.append(node)
                continue

            # A heading closes the section collected so far (if any).
            if len(pending):
                position = list(root).index(node)
                root.insert(position, pending)
                pending = ET.Element('section')

        if len(pending):
            root.append(pending)
|
|
|
|
|
|
|
|
# Adapted from Python-Markdown
|
|
|
|
# Allow tabs
|
|
|
|
class AdmonitionProcessor(markdown.blockprocessors.BlockProcessor):
    """Block processor for ``!!! type "title"`` admonitions.

    Indented content may use either ``tab_length`` spaces or a tab, because
    de-indentation goes through the Markdown instance's own ``detab``.
    """

    CLASSNAME = 'admonition'
    CLASSNAME_TITLE = 'admonition-title'
    RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)')
    RE_SPACES = re.compile(' +|\t+')

    def test(self, parent, block):
        """Accept an admonition header, or an indented continuation block.

        A continuation is an indented block whose preceding sibling carries
        the admonition class.
        """
        previous = self.lastChild(parent)
        header = self.RE.search(block)
        if header:
            return header

        indented = block.startswith(' ' * self.tab_length) or block.startswith('\t')
        return (indented and previous is not None and
                self.CLASSNAME in previous.get('class', ''))

    def run(self, parent, blocks):
        """Create a new admonition ``div`` or extend the previous one."""
        previous = self.lastChild(parent)
        text = blocks.pop(0)
        header = self.RE.search(text)

        if header is None:
            # Continuation block: its content joins the previous admonition.
            body, leftover = self.parser.md.detab(text)
            container = previous
        else:
            # Drop the header line before de-indenting the content.
            body, leftover = self.parser.md.detab(text[header.end():])
            css_class, heading = self.get_class_and_title(header)
            container = ET.SubElement(parent, 'div')
            container.set('class', '{} {}'.format(self.CLASSNAME, css_class))
            if heading:
                title_el = ET.SubElement(container, 'p')
                title_el.text = heading
                title_el.set('class', self.CLASSNAME_TITLE)

        self.parser.parseChunk(container, body)

        if leftover:
            # Unindented lines after the indented content are queued as the
            # next block to process.
            blocks.insert(0, leftover)

    def get_class_and_title(self, match):
        """Return ``(klass, title)`` for a header *match*.

        *klass* is the lower-cased type text with whitespace runs collapsed.
        *title* is the quoted title; the capitalised first class word when no
        title was given (``!!! note`` -> ``Note``); or ``None`` when the title
        is an explicit ``""``, so that no title paragraph is rendered.
        """
        quoted = match.group(2)
        klass = self.RE_SPACES.sub(' ', match.group(1).lower())

        if quoted is None:
            return klass, klass.split(' ', 1)[0].capitalize()
        return klass, (quoted or None)
|
|
|
|
|
|
|
|
# Adapted from Python-Markdown
|
|
|
|
# Fix for tables
|
|
|
|
class AttrListTreeprocessor(markdown.treeprocessors.Treeprocessor):
    """Tree processor applying ``{: ... }`` attribute lists to elements.

    Besides the per-element handling it has two table-specific cases: an
    attribute list in the first cell of a table's last row is applied to the
    table itself, and a ``td`` may carry an attribute list anywhere in its
    text.
    """

    # "{...}" or "{:...}"; the body may not contain "}" or a newline.
    BASE_RE = r'\{\:?([^\}\n]*)\}'
    # Attribute list at the end of the text, preceded by at least one space.
    HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
    # Attribute list alone on the last line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^%s' % BASE_RE)
    # Runs of characters outside the allowed name set; sanitize_name()
    # replaces each run with "_".
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc):
        """Walk *doc*, move attribute lists out of the text and onto elements.

        The tree is modified in place; nothing is returned.
        """
        for elem in doc.iter():
            if self.md.is_block_level(elem.tag):
                # Block level: check for attrs on last line of text
                RE = self.BLOCK_RE
                if markdown.extensions.attr_list.isheader(elem) or elem.tag == 'dt':
                    # header or def-term: check for attrs at end of line
                    RE = self.HEADER_RE
                if len(elem) and elem.tag == 'li':
                    # special case list items. children may include a ul or ol.
                    pos = None
                    # find the ul or ol position
                    for i, child in enumerate(elem):
                        if child.tag in ['ul', 'ol']:
                            pos = i
                            break
                    if pos is None and elem[-1].tail:
                        # use tail of last child. no ul or ol.
                        m = RE.search(elem[-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[-1].tail = elem[-1].tail[:m.start()]
                    elif pos is not None and pos > 0 and elem[pos-1].tail:
                        # use tail of last child before ul or ol
                        m = RE.search(elem[pos-1].tail)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem[pos-1].tail = elem[pos-1].tail[:m.start()]
                    elif elem.text:
                        # use text. ul is first child.
                        m = RE.search(elem.text)
                        if m:
                            self.assign_attrs(elem, m.group(1))
                            elem.text = elem.text[:m.start()]
                elif len(elem) and elem.tag == 'table' and len(elem[-1]) and len(elem[-1][-1]) and elem[-1][-1][0].text:
                    # SPECIAL CASE table, use last row
                    # The attribute list must open the first cell of the last
                    # row of the table's last child.
                    RE = self.INLINE_RE
                    m = RE.search(elem[-1][-1][0].text) # tbody -> tr -> td
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        # Remove last row
                        elem[-1].remove(elem[-1][-1]) # tbody -> tr
                elif len(elem) and elem[-1].tail:
                    # has children. Get from tail of last child
                    m = RE.search(elem[-1].tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem[-1].tail = elem[-1].tail[:m.start()]
                        if markdown.extensions.attr_list.isheader(elem):
                            # clean up trailing #s
                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
                elif elem.text:
                    # no children. Get from text.
                    m = RE.search(elem.text)
                    if not m and elem.tag == 'td':
                        # Table cells fall back to an attribute list anywhere
                        # in the text; the text is then cut at the match start.
                        m = re.search(self.BASE_RE, elem.text)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.text = elem.text[:m.start()]
                        if markdown.extensions.attr_list.isheader(elem):
                            # clean up trailing #s
                            elem.text = elem.text.rstrip('#').rstrip()
            else:
                # inline: check for attrs at start of tail
                if elem.tail:
                    m = self.INLINE_RE.match(elem.tail)
                    if m:
                        self.assign_attrs(elem, m.group(1))
                        elem.tail = elem.tail[m.end():]

    def assign_attrs(self, elem, attrs):
        """Assign the attributes parsed from the string *attrs* to *elem*.

        A ``.`` key appends its value to the element's ``class``; any other
        key is sanitised with sanitize_name() and set as an attribute.
        """
        for k, v in markdown.extensions.attr_list.get_attrs(attrs):
            if k == '.':
                # add to class
                cls = elem.get('class')
                if cls:
                    elem.set('class', '{} {}'.format(cls, v))
                else:
                    elem.set('class', v)
            else:
                # assign attr k with v
                elem.set(self.sanitize_name(k), v)

    def sanitize_name(self, name):
        """
        Sanitize name as 'an XML Name, minus the ":"'.
        See https://www.w3.org/TR/REC-xml-names/#NT-NCName
        """
        return self.NAME_RE.sub('_', name)
|
|
|
|
|
|
|
|
# Footnotes
|
|
|
|
|
|
|
|
class FootnoteExtension(markdown.extensions.footnotes.FootnoteExtension):
    """Footnote extension whose rendered footnote list has no backlinks."""

    # Override
    def extendMarkdown(self, md):
        """Register this extension's preprocessor, inline pattern and tree processor on *md*."""
        footnotes_mod = markdown.extensions.footnotes

        md.registerExtension(self)
        self.parser = md.parser
        self.md = md

        md.preprocessors.register(footnotes_mod.FootnotePreprocessor(self), 'footnote', 15)

        reference_pattern = r'\[\^([^\]]*)\]' # blah blah [^1] blah
        inline = FootnoteInlineProcessor(reference_pattern, self)
        md.inlinePatterns.register(inline, 'footnote', 175)

        md.treeprocessors.register(footnotes_mod.FootnoteTreeprocessor(self), 'footnote', 50)

    # Override to omit backlinks
    def makeFootnotesDiv(self, root):
        """Return a ``<div class="footnote">`` listing all footnotes, or None if there are none."""
        if not self.footnotes.keys():
            return None

        container = ET.Element("div", {'class': 'footnote'})
        listing = ET.SubElement(container, "ol")
        # Footnote text is parsed into this scratch element and the results
        # are then moved into the list item.
        scratch = ET.Element("div")

        for key in self.footnotes.keys():
            item = ET.SubElement(listing, "li")
            item.set("id", self.makeFootnoteId(key))

            self.parser.parseChunk(scratch, self.footnotes[key])
            parsed = list(scratch)
            item.extend(parsed)
            for node in parsed:
                scratch.remove(node)

        return container
|
|
|
|
|
|
|
|
class FootnoteInlineProcessor(markdown.extensions.footnotes.FootnoteInlineProcessor):
    """Footnote reference processor that accepts a trailing comma in the id."""

    # Override to handle commas
    def handleMatch(self, m, data):
        """Return ``(sup, start, end)`` for a known footnote, else three Nones.

        Trailing commas are stripped from the captured id before lookup. When
        the capture ended with a comma, a single ``,`` is placed after the
        link.
        """
        captured = m.group(1)
        key = captured.rstrip(',')
        known = self.footnotes.footnotes

        if key not in known.keys():
            return None, None, None

        sup = ET.Element("sup")
        sup.set('class', 'footnote-ref')
        sup.set('id', self.footnotes.makeFootnoteRefId(key, found=True))

        link = ET.SubElement(sup, "a")
        link.set('href', '#' + self.footnotes.makeFootnoteId(key))
        # The visible number is the 1-based position of the footnote.
        link.text = str(list(known.keys()).index(key) + 1)
        if captured.endswith(','):
            link.tail = ','

        return sup, m.start(0), m.end(0)
|
|
|
|
|
|
|
|
# Custom directives and roles
|
2020-06-20 23:10:01 +10:00
|
|
|
# Fill the registries from the bundled module. The import sits at the bottom
# of the file -- presumably because markup_custom itself imports from this
# module (TODO confirm).
from . import markup_custom
directives.update(markup_custom.directives)
roles.update(markup_custom.roles)

# markup_custom2 is optional: if importing it raises ImportError, its
# directives and roles are skipped.
try:
    from . import markup_custom2
    directives.update(markup_custom2.directives)
    roles.update(markup_custom2.roles)
except ImportError:
    pass
|