#   legalmd: Markdown-based legal markup
#   Copyright © 2019  Lee Yingtong Li (RunasSudo)
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.

from itertools import zip_longest
import re

import mistletoe

# Indented code blocks are disabled: in this dialect leading tabs carry
# sub-rule nesting information (see the patterns below) instead.
mistletoe.block_token.remove_token(mistletoe.block_token.BlockCode)


class NumberedHeading(mistletoe.block_token.BlockToken):
    """Heading carrying an explicit number, e.g. ``## 12A Title``.

    Attributes set by ``__init__``:
        level: number of leading ``#`` characters (1-6).
        label: the heading number (digits, capitals, en dashes) or ``xx``.
        children: span tokens parsed from the heading text.
    """

    pattern = re.compile(r'(#{1,6})\s*([0-9A-Z–]+|xx)\s+(.+)')

    def __init__(self, match):
        # ``match`` is the (level, label, content) tuple produced by read().
        self.level, self.label, content = match
        self.children = mistletoe.span_token.tokenize_inner(content)

    @classmethod
    def start(cls, line):
        """Return a truthy match object if ``line`` opens a numbered heading."""
        return cls.pattern.match(line)

    @classmethod
    def read(cls, lines):
        """Consume one line and return ``(level, label, content)``."""
        line = next(lines)
        match = cls.pattern.match(line)
        level = len(match.group(1))
        label = match.group(2)
        content = match.group(3)
        # A heading whose whole text is '***' is rewritten to literal stars
        # before span tokenization.
        if content == '***':
            content = '★★★'
        return level, label, content

    def full_label(self):
        """Return the heading's label, or ``None`` if it cannot be labelled.

        ``None`` is returned when the label is empty or when the heading is
        not a direct child of the document.
        """
        if not self.label:
            return None
        if not isinstance(self.parent, mistletoe.block_token.Document):
            return None
        return self.label


mistletoe.block_token.add_token(NumberedHeading)


class Subrules(mistletoe.block_token.BlockToken):
    """Block of consecutive sub-rule lines; children are ``SubrulesItem``s."""

    # Optional tabs, a parenthesised label, whitespace, then some content.
    pattern = re.compile(r'\t*\(([0-9A-Za-z–]+)\)\s+.')

    def __init__(self, children):
        self.children = children

    @classmethod
    def start(cls, line):
        """Return a truthy match object if ``line`` opens a sub-rule block."""
        return cls.pattern.match(line)

    @classmethod
    def read(cls, lines):
        """Collect ``SubrulesItem`` tokens until a non-item line is reached.

        Blank lines between items are skipped.  The line that terminates the
        block (a note, or anything ``SubrulesItem.start`` rejects) is put
        back via ``lines.reset()`` so it is left for the next token.
        """
        children = []
        while True:
            lines.anchor()
            try:
                line = next(lines)
            except StopIteration:
                break

            # Blank lines do not end the block; drop them and look again.
            if len(line.strip()) == 0:
                continue

            # Un-read the line: either the item reader consumes it again, or
            # it belongs to whatever follows this block.
            lines.reset()

            if Note.pattern.match(line):
                break
            if not SubrulesItem.start(line):
                break

            children.append(SubrulesItem(*SubrulesItem.read(lines)))

        return children


mistletoe.block_token.add_token(Subrules)


class SubrulesItem(mistletoe.block_token.BlockToken):
    """A single line inside a ``Subrules`` block.

    Attributes set by ``__init__``:
        level: nesting depth derived from the number of leading tabs.
        label: the parenthesised label including brackets (e.g. ``(a)``),
            or ``None`` for an unlabelled line.
        children: span tokens parsed from the line's text.
    """

    pattern = re.compile(r'(\t*)(\([0-9A-Za-z–]+\))?\s*(.+)')

    def __init__(self, level, label, children):
        self.level = level
        self.label = label
        self.children = children

    @classmethod
    def start(cls, line):
        """Return ``True`` if ``line`` can be read as a sub-rule item.

        A line qualifies when it has at least one leading tab or a label,
        and is not a definition line.
        """
        match = cls.pattern.match(line)
        if match is None:
            # Blank / whitespace-only line: nothing to read as an item.
            return False
        if not match.group(1) and not match.group(2):
            # Neither an indent nor a label
            return False
        if Definition.pattern.match(line):
            return False
        return True

    @classmethod
    def read(cls, lines):
        """Consume one line and return ``(level, label, children)``."""
        line = next(lines)
        lead_in, label, content = cls.pattern.match(line).group(1, 2, 3)
        # A line whose whole text is '***' is rewritten to literal stars
        # before span tokenization.
        if content == '***':
            content = '★★★'
        level = len(lead_in)
        children = mistletoe.span_token.tokenize_inner(content)
        # An unlabelled line is placed one level shallower than its indent.
        if label is None:
            level -= 1
        return level, label, children

    def full_label(self):
        """Return the fully qualified label, e.g. ``12A(1)(a)``.

        The result is the label of the nearest preceding ``NumberedHeading``
        followed by the labels of each enclosing sub-rule item and finally
        this item's own label.

        Returns ``None`` when this item has no label, when the enclosing
        ``Subrules`` block is not a direct child of the document, or when no
        numbered heading precedes the block.
        """
        if not self.label:
            return None

        block = self.parent
        document = block.parent
        if not isinstance(document, mistletoe.block_token.Document):
            return None

        labels = [self.label]

        # Subrules items: walk backwards through earlier siblings, picking up
        # the closest labelled item at each strictly shallower level.
        cur_level = self.level
        for child in reversed(block.children[0:block.children.index(self)]):
            if child.level < cur_level and child.label:
                labels.append(child.label)
                cur_level = child.level

        # Section: nearest numbered heading before the enclosing block.
        # A default is supplied so that a missing heading yields None rather
        # than letting StopIteration escape from this method.
        preceding = document.children[0:document.children.index(block)]
        section = next(
            (x for x in reversed(preceding) if isinstance(x, NumberedHeading)),
            None,
        )
        if section is None:
            return None
        labels.append(section.label)

        return ''.join(reversed(labels))


class Note(mistletoe.block_token.BlockToken):
    """Line of the form ``LABEL: text`` with an upper-case label.

    Attributes set by ``__init__``:
        level: number of leading tabs.
        label: the label with its first character upper-cased and the rest
            lower-cased (``NOTE`` becomes ``Note``).
        children: span tokens parsed from the text after the colon.
    """

    pattern = re.compile(r'(\t*)([0-9A-Z ]+):\s+(.+)')

    def __init__(self, match):
        # ``match`` is the (level, label, children) tuple produced by read().
        self.level, self.label, self.children = match

    @classmethod
    def start(cls, line):
        """Return a truthy match object if ``line`` is a note."""
        return cls.pattern.match(line)

    @classmethod
    def read(cls, lines):
        """Consume one line and return ``(level, label, children)``."""
        line = next(lines)
        match = cls.pattern.match(line)
        level_str, label, content = match.group(1, 2, 3)
        label = label[0].upper() + label[1:].lower()
        return len(level_str), label, mistletoe.span_token.tokenize_inner(content)


mistletoe.block_token.add_token(Note)


class Definition(mistletoe.block_token.BlockToken):
    """Line introducing a defined term written as ``***term***``.

    The emphasised term must be followed by a colon, or by text containing
    ``means`` or ``includes``.

    Attributes set by ``__init__``:
        level: number of leading tabs.
        children: span tokens parsed from the whole line after the tabs.
    """

    pattern = re.compile(r'(\t*)(\*\*\*.+\*\*\*(?::|.*means|.*includes).*)')

    def __init__(self, match):
        # ``match`` is the (level, children) tuple produced by read().
        self.level, self.children = match

    @classmethod
    def start(cls, line):
        """Return a truthy match object if ``line`` is a definition."""
        return cls.pattern.match(line)

    @classmethod
    def read(cls, lines):
        """Consume one line and return ``(level, children)``."""
        line = next(lines)
        match = cls.pattern.match(line)
        level_str, content = match.group(1, 2)
        return len(level_str), mistletoe.span_token.tokenize_inner(content)


mistletoe.block_token.add_token(Definition)


def convert_leading_tabs(string):
    """Return ``string`` unchanged (identity replacement for the Quote hook)."""
    return string


# Override mistletoe's Quote.convert_leading_tabs with the identity function
# above, so that quoted content reaches the tab-sensitive tokens unmodified.
mistletoe.block_token.Quote.convert_leading_tabs = convert_leading_tabs


class Table(mistletoe.block_token.BlockToken):
    """Pipe-delimited table whose rows and cells may carry labels.

    Replaces mistletoe's built-in ``Table`` token (see the registration
    below).  ``header`` is only set when a ``---`` delimiter row is present.
    """

    def __init__(self, lines):
        # The length check keeps a one-line table from raising IndexError.
        if len(lines) > 1 and '---' in lines[1]:
            self.column_align = [
                mistletoe.block_token.Table.parse_align(column)
                for column in mistletoe.block_token.Table.split_delimiter(lines[1])
            ]
            # Row 0 is the header; body rows are numbered from 1.
            self.header = TableRow(lines[0], 0, self.column_align)
            self.children = [
                TableRow(line, rownum + 1, self.column_align)
                for rownum, line in enumerate(lines[2:])
            ]
        else:
            self.column_align = [None]
            self.children = [
                TableRow(line, rownum) for rownum, line in enumerate(lines)
            ]

    @staticmethod
    def start(line):
        """Return ``True`` if ``line`` contains a pipe character."""
        return '|' in line

    @staticmethod
    def read(lines):
        """Return the table's lines, or ``None`` if this is not a table.

        Consecutive lines containing ``|`` are collected.  Unless there are
        at least two and the second contains ``---``, the input position is
        restored and ``None`` is returned.
        """
        lines.anchor()
        line_buffer = [next(lines)]
        while lines.peek() is not None and '|' in lines.peek():
            line_buffer.append(next(lines))
        if len(line_buffer) < 2 or '---' not in line_buffer[1]:
            lines.reset()
            return None
        return line_buffer


class TableRow(mistletoe.block_token.BlockToken):
    """One row of a ``Table``.

    Attributes set by ``__init__``:
        rownum: index of the row as passed by ``Table``.
        row_align: per-column alignment list (``[None]`` if not supplied).
        children: the row's ``TableCell`` tokens.
        label: label of the first cell, or ``None``.
    """

    def __init__(self, line, rownum, row_align=None):
        # rownum must be set before the cells are built: TableCell reads it.
        self.rownum = rownum
        self.row_align = row_align or [None]
        # filter(None, ...) drops the empty strings produced by split('|'),
        # such as those from the outer pipes.
        cells = filter(None, line.strip().split('|'))
        # zip_longest pads the shorter of cells / alignments with None, so a
        # missing cell becomes an empty string and a missing alignment None.
        self.children = [
            TableCell(cell.strip() if cell else '', self, colnum, align)
            for colnum, (cell, align) in enumerate(zip_longest(cells, self.row_align))
        ]
        self.label = self.children[0].label


class TableCell(mistletoe.block_token.BlockToken):
    """One cell of a ``TableRow``.

    Cells in the first column or in row 0 may start with a label, optionally
    followed by a bracketed integer weight, e.g. ``A[2] text``.

    Attributes set by ``__init__``:
        row, colnum, align: as passed in.
        label: the leading label, or ``None``.
        weight: the bracketed integer (default 1) for first-column / row-0
            cells; ``None`` for all other cells.
        children: span tokens parsed from the remaining cell text.
    """

    pattern1 = re.compile(r'\s*(?:([0-9A-Za-z]+)(?:\[([0-9]+)\])?\s+)?(.*)')

    def __init__(self, inner, row, colnum, align=None):
        self.row = row
        self.colnum = colnum
        self.align = align

        if self.colnum == 0 or self.row.rownum == 0:
            label, weight, content = self.pattern1.match(inner).group(1, 2, 3)
            self.label = label.strip() if label else None
            self.weight = int(weight) if weight else 1
            self.children = mistletoe.span_token.tokenize_inner(content)
        else:
            self.label = None
            self.weight = None
            self.children = mistletoe.span_token.tokenize_inner(inner)


# Swap mistletoe's own Table token for the labelled variant defined above.
mistletoe.block_token.remove_token(mistletoe.block_token.Table)
mistletoe.block_token.add_token(Table)


class CrossReference(mistletoe.span_token.SpanToken):
    """Inline cross-reference such as ``12A(1)`` or ``section 3`` in
    backticks followed by an underscore.

    Attributes set by ``__init__``:
        reference_type: optional leading word (letters and dots), or ``None``.
        reference_num: the reference number as written.
        reference_parts: ``reference_num`` split into a leading number and
            parenthesised components.
    """

    pattern = re.compile(r'`(?:([A-Za-z.]+?)\s+)?([0-9A-Za-z\(\)]+?)`_')
    pattern_parts = re.compile(r'^[0-9A-Za-z]+|\([0-9A-Za-z]+\)')

    def __init__(self, match):
        self.reference_type = match.group(1)
        self.reference_num = match.group(2)
        self.reference_parts = re.findall(self.pattern_parts, self.reference_num)

    def get_reference(self):
        """Resolve this reference against ``self.doc.full_label_map``.

        The reference is interpreted relative to the enclosing sub-rule item
        (or, failing that, the nearest preceding numbered heading): the
        longest prefix of that context's label is tried first, then
        progressively shorter ones, ending with the reference on its own.

        Returns the mapped object, or ``None`` if no candidate is found.
        """
        # Find the closest enclosing sub-rule item, if any.
        parent = None
        elem = self.parent
        while elem is not None:
            if isinstance(elem, SubrulesItem):
                parent = elem
                break
            elem = elem.parent

        if parent is None:
            # Might be a freestanding rule
            container = self.parent.parent
            preceding = container.children[0:container.children.index(self.parent)]
            section = next(
                (x for x in reversed(preceding) if isinstance(x, NumberedHeading)),
                None,
            )
            if section:
                parent = section

        # Split the context's full label into its components.
        base_label = parent.full_label() if parent else None
        if base_label:
            basenum = re.findall(self.pattern_parts, base_label)
        else:
            basenum = []

        # Try possibilities
        for index in reversed(range(0, len(basenum) + 1)):
            try_ref = ''.join(basenum[:index] + self.reference_parts)
            if try_ref in self.doc.full_label_map:
                return self.doc.full_label_map[try_ref]

        return None


mistletoe.span_token.add_token(CrossReference)