htmlcc/htmlcc/parser.py

231 lines
7.3 KiB
Python

# htmlcc - Statically compiled HTML templates for C
# Copyright (C) 2025 Lee Yingtong Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from .emitter import Emitter
class Parser:
    """Parser implementation for template files.

    The parser consumes ``buffer`` from the front, one token at a time. A token
    is either a handlebars construct (``{{ ... }}``, ``{! ... !}``,
    ``{% ... %}``, ``{# ... #}``) or a run of literal text ending at the next
    newline, the next handlebars opener, or end of input. Everything recognised
    is reported by calling methods on ``emitter``.

    Raises:
        SyntaxError: on an unknown ``{% ... %}`` command, on non-whitespace
            text outside a page/block, or on a handlebars construct that is
            never closed.
    """

    # Opening delimiters of every handlebars form; literal text stops at any of them
    _HANDLEBARS_OPENERS = ('{{', '{!', '{%', '{#')

    # Keywords taking an argument that open (or continue) a C block, as
    # (template prefix, C prefix) pairs. Driving the slicing from the table
    # keeps the stripped prefix length in sync with the prefix that matched.
    _C_BLOCK_KEYWORDS = (
        ('for ', 'for ('),
        ('if ', 'if ('),
        ('elif ', '} else if ('),
        ('while ', 'while ('),
    )

    # Keywords that simply close a C block
    _C_BLOCK_END_KEYWORDS = ('endfor', 'endif', 'endwhile')

    # The annotation is quoted so it is not evaluated when the class is defined
    def __init__(self, buffer: str, emitter: 'Emitter'):
        """Create a parser over ``buffer`` which reports to ``emitter``."""
        self.buffer = buffer  # Remaining unparsed input
        self.emitter = emitter

        # Internal state
        self.cur_line_contains_handlebars = False
        self.cur_line_contains_nonblank_literal = False
        self.cur_line_leading_ws = ''  # Buffer for leading whitespace
        self.in_html = False  # True between page/block and endpage/endblock

    def parse(self) -> None:
        """Parse the entire file"""
        while self.buffer:
            self.parse_toplevel()

    def parse_toplevel(self) -> None:
        """Parse a literal string or handlebars"""
        if self.buffer.startswith('{{'):
            self.parse_handlebars_variable()
        elif self.buffer.startswith('{!'):
            self.parse_handlebars_raw_c()
        elif self.buffer.startswith('{%'):
            self.parse_handlebars_keyword()
        elif self.buffer.startswith('{#'):
            self.parse_handlebars_comment()
        else:
            self.parse_literal()

    def _read_handlebars(self, opener: str, closer: str) -> str:
        """Consume one handlebars construct from the front of the buffer.

        The buffer must start with ``opener``. Returns the text between the
        delimiters with surrounding whitespace stripped.

        Raises:
            SyntaxError: if ``closer`` does not occur after ``opener``.
        """
        self.cur_line_contains_handlebars = True

        # Search after the opener so that the two delimiters cannot share a
        # character (e.g. "{#}" is not a complete comment)
        close_idx = self.buffer.find(closer, len(opener))
        if close_idx == -1:
            raise SyntaxError(f'Unterminated {opener} ... {closer} handlebars')

        content = self.buffer[len(opener):close_idx].strip()
        self.buffer = self.buffer[close_idx + len(closer):]
        return content

    @staticmethod
    def _strip_filter(variable: str, filter_name: str):
        """Return the expression preceding ``| filter_name``, or None if ``variable`` does not end with that filter"""
        if not variable.endswith(filter_name):
            return None
        head = variable[:-len(filter_name)].rstrip()
        if not head.endswith('|'):
            return None
        return head[:-1].rstrip()

    def parse_handlebars_comment(self) -> None:
        """Parse {# ... #} handlebars"""
        # Comment content is discarded
        self._read_handlebars('{#', '#}')

    def parse_handlebars_keyword(self) -> None:
        """Parse {% ... %} handlebars

        Raises:
            SyntaxError: if the command is not recognised.
        """
        command = self._read_handlebars('{%', '%}')

        if command.startswith('block '):
            # {% block ... %}
            self.emitter.start_block(command[len('block '):].strip())
            self.in_html = True
        elif command == 'endblock':
            # {% endblock %}
            self.emitter.end_block()
            self.in_html = False
        elif command.startswith('page '):
            # {% page ... %}
            self.emitter.start_page(command[len('page '):].strip())
            self.in_html = True
        elif command == 'endpage':
            # {% endpage %}
            self.emitter.end_page()
            self.in_html = False
        elif command == 'else':
            # {% else %}
            self.emitter.emit('} else {')
        elif command in self._C_BLOCK_END_KEYWORDS:
            # {% endfor %}, {% endif %}, {% endwhile %}
            self.emitter.emit('}')
        else:
            # {% for ... %}, {% if ... %}, {% elif ... %}, {% while ... %}
            for keyword, c_prefix in self._C_BLOCK_KEYWORDS:
                if command.startswith(keyword):
                    argument = command[len(keyword):].strip()
                    self.emitter.emit(c_prefix + argument + ') {')
                    return
            raise SyntaxError(f'Unknown command "{command}"')

    def parse_handlebars_variable(self) -> None:
        """Parse {{ ... }} handlebars

        A trailing ``| attr``, ``| urlencode`` or ``| %d`` selects how the
        expression is output; without a filter it is output as text.
        """
        variable = self._read_handlebars('{{', '}}')

        # Detect filters
        expression = self._strip_filter(variable, 'attr')
        if expression is not None:
            # Output as HTML attribute
            self.emitter.output_variable_as_attr(expression)
            return

        expression = self._strip_filter(variable, 'urlencode')
        if expression is not None:
            # Output as URL component
            self.emitter.output_variable_urlencoded(expression)
            return

        expression = self._strip_filter(variable, '%d')
        if expression is not None:
            # Output as %d
            self.emitter.output_variable_formatted('%d', expression)
            return

        # No filter - output as text
        self.emitter.output_variable_as_text(variable)

    def parse_handlebars_raw_c(self) -> None:
        """Parse {! ... !} handlebars"""
        self.emitter.emit_raw_c(self._read_handlebars('{!', '!}'))

    def parse_literal(self) -> None:
        """Parse literal string

        Reads up to and including the next newline, or up to the next
        handlebars opener or EOF, whichever comes first. Whitespace-only text
        on a line that otherwise holds only handlebars is not emitted.

        Raises:
            SyntaxError: if non-whitespace text appears outside a page/block.
        """
        # Read until the next newline or handlebars or EOF
        stop_candidates = [len(self.buffer)]  # EOF is backup case
        newline_idx = self.buffer.find('\n')
        if newline_idx != -1:
            stop_candidates.append(newline_idx + 1)  # Include the newline character
        for opener in self._HANDLEBARS_OPENERS:
            opener_idx = self.buffer.find(opener)
            if opener_idx != -1:
                stop_candidates.append(opener_idx)
        stop_reading_idx = min(stop_candidates)

        s = self.buffer[:stop_reading_idx]
        self.buffer = self.buffer[stop_reading_idx:]

        if not self.in_html:
            # Not in a page block!
            if s.isspace():
                # Suppress whitespace outside page blocks
                return
            raise SyntaxError('Unexpected text outside page block')

        if not s.isspace():
            # Real content: any buffered indentation belongs to the output after all
            self.commit_leading_ws()
            self.cur_line_contains_nonblank_literal = True
        elif not self.cur_line_contains_nonblank_literal:
            if '\n' not in s:
                # Not sure yet whether we should print the space - add to whitespace buffer
                self.cur_line_leading_ws += s
                return
            if self.cur_line_contains_handlebars:
                # End of a line which contains only handlebars and whitespace - do not print the whitespace
                self.reset_new_line()
                return
            # Otherwise this is a blank line without handlebars - emitted as is below

        self.emitter.output_literal_string(s)

        if '\n' in s:
            self.reset_new_line()

    def reset_new_line(self) -> None:
        """Reset the internal state for a new line"""
        self.cur_line_contains_handlebars = False
        self.cur_line_contains_nonblank_literal = False
        self.cur_line_leading_ws = ''

    def commit_leading_ws(self) -> None:
        """Commit cur_line_leading_ws buffer to output"""
        # NOTE(review): this runs when the first non-blank literal of the line
        # is seen, so for a line like "  {{ x }} text" the buffered indentation
        # is emitted after the value of x - confirm this ordering is intended
        if self.cur_line_leading_ws:
            self.emitter.output_literal_string(self.cur_line_leading_ws)
            self.cur_line_leading_ws = ''
class SyntaxError(Exception):
    """Error raised by Parser when a template cannot be parsed.

    Note that within this module the name shadows the built-in SyntaxError.
    """