Patch summary: replace the Parser's buffered leading-whitespace state
(cur_line_contains_handlebars / cur_line_contains_nonblank_literal /
cur_line_leading_ws and commit_leading_ws) with a start_of_line flag plus a
module-level lookahead helper, line_contains_only_ws_or_handlebars.
NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy; verify context lines against
htmlcc/parser.py before applying.

diff --git a/htmlcc/parser.py b/htmlcc/parser.py
index f19e91e..1db54f1 100644
--- a/htmlcc/parser.py
+++ b/htmlcc/parser.py
@@ -16,6 +16,8 @@
 
 from .emitter import Emitter
 
+import re
+
 class Parser:
     """Parser implementation for template files"""
 
@@ -24,9 +26,8 @@ class Parser:
         self.emitter = emitter
 
         # Internal state
-        self.cur_line_contains_handlebars = False
-        self.cur_line_contains_nonblank_literal = False
-        self.cur_line_leading_ws = '' # Buffer for leading whitespace
+        self.start_of_line = True
+        self.cur_line_suppress_whitespace = False
         self.in_html = False
 
     def parse(self) -> None:
@@ -191,27 +192,16 @@ class Parser:
             raise SyntaxError('Unexpected text outside page block')
 
         if s.isspace():
-            if not self.cur_line_contains_nonblank_literal:
-                if '\n' in s:
-                    # End of line which contains only nonblank literals
-                    if self.cur_line_contains_handlebars:
-                        # Do not print the whitespace
-                        self.reset_new_line()
-                        return
-                else:
-                    # Not sure yet whether we should print the space - add to whitespace buffer
-                    self.cur_line_leading_ws += s
-                    return
+            if self.start_of_line:
+                # Whitespace at start of line
+                self.cur_line_suppress_whitespace = line_contains_only_ws_or_handlebars(s, self.buffer)
+
+            if not self.cur_line_suppress_whitespace:
+                self.emitter.output_literal_string(s)
         else:
-            self.commit_leading_ws()
-            self.cur_line_contains_nonblank_literal = True
+            self.emitter.output_literal_string(s)
 
-        # Conditionally output the literal string
-        if not self.cur_line_contains_nonblank_literal and self.cur_line_contains_handlebars:
-            # If the current line contains handlebars and whitespace only, do not emit literal string
-            return
-
-        self.emitter.output_literal_string(s)
+        self.start_of_line = False
 
         if '\n' in s:
             self.reset_new_line()
@@ -219,16 +209,29 @@ class Parser:
     def reset_new_line(self) -> None:
         """Reset the internal state for a new line"""
 
-        self.cur_line_contains_handlebars = False
-        self.cur_line_contains_nonblank_literal = False
-        self.cur_line_leading_ws = ''
-
-    def commit_leading_ws(self) -> None:
-        """Commit cur_line_leading_ws buffer to output"""
-
-        if self.cur_line_leading_ws:
-            self.emitter.output_literal_string(self.cur_line_leading_ws)
-            self.cur_line_leading_ws = ''
+        self.start_of_line = True
+        self.cur_line_suppress_whitespace = False
 
 class SyntaxError(Exception):
     pass
+
+def line_contains_only_ws_or_handlebars(s: str, buffer: str) -> bool:
+    """Return True if the given line contains only whitespace or handlebars"""
+
+    if not s.isspace():
+        return False
+
+    if '\n' in s:
+        return True
+
+    # Read until the next newline or EOF
+    stop_reading_idx = buffer.index('\n') + 1 if '\n' in buffer else len(buffer)
+    remaining_line = buffer[:stop_reading_idx]
+
+    # Remove all handlebars
+    text_in_remaining_line = re.sub(r'\{\{.*?\}\}|\{!.*?!\}|\{%.*?%\}|\{#.*?#\}', r'', remaining_line)
+
+    if not text_in_remaining_line:
+        return True
+
+    return text_in_remaining_line.isspace()