OpenTally/src/parser/blt.rs

400 lines
10 KiB
Rust
Raw Normal View History

/* OpenTally: Open-source election vote counting
* Copyright © 2021–2022 Lee Yingtong Li (RunasSudo)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
2021-07-29 03:24:51 +10:00
use crate::election::{Ballot, Candidate, Election};
use crate::numbers::Number;
#[cfg(not(target_arch = "wasm32"))]
use utf8_chars::BufReadCharsExt;
2021-07-29 17:34:34 +10:00
use std::fmt;
2021-07-29 03:24:51 +10:00
use std::iter::Peekable;
#[cfg(not(target_arch = "wasm32"))]
use std::fs::File;
#[cfg(not(target_arch = "wasm32"))]
use std::io::BufReader;
#[cfg(not(target_arch = "wasm32"))]
use std::path::Path;
2021-07-29 03:24:51 +10:00
/// Utility for parsing a BLT file
pub struct BLTParser<N: Number, I: Iterator<Item=char>> {
/// The peekable iterator of chars representing the BLT file
chars: Peekable<I>,
2021-07-29 03:24:51 +10:00
/// Temporary buffer for parsing ballot values
2021-07-29 17:34:34 +10:00
ballot_value_buf: String,
/// Current line number
line_no: u32,
/// Current column number
col_no: u32,
2021-07-29 03:24:51 +10:00
/// Number of candidates
num_candidates: usize,
/// Parsed [Election]
election: Election<N>,
}
/// An error when parsing a BLT file
pub enum ParseError {
    /// Unexpected character: `(line number, column number, offending character)`
    Unexpected(u32, u32, char),
    /// Unexpected character where something specific was required:
    /// `(line number, column number, offending character, description of what was expected)`
    Expected(u32, u32, char, &'static str),
}
/// Human-readable rendering of a [ParseError]: the position, the offending character and,
/// for [ParseError::Expected], a description of what was expected instead
impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ParseError::Unexpected(line_no, col_no, ch) => {
                write!(f, "Line {} col {}, unexpected '{}'", line_no, col_no, ch)
            }
            ParseError::Expected(line_no, col_no, ch, expected) => {
                write!(f, "Line {} col {}, unexpected '{}', expected {}", line_no, col_no, ch, expected)
            }
        }
    }
}
2021-07-31 15:24:23 +10:00
/// The debug representation of a [ParseError] is the same text as its [fmt::Display] output
impl fmt::Debug for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Delegate to the Display implementation with the caller's formatter
        <Self as fmt::Display>::fmt(self, f)
    }
}
2021-07-29 03:24:51 +10:00
impl<N: Number, I: Iterator<Item=char>> BLTParser<N, I> {
// NON-TERMINALS - HIGHER LEVEL
/// Parse the whole BLT file: header, withdrawn candidates, ballots, then the trailing strings
///
/// Any leading whitespace or comments are skipped first. The parsed data is stored in this
/// parser and can be taken out afterwards with [BLTParser::as_election].
pub fn parse_blt(&mut self) -> Result<(), ParseError> {
    self.delimiter();

    self.header()?;
    self.withdrawn_candidates()?;
    self.ballots()?;
    self.strings()
}
/// Parse the header: the number of candidates followed by the number of seats
///
/// Whitespace and comments after each number are skipped.
fn header(&mut self) -> Result<(), ParseError> {
    let num_candidates = self.usize()?;
    self.num_candidates = num_candidates;
    self.delimiter();

    let seats = self.usize()?;
    self.election.seats = seats;
    self.delimiter();

    Ok(())
}
/// Parse the withdrawn candidates (if any)
///
/// Each entry is a minus sign followed by a candidate number. The number in the file is
/// one-based; it is stored as a zero-based index.
fn withdrawn_candidates(&mut self) -> Result<(), ParseError> {
    loop {
        if self.lookahead() != '-' {
            return Ok(());
        }

        self.accept(); // Minus sign
        // usize() never yields 0 (an integer starts with a nonzero digit), so this cannot underflow
        let one_based = self.usize()?;
        self.election.withdrawn_candidates.push(one_based - 1);
        self.delimiter();
    }
}
/// Parse the list of ballots, up to and including the terminating `0`
///
/// A line starting with `0` is ambiguous: it is either the end-of-ballots marker or the
/// start of a decimal ballot value such as `0.5`. The `0` is consumed and the following
/// character decides which.
fn ballots(&mut self) -> Result<(), ParseError> {
    loop {
        if self.lookahead() != '0' {
            // Ordinary ballot: start from an empty value buffer
            self.ballot_value_buf.clear();
            self.ballot()?;
            continue;
        }

        // End of ballots, or start of decimal?
        self.accept();
        if self.lookahead() != '.' {
            // End of ballots
            self.delimiter();
            return Ok(());
        }

        // Decimal: seed the buffer with the '0' that has already been consumed
        self.ballot_value_buf.clear();
        self.ballot_value_buf.push('0');
        self.ballot()?;
    }
}
/// Parse a ballot
fn ballot(&mut self) -> Result<(), ParseError> {
self.ballot_value()?;
self.delimiter_not_nl();
// Read preferences
2021-09-04 02:26:30 +10:00
let mut preferences: Vec<Vec<usize>> = Vec::new();
2021-07-29 03:24:51 +10:00
loop {
if self.lookahead() == '0' || self.lookahead() == '\n' {
// End of preferences
self.accept();
break;
2021-09-04 02:26:30 +10:00
} else if self.lookahead() == '=' {
// Equal preference
self.accept();
preferences.last_mut().unwrap().push(self.usize()? - 1);
self.delimiter_not_nl();
2021-07-29 03:24:51 +10:00
} else {
2021-09-04 02:26:30 +10:00
// No equal preference
2021-09-03 23:53:15 +10:00
preferences.push(vec![self.usize()? - 1]);
2021-07-29 03:24:51 +10:00
self.delimiter_not_nl();
}
}
self.delimiter();
let ballot = Ballot {
2021-07-29 17:34:34 +10:00
orig_value: N::parse(&self.ballot_value_buf),
2021-10-27 19:52:51 +11:00
preferences,
2021-07-29 03:24:51 +10:00
};
self.election.ballots.push(ballot);
return Ok(());
}
/// Parse the list of strings at the end of the BLT file
///
/// Reads one string per candidate (`num_candidates` in total, each becoming a non-dummy
/// [Candidate]), followed by one further string which becomes the election name.
fn strings(&mut self) -> Result<(), ParseError> {
    for _ in 0..self.num_candidates {
        let candidate = Candidate {
            name: self.string()?,
            is_dummy: false,
        };
        self.election.candidates.push(candidate);
    }

    self.election.name = self.string()?;

    Ok(())
}
// NON-TERMINALS - LOWER LEVEL
/// Parse an integer (see [BLTParser::integer]) and convert it into a [usize]
///
/// # Panics
///
/// Panics with "Invalid usize" if the digit string cannot be converted to a [usize].
fn usize(&mut self) -> Result<usize, ParseError> {
    let digits = self.integer()?;
    let value = digits.parse::<usize>().expect("Invalid usize");
    Ok(value)
}
/// Parse an integer as a [String]: one nonzero digit followed by any number of digits
///
/// Fails only if the first character is not a nonzero digit; the first non-digit after
/// that simply ends the integer and is left unread.
fn integer(&mut self) -> Result<String, ParseError> {
    let leading = self.digit_nonzero()?;
    let mut digits = String::from(leading);

    while let Ok(next) = self.digit() {
        digits.push(next);
    }

    Ok(digits)
}
/// Read the characters of a ballot value into `ballot_value_buf`
///
/// Appends every consecutive digit, decimal point or slash to the buffer and stops at the
/// first other character, which is left unread. The buffer is not cleared here, and no
/// conversion to `N` happens here. This function always returns `Ok`.
fn ballot_value(&mut self) -> Result<(), ParseError> {
    while let Ok(element) = self.ballot_value_element() {
        self.ballot_value_buf.push(element);
    }
    Ok(())
}
/// Parse a quoted or raw string
fn string(&mut self) -> Result<String, ParseError> {
2021-10-27 19:52:51 +11:00
if let Ok(s) = self.quoted_string() {
return Ok(s);
2021-07-29 03:24:51 +10:00
}
2021-10-27 19:52:51 +11:00
if let Ok(s) = self.raw_string() {
return Ok(s);
2021-07-29 03:24:51 +10:00
}
2021-07-29 17:34:34 +10:00
return Err(ParseError::Expected(self.line_no, self.col_no, self.lookahead(), "string"));
2021-07-29 03:24:51 +10:00
}
/// Parse a quoted string
fn quoted_string(&mut self) -> Result<String, ParseError> {
if self.lookahead() == '"' {
self.accept(); // Opening quotation mark
let mut result = String::new();
while self.lookahead() != '"' {
// TODO: BufRead::read_until ?
result.push(self.accept());
}
self.accept(); // Closing quotation mark
if !self.eof() {
self.delimiter();
}
return Ok(result);
} else {
2021-07-29 17:34:34 +10:00
return Err(ParseError::Expected(self.line_no, self.col_no, self.lookahead(), "'\"'"));
2021-07-29 03:24:51 +10:00
}
}
/// Parse a raw (unquoted) string
///
/// Reads characters up to, but not including, the next whitespace character or the end of
/// the input, then skips any following whitespace and comments. The result may be empty.
/// This function always returns `Ok`.
fn raw_string(&mut self) -> Result<String, ParseError> {
    let mut result = String::new();

    // eof() must be checked before lookahead(): peeking past the end of the input panics,
    // which would otherwise happen whenever a raw string is the last thing in the file
    while !self.eof() && !self.lookahead().is_whitespace() {
        result.push(self.accept());
    }

    if !self.eof() {
        self.delimiter();
    }

    Ok(result)
}
/// Skip any sequence of whitespace or comments
fn delimiter(&mut self) {
loop {
if self.eof() {
break;
} else if self.lookahead() == '#' {
self.dnl();
if !self.eof() {
self.accept(); // Trailing newline
}
2021-07-29 03:24:51 +10:00
} else if self.lookahead().is_whitespace() {
self.accept();
while !self.eof() && self.lookahead().is_whitespace() { self.accept(); }
} else {
break;
}
}
}
/// Skip any sequence of whitespace or comments, but do not accept any newline and leave it trailing
fn delimiter_not_nl(&mut self) {
loop {
if self.eof() {
break;
} else if self.lookahead() == '#' {
self.dnl();
} else if self.lookahead().is_whitespace() && self.lookahead() != '\n' {
self.accept();
while !self.eof() && self.lookahead().is_whitespace() && self.lookahead() != '\n' { self.accept(); }
} else {
break;
}
}
2021-07-29 03:24:51 +10:00
}
/// Discard characters up to, but not including, the next newline (or the end of input)
fn dnl(&mut self) {
    loop {
        if self.eof() || self.lookahead() == '\n' {
            return;
        }
        // TODO: BufRead::read_until ?
        self.accept();
    }
}
// TERMINALS
/// Read a nonzero digit
fn digit_nonzero(&mut self) -> Result<char, ParseError> {
if self.lookahead() >= '1' && self.lookahead() <= '9' {
return Ok(self.accept());
} else {
2021-07-29 17:34:34 +10:00
return Err(ParseError::Expected(self.line_no, self.col_no, self.lookahead(), "nonzero digit"));
2021-07-29 03:24:51 +10:00
}
}
/// Read any digit
fn digit(&mut self) -> Result<char, ParseError> {
if self.lookahead() >= '0' && self.lookahead() <= '9' {
return Ok(self.accept());
} else {
2021-07-29 17:34:34 +10:00
return Err(ParseError::Expected(self.line_no, self.col_no, self.lookahead(), "digit"));
2021-07-29 03:24:51 +10:00
}
}
/// Read any element of a valid number, i.e. a digit, decimal point or slash
fn ballot_value_element(&mut self) -> Result<char, ParseError> {
if (self.lookahead() >= '0' && self.lookahead() <= '9') || self.lookahead() == '.' || self.lookahead() == '/' {
return Ok(self.accept());
} else {
2021-07-29 17:34:34 +10:00
return Err(ParseError::Expected(self.line_no, self.col_no, self.lookahead(), "number"));
2021-07-29 03:24:51 +10:00
}
}
// UTILITIES
/// Return `true` if there are no more characters to read
fn eof(&mut self) -> bool {
    let upcoming = self.chars.peek();
    upcoming.is_none()
}
/// Return the next character in the stream without consuming it
///
/// # Panics
///
/// Panics with "Unexpected EOF" if the stream is exhausted.
fn lookahead(&mut self) -> char {
    let upcoming = self.chars.peek().copied();
    upcoming.expect("Unexpected EOF")
}
/// Read and return one character from the stream
fn accept(&mut self) -> char {
2021-07-29 17:34:34 +10:00
let result = self.chars.next().expect("Unexpected EOF");
if result == '\n' {
self.line_no += 1;
self.col_no = 1;
} else {
self.col_no += 1;
}
return result;
2021-07-29 03:24:51 +10:00
}
// PUBLIC API
/// Return a new [BLTParser] reading from the given characters
///
/// The parser starts at line 1, column 1 with an empty [Election]; nothing is read from
/// `chars` until [BLTParser::parse_blt] is called.
pub fn new(chars: Peekable<I>) -> Self {
    let election = Election {
        name: String::new(),
        seats: 0,
        candidates: Vec::new(),
        withdrawn_candidates: Vec::new(),
        ballots: Vec::new(),
        total_votes: None,
        constraints: None,
    };

    Self {
        chars,
        ballot_value_buf: String::new(),
        line_no: 1,
        col_no: 1,
        num_candidates: 0,
        election,
    }
}
/// Return the parsed [Election]
pub fn as_election(self) -> Election<N> {
return self.election;
}
}
/// Parse BLT data supplied as a peekable iterator of characters and return an [Election]
///
/// Returns the first [ParseError] encountered, if any.
pub fn parse_iterator<I: Iterator<Item=char>, N: Number>(input: Peekable<I>) -> Result<Election<N>, ParseError> {
    let mut parser = BLTParser::new(input);
    match parser.parse_blt() {
        Ok(()) => Ok(parser.as_election()),
        Err(err) => Err(err),
    }
}
/// Parse the BLT file at the given path and return an [Election]
#[cfg(not(target_arch = "wasm32"))]
pub fn parse_path<P: AsRef<Path>, N: Number>(path: P) -> Result<Election<N>, ParseError> {
let mut reader = BufReader::new(File::open(path).expect("IO Error"));
let chars = reader.chars().map(|r| r.expect("IO Error")).peekable();
2021-10-27 19:52:51 +11:00
return parse_iterator(chars);
}