From 422a198cf5a08104fe9739605fb92723269d073b Mon Sep 17 00:00:00 2001 From: RunasSudo Date: Sat, 20 Aug 2022 23:20:42 +1000 Subject: [PATCH] Use NoHashHasher for Candidate HashMaps to improve performance --- Cargo.lock | 7 +++++++ Cargo.toml | 1 + src/constraints.rs | 9 +++++---- src/election.rs | 38 +++++++++++++++++++++++++++++------- src/parser/blt.rs | 3 ++- src/parser/csp.rs | 5 +++-- src/stv/gregory/transfers.rs | 20 ++++++++++++------- src/stv/meek.rs | 7 ++++--- src/stv/mod.rs | 9 +++++---- 9 files changed, 71 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 72f791f..265b736 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -651,6 +651,12 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "nohash-hasher" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -738,6 +744,7 @@ dependencies = [ "itertools", "js-sys", "ndarray", + "nohash-hasher", "num-bigint", "num-rational", "num-traits", diff --git a/Cargo.toml b/Cargo.toml index a0298b2..cfcc26f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ csv = "1.1.6" derive_builder = "0.10.2" derive_more = "0.99.14" git-version = "0.3.4" +nohash-hasher = "0.2.0" ibig = "0.3.2" itertools = "0.10.1" ndarray = "0.15.3" diff --git a/src/constraints.rs b/src/constraints.rs index 6893e6d..21649c3 100644 --- a/src/constraints.rs +++ b/src/constraints.rs @@ -22,6 +22,7 @@ use crate::ties::{self, TieStrategy}; use itertools::Itertools; use ndarray::{Array, Dimension, IxDyn}; +use nohash_hasher::BuildNoHashHasher; #[cfg(not(target_arch = "wasm32"))] use rkyv::{Archive, Deserialize, Serialize}; @@ -130,7 +131,7 @@ impl Constraints { } /// Check if any elected candidates exceed constrained maximums - pub fn exceeds_maximum<'a, N: Number>(&self, election: &Election, candidates: HashMap<&'a Candidate, CountCard<'a, N>>) -> Option<(&Constraint, &ConstrainedGroup)> { + pub fn exceeds_maximum<'a, N: Number>(&self, election: &Election, candidates: HashMap<&'a Candidate, CountCard<'a, N>, BuildNoHashHasher>) -> Option<(&Constraint, &ConstrainedGroup)> { for constraint in &self.0 { for group in &constraint.groups { let mut num_elected = 0; @@ -360,7 +361,7 @@ impl ConstraintMatrix { } /// Update cands/elected in innermost cells based on the provided [CountState::candidates](crate::election::CountState::candidates) - pub fn update_from_state(&mut self, election: &Election, candidates: &HashMap<&Candidate, CountCard>) { + pub fn update_from_state(&mut self, election: &Election, candidates: &HashMap<&Candidate, CountCard, BuildNoHashHasher>) { let constraints = election.constraints.as_ref().unwrap(); // Reset innermost cells @@ -626,7 +627,7 @@ impl ops::IndexMut<&[usize]> for ConstraintMatrix { } /// Return the [Candidate]s referred to in the given [ConstraintMatrixCell] at location `idx` -fn candidates_in_constraint_cell<'a, N: Number>(election: &'a Election, candidates: &HashMap<&Candidate, CountCard>, idx: &[usize]) -> Vec<&'a Candidate> { +fn candidates_in_constraint_cell<'a, N: Number>(election: &'a Election, candidates: &HashMap<&Candidate, CountCard, BuildNoHashHasher>, idx: &[usize]) -> Vec<&'a Candidate> { let mut result: Vec<&Candidate> = Vec::new(); for (i, candidate) in election.candidates.iter().enumerate() { let cc = &candidates[candidate]; @@ -782,7 +783,7 @@ pub fn init_repeat_count(election: &mut Election) { /// Initialise the rollback for [ConstraintMode::TwoStage] pub fn init_repeat_count_rollback<'a, N: Number>(state: &mut CountState<'a, N>, constraint: &'a Constraint, group: &'a ConstrainedGroup) { - let mut rollback_candidates = HashMap::new(); + let mut rollback_candidates = HashMap::with_capacity_and_hasher(state.candidates.len(), BuildNoHashHasher::default()); let rollback_exhausted = state.exhausted.clone(); // Copy ballot papers to rollback state diff --git a/src/election.rs b/src/election.rs index d2f0b8c..b4998f6 100644 --- a/src/election.rs +++ b/src/election.rs @@ -24,6 +24,7 @@ use crate::stv::gregory::TransferTable; use crate::stv::meek::BallotTree; use itertools::Itertools; +use nohash_hasher::BuildNoHashHasher; #[cfg(not(target_arch = "wasm32"))] use rkyv::{Archive, Deserialize, Serialize}; @@ -33,6 +34,7 @@ use crate::numbers::{SerializedNumber, SerializedOptionNumber}; use std::cmp::max; use std::collections::HashMap; use std::fmt; +use std::hash::{Hash, Hasher}; /// An election to be counted #[cfg_attr(not(target_arch = "wasm32"), derive(Archive, Deserialize, Serialize))] @@ -98,15 +100,26 @@ impl Election { } /// A candidate in an [Election] -#[derive(Clone, Eq, Hash, PartialEq)] +#[derive(Clone, Eq, PartialEq)] #[cfg_attr(not(target_arch = "wasm32"), derive(Archive, Deserialize, Serialize))] pub struct Candidate { + /// Index of the candidate + pub index: usize, /// Name of the candidate pub name: String, /// If this candidate is a dummy candidate (e.g. for --constraint-mode repeat_count) pub is_dummy: bool, } +impl Hash for Candidate { + fn hash(&self, hasher: &mut H) { + // Custom implementation of hash for use with NoHashHasher, to improve performance + hasher.write_usize(self.index); + } +} + +impl nohash_hasher::IsEnabled for Candidate {} + /// The current state of counting an [Election] #[derive(Clone)] pub struct CountState<'a, N: Number> { @@ -114,7 +127,7 @@ pub struct CountState<'a, N: Number> { pub election: &'a Election, /// [HashMap] of [CountCard]s for each [Candidate] in the election - pub candidates: HashMap<&'a Candidate, CountCard<'a, N>>, + pub candidates: HashMap<&'a Candidate, CountCard<'a, N>, BuildNoHashHasher>, /// [CountCard] representing the exhausted pile pub exhausted: CountCard<'a, N>, /// [CountCard] representing loss by fraction @@ -124,9 +137,9 @@ pub struct CountState<'a, N: Number> { pub ballot_tree: Option>, /// Values used to break ties, based on forwards tie-breaking - pub forwards_tiebreak: Option>, + pub forwards_tiebreak: Option>>, /// Values used to break ties, based on backwards tie-breaking - pub backwards_tiebreak: Option>, + pub backwards_tiebreak: Option>>, /// [SHARandom] for random tie-breaking pub random: Option>, @@ -161,7 +174,7 @@ impl<'a, N: Number> CountState<'a, N> { pub fn new(election: &'a Election) -> Self { let mut state = CountState { election, - candidates: HashMap::new(), + candidates: HashMap::with_capacity_and_hasher(election.candidates.len(), BuildNoHashHasher::default()), exhausted: CountCard::new(), loss_fraction: CountCard::new(), ballot_tree: None, @@ -590,7 +603,18 @@ pub enum RollbackState<'a, N> { /// Not rolling back Normal, /// Start rolling back next stage - NeedsRollback { candidates: Option>>, exhausted: Option>, constraint: &'a Constraint, group: &'a ConstrainedGroup }, + NeedsRollback { + candidates: Option, BuildNoHashHasher>>, + exhausted: Option>, + constraint: &'a Constraint, + group: &'a ConstrainedGroup + }, /// Rolling back - RollingBack { candidates: Option>>, exhausted: Option>, candidate_distributing: Option<&'a Candidate>, constraint: Option<&'a Constraint>, group: Option<&'a ConstrainedGroup> }, + RollingBack { + candidates: Option, BuildNoHashHasher>>, + exhausted: Option>, + candidate_distributing: Option<&'a Candidate>, + constraint: Option<&'a Constraint>, + group: Option<&'a ConstrainedGroup> + }, } diff --git a/src/parser/blt.rs b/src/parser/blt.rs index e8a080a..f0446a4 100644 --- a/src/parser/blt.rs +++ b/src/parser/blt.rs @@ -175,9 +175,10 @@ impl> BLTParser { /// Parse the list of strings at the end of the BLT file fn strings(&mut self) -> Result<(), ParseError> { - for _ in 0..self.num_candidates { + for index in 0..self.num_candidates { let name = self.string()?; self.election.candidates.push(Candidate { + index, name, is_dummy: false, }); diff --git a/src/parser/csp.rs b/src/parser/csp.rs index 257a6c9..a621909 100644 --- a/src/parser/csp.rs +++ b/src/parser/csp.rs @@ -36,13 +36,14 @@ pub fn parse_reader(reader: R, require_1: bool, require_sequ let mut candidates = Vec::new(); let mut col_map = HashMap::new(); // Map csp column -> candidates index - for (i, cand_name) in reader.headers()?.into_iter().enumerate() { + for (index, cand_name) in reader.headers()?.into_iter().enumerate() { if cand_name == "$mult" { continue; } - col_map.insert(i, candidates.len()); + col_map.insert(index, candidates.len()); candidates.push(Candidate { + index, name: cand_name.to_string(), is_dummy: false, }); diff --git a/src/stv/gregory/transfers.rs b/src/stv/gregory/transfers.rs index 2c14447..4da058b 100644 --- a/src/stv/gregory/transfers.rs +++ b/src/stv/gregory/transfers.rs @@ -15,6 +15,8 @@ * along with this program. If not, see . */ +use nohash_hasher::BuildNoHashHasher; + #[cfg(not(target_arch = "wasm32"))] use prettytable::{Cell, Row, Table}; #[cfg(target_arch = "wasm32")] @@ -52,13 +54,15 @@ pub struct TransferTable<'e, N: Number> { impl<'e, N: Number> TransferTable<'e, N> { /// Return a new [TransferTable] for an exclusion pub fn new_exclusion(hopefuls: Vec<&'e Candidate>) -> Self { - TransferTable { + let num_hopefuls = hopefuls.len(); + + return TransferTable { hopefuls, columns: Vec::new(), total: TransferTableColumn { value_fraction: N::new(), order: 0, - cells: HashMap::new(), + cells: HashMap::with_capacity_and_hasher(num_hopefuls, BuildNoHashHasher::default()), exhausted: TransferTableCell { ballots: N::new(), votes_in: N::new(), votes_out: N::new() }, total: TransferTableCell { ballots: N::new(), votes_in: N::new(), votes_out: N::new() }, }, @@ -71,13 +75,15 @@ impl<'e, N: Number> TransferTable<'e, N> { /// Return a new [TransferTable] for a surplus distribution pub fn new_surplus(hopefuls: Vec<&'e Candidate>, surplus: N, surpfrac: Option, surpfrac_numer: Option, surpfrac_denom: Option) -> Self { - TransferTable { + let num_hopefuls = hopefuls.len(); + + return TransferTable { hopefuls, columns: Vec::new(), total: TransferTableColumn { value_fraction: N::new(), order: 0, - cells: HashMap::new(), + cells: HashMap::with_capacity_and_hasher(num_hopefuls, BuildNoHashHasher::default()), exhausted: TransferTableCell { ballots: N::new(), votes_in: N::new(), votes_out: N::new() }, total: TransferTableCell { ballots: N::new(), votes_in: N::new(), votes_out: N::new() }, }, @@ -102,7 +108,7 @@ impl<'e, N: Number> TransferTable<'e, N> { let mut col = TransferTableColumn { value_fraction: value_fraction.clone(), order: order.unwrap_or(0), - cells: HashMap::new(), + cells: HashMap::with_hasher(BuildNoHashHasher::default()), exhausted: TransferTableCell { ballots: N::new(), votes_in: N::new(), votes_out: N::new() }, total: TransferTableCell { ballots: N::new(), votes_in: N::new(), votes_out: N::new() }, }; @@ -124,7 +130,7 @@ impl<'e, N: Number> TransferTable<'e, N> { let col = TransferTableColumn { value_fraction: value_fraction.clone(), order: order.unwrap_or(0), - cells: HashMap::new(), + cells: HashMap::with_hasher(BuildNoHashHasher::default()), exhausted: TransferTableCell { ballots: ballots.clone(), votes_in: N::new(), votes_out: N::new() }, total: TransferTableCell { ballots: N::new(), votes_in: N::new(), votes_out: N::new() }, }; @@ -585,7 +591,7 @@ pub struct TransferTableColumn<'e, N: Number> { pub order: usize, /// Cells in this column - pub cells: HashMap<&'e Candidate, TransferTableCell>, + pub cells: HashMap<&'e Candidate, TransferTableCell, BuildNoHashHasher>, /// Exhausted cell pub exhausted: TransferTableCell, diff --git a/src/stv/meek.rs b/src/stv/meek.rs index e3ceb47..f0396c1 100644 --- a/src/stv/meek.rs +++ b/src/stv/meek.rs @@ -21,6 +21,7 @@ use crate::election::{Ballot, Candidate, CandidateState, CountCard, CountState, use crate::numbers::Number; use itertools::Itertools; +use nohash_hasher::BuildNoHashHasher; use std::cmp::max; use std::collections::HashMap; @@ -39,7 +40,7 @@ struct BallotInTree<'b, N: Number> { pub struct BallotTree<'t, N: Number> { num_ballots: N, ballots: Vec>, - next_preferences: Option>>>, + next_preferences: Option, BuildNoHashHasher>>>, next_exhausted: Option>>, } @@ -56,7 +57,7 @@ impl<'t, N: Number> BallotTree<'t, N> { /// Descend one level of the [BallotTree] fn descend_tree(&mut self, candidates: &'t [Candidate]) { - let mut next_preferences: HashMap<&Candidate, BallotTree> = HashMap::new(); + let mut next_preferences: HashMap<&Candidate, BallotTree, BuildNoHashHasher> = HashMap::with_capacity_and_hasher(candidates.len(), BuildNoHashHasher::default()); let mut next_exhausted = BallotTree::new(); for bit in self.ballots.iter() { @@ -164,7 +165,7 @@ where /// Distribute preferences recursively /// /// Called by [distribute_preferences] -fn distribute_recursively<'t, N: Number>(candidates: &mut HashMap<&'t Candidate, CountCard>, exhausted: &mut CountCard, tree: &mut BallotTree<'t, N>, remaining_multiplier: N, election: &'t Election, opts: &STVOptions) +fn distribute_recursively<'t, N: Number>(candidates: &mut HashMap<&'t Candidate, CountCard, BuildNoHashHasher>, exhausted: &mut CountCard, tree: &mut BallotTree<'t, N>, remaining_multiplier: N, election: &'t Election, opts: &STVOptions) where for<'r> &'r N: ops::Mul<&'r N, Output=N>, { diff --git a/src/stv/mod.rs b/src/stv/mod.rs index 60fc421..b5e5a96 100644 --- a/src/stv/mod.rs +++ b/src/stv/mod.rs @@ -35,6 +35,7 @@ use crate::ties::{self, TieStrategy}; use derive_builder::Builder; use derive_more::Constructor; use itertools::Itertools; +use nohash_hasher::BuildNoHashHasher; #[allow(unused_imports)] use wasm_bindgen::prelude::wasm_bindgen; @@ -781,7 +782,7 @@ where /// See [next_preferences] pub struct NextPreferencesResult<'a, N> { /// [NextPreferencesEntry] for each [Candidate] - pub candidates: HashMap<&'a Candidate, NextPreferencesEntry<'a, N>>, + pub candidates: HashMap<&'a Candidate, NextPreferencesEntry<'a, N>, BuildNoHashHasher>, /// [NextPreferencesEntry] for exhausted ballots pub exhausted: NextPreferencesEntry<'a, N>, /// Total weight of ballots examined @@ -799,7 +800,7 @@ pub struct NextPreferencesEntry<'a, N> { /// Count the given votes, grouping according to next available preference pub fn next_preferences<'a, N: Number>(state: &CountState<'a, N>, votes: Vec>) -> NextPreferencesResult<'a, N> { let mut result = NextPreferencesResult { - candidates: HashMap::new(), + candidates: HashMap::with_capacity_and_hasher(state.election.candidates.len(), BuildNoHashHasher::default()), exhausted: NextPreferencesEntry { votes: Vec::new(), num_ballots: N::new(), @@ -1767,7 +1768,7 @@ fn init_tiebreaks(state: &mut CountState, opts: &STVOptions) { // Update forwards tie-breaking order if opts.ties.iter().any(|t| t == &TieStrategy::Forwards) { - let mut hm: HashMap<&Candidate, usize> = HashMap::new(); + let mut hm: HashMap<&Candidate, usize, BuildNoHashHasher> = HashMap::with_capacity_and_hasher(state.candidates.len(), BuildNoHashHasher::default()); for (i, group) in sorted_candidates.iter().enumerate() { for (candidate, _) in group.iter() { hm.insert(candidate, i); @@ -1778,7 +1779,7 @@ fn init_tiebreaks(state: &mut CountState, opts: &STVOptions) { // Update backwards tie-breaking order if opts.ties.iter().any(|t| t == &TieStrategy::Backwards) { - let mut hm: HashMap<&Candidate, usize> = HashMap::new(); + let mut hm: HashMap<&Candidate, usize, BuildNoHashHasher> = HashMap::with_capacity_and_hasher(state.candidates.len(), BuildNoHashHasher::default()); for (i, group) in sorted_candidates.iter().enumerate() { for (candidate, _) in group.iter() { hm.insert(candidate, i);