// hpstat: High-performance statistics implementations
// Copyright © 2023 Lee Yingtong Li (RunasSudo)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

use std::io::BufRead;
pub fn read_csv<R: BufRead>(mut reader: R) -> (Vec<String>, Vec<f64>) {
|
|
// This custom CSV parser is faster than the csv library because we do not waste time allocating Strings for the data which will inevitably be parsed to float anyway
|
|
|
|
// Reuse a single buffer to avoid unnecessary allocations
|
|
// Since we need to make copies only for the headers - the data are directly parsed to float
|
|
let mut buffer = String::new();
|
|
|
|
// Read header
|
|
let headers = read_row_as_strings(&mut reader, &mut buffer);
|
|
|
|
// Read data
|
|
let mut data = Vec::new();
|
|
let mut row = Vec::new();
|
|
loop {
|
|
if read_row_as_floats(&mut reader, &mut buffer, &mut row) {
|
|
if row.len() != headers.len() {
|
|
panic!("Expected row of {} entries, got {} entries", headers.len(), row.len());
|
|
}
|
|
|
|
data.append(&mut row);
|
|
} else {
|
|
// EOF
|
|
break;
|
|
}
|
|
}
|
|
|
|
return (headers, data);
|
|
}
/// Reads one line from `reader` and splits it into CSV fields, returning each
/// field as an owned `String`.
///
/// `buffer` is scratch space: it is cleared, filled with the line, and the
/// fields are copied out of it. The line is trimmed of surrounding whitespace
/// before being split on commas.
///
/// A field wrapped in double quotes has the quotes removed and may contain
/// commas; the comma-separated pieces of such a field are joined back together
/// with the commas restored. Escaped double quotes are not supported.
///
/// # Panics
///
/// * `"IO error"` if reading from `reader` fails.
/// * `"Unexpected EOF"` if no bytes could be read.
/// * `"Unexpected EOL while reading quoted CSV entry"` if a quoted field is
///   still open when the line ends.
fn read_row_as_strings<R: BufRead>(reader: &mut R, buffer: &mut String) -> Vec<String> {
    buffer.clear();

    let bytes_read = reader.read_line(buffer).expect("IO error");
    if bytes_read == 0 {
        panic!("Unexpected EOF");
    }

    let mut result = Vec::new();
    let mut entries_iter = buffer.trim().split(',');

    // `while let` rather than `for`: a quoted field pulls further pieces from the same iterator
    while let Some(entry) = entries_iter.next() {
        let opened = match entry.strip_prefix('"') {
            Some(rest) => rest,
            None => {
                // Plain unquoted field
                result.push(String::from(entry));
                continue;
            }
        };

        // The closing quote is looked for only *after* the opening quote has been
        // removed, so a piece that is a lone `"` counts as the start of a quoted
        // field (whose first piece is empty) rather than as a complete field.
        if let Some(inner) = opened.strip_suffix('"') {
            result.push(String::from(inner));
            continue;
        }

        // The quoted field contains at least one comma: gather the remaining pieces
        let mut full_entry = String::from(opened);
        loop {
            let entry_part = match entries_iter.next() {
                Some(part) => part,
                None => panic!("Unexpected EOL while reading quoted CSV entry"),
            };

            // Restore the comma that `split` consumed between the previous piece and this one
            full_entry.push(',');

            // TODO: No support for escaping double quotes
            if let Some(last_part) = entry_part.strip_suffix('"') {
                // End of quoted entry
                full_entry.push_str(last_part);
                break;
            }

            // Middle of quoted entry
            full_entry.push_str(entry_part);
        }
        result.push(full_entry);
    }

    result
}
fn read_row_as_floats<R: BufRead>(reader: &mut R, buffer: &mut String, row: &mut Vec<f64>) -> bool {
|
|
buffer.clear();
|
|
|
|
let bytes_read = reader.read_line(buffer).expect("IO error");
|
|
if bytes_read == 0 {
|
|
// EOF
|
|
return false;
|
|
}
|
|
|
|
let mut entries_iter = buffer.trim().split(',');
|
|
loop {
|
|
if let Some(entry) = entries_iter.next() {
|
|
if entry.starts_with('"') {
|
|
if entry.ends_with('"') {
|
|
row.push(parse_float(&entry[1..(entry.len() - 1)]));
|
|
} else {
|
|
// Float cannot have a comma in it
|
|
panic!("Malformed float");
|
|
}
|
|
} else {
|
|
row.push(parse_float(entry));
|
|
}
|
|
} else {
|
|
// EOL
|
|
break;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
/// Converts the text of a single CSV cell to an `f64`.
///
/// The exact string `inf` yields positive infinity; every other input is
/// handed to `str::parse`.
///
/// # Panics
///
/// Panics with `"Malformed float"` if the text cannot be parsed.
fn parse_float(s: &str) -> f64 {
    if s == "inf" {
        return f64::INFINITY;
    }

    s.parse::<f64>().expect("Malformed float")
}