hpstat/src/csv.rs

135 lines
3.6 KiB
Rust

// hpstat: High-performance statistics implementations
// Copyright © 2023 Lee Yingtong Li (RunasSudo)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use std::io::BufRead;
/// Parses CSV text from `reader` into a header row and a flat vector of numeric data.
///
/// The first line is read as the header row (one `String` per column). Every following line is
/// parsed as a row of `f64` values and appended to a single flat vector in the order the rows
/// were read, so the data for row `i` occupies the `headers.len()` entries starting at
/// `i * headers.len()`.
///
/// This hand-written parser exists for speed: data fields are parsed straight from a single
/// reused line buffer to `f64`, so only the header fields are ever copied into new `String`s.
///
/// # Panics
///
/// Panics if a data row does not have the same number of entries as the header row. The row
/// readers it calls also panic on I/O errors and malformed input.
pub fn read_csv<R: BufRead>(mut reader: R) -> (Vec<String>, Vec<f64>) {
    // One line buffer shared by the header read and every data-row read
    let mut line = String::new();

    let headers = read_row_as_strings(&mut reader, &mut line);
    let width = headers.len();

    let mut values: Vec<f64> = Vec::new();
    // Scratch row; `append` below drains it, so it is empty again for the next iteration
    let mut current: Vec<f64> = Vec::new();

    // The row reader returns false once the input is exhausted
    while read_row_as_floats(&mut reader, &mut line, &mut current) {
        if current.len() != width {
            panic!("Expected row of {} entries, got {} entries", width, current.len());
        }
        values.append(&mut current);
    }

    (headers, values)
}
/// Reads the next line from `reader` and splits it into CSV fields, returned as owned strings.
///
/// `buffer` is cleared and then used as the line buffer, so a caller can share one allocation
/// across many rows. Leading and trailing whitespace of the line (including the line
/// terminator) is trimmed before the line is split on commas.
///
/// A field that starts with `"` is treated as quoted: the surrounding quotes are removed and
/// any commas between them are kept as part of the field. Escaped double quotes inside a quoted
/// field are not supported.
///
/// # Panics
///
/// Panics on an I/O error, if the reader is already at EOF, or if a quoted field is still open
/// when the end of the line is reached.
fn read_row_as_strings<R: BufRead>(reader: &mut R, buffer: &mut String) -> Vec<String> {
    buffer.clear();
    let bytes_read = reader.read_line(buffer).expect("IO error");
    if bytes_read == 0 {
        panic!("Unexpected EOF");
    }

    let mut result = Vec::new();
    let mut entries_iter = buffer.trim().split(',');

    // `while let` rather than `for`: a quoted field pulls extra fragments from the same iterator
    while let Some(entry) = entries_iter.next() {
        let after_quote = match entry.strip_prefix('"') {
            Some(rest) => rest,
            None => {
                // Plain unquoted field
                result.push(String::from(entry));
                continue;
            }
        };

        // Stripping the suffix from what follows the opening quote means a fragment that is a
        // lone `"` is correctly seen as an opening quote only, not as an (empty) closed field
        if let Some(inner) = after_quote.strip_suffix('"') {
            // Quoted field with no embedded comma
            result.push(String::from(inner));
            continue;
        }

        // The quoted field contains commas, so `split` broke it into several fragments.
        // Stitch them back together, restoring the comma in front of every continuation.
        let mut full_entry = String::from(after_quote);
        loop {
            let entry_part = entries_iter
                .next()
                .expect("Unexpected EOL while reading quoted CSV entry");
            full_entry.push(',');
            match entry_part.strip_suffix('"') {
                Some(last) => {
                    // End of quoted entry
                    // TODO: No support for escaping double quotes
                    full_entry.push_str(last);
                    break;
                }
                None => {
                    // Middle of quoted entry
                    full_entry.push_str(entry_part);
                }
            }
        }
        result.push(full_entry);
    }

    result
}
/// Reads the next line from `reader` and parses each comma-separated field as an `f64`,
/// pushing the values onto `row`.
///
/// `buffer` is cleared and then used as the line buffer, so a caller can share one allocation
/// across many rows. Leading and trailing whitespace of the line (including the line
/// terminator) is trimmed before the line is split on commas. A field may optionally be wrapped
/// in double quotes, which are removed before parsing. `row` is not cleared: values are
/// appended to whatever it already holds.
///
/// Returns `true` if a line was read, or `false` if the reader was already at EOF (in which
/// case `row` is left untouched).
///
/// # Panics
///
/// Panics on an I/O error, or if a field has an opening quote without a matching closing quote
/// in the same field. Field conversion is delegated to `parse_float`.
fn read_row_as_floats<R: BufRead>(reader: &mut R, buffer: &mut String, row: &mut Vec<f64>) -> bool {
    buffer.clear();
    let bytes_read = reader.read_line(buffer).expect("IO error");
    if bytes_read == 0 {
        // EOF
        return false;
    }

    for entry in buffer.trim().split(',') {
        let value = match entry.strip_prefix('"') {
            // Stripping the suffix from what follows the opening quote means a lone `"` is
            // reported as a malformed float rather than causing an out-of-range slice
            Some(rest) => match rest.strip_suffix('"') {
                Some(inner) => parse_float(inner),
                // Float cannot have a comma in it, so the quote must close in the same field
                None => panic!("Malformed float"),
            },
            None => parse_float(entry),
        };
        row.push(value);
    }

    true
}
/// Converts the text of a single CSV field into an `f64`.
///
/// The exact string `inf` is mapped to positive infinity; every other input is handed to the
/// standard `str::parse` implementation for `f64`.
///
/// # Panics
///
/// Panics with "Malformed float" if the text cannot be parsed.
fn parse_float(s: &str) -> f64 {
    if s == "inf" {
        return f64::INFINITY;
    }
    s.parse::<f64>().expect("Malformed float")
}