// hpstat: High-performance statistics implementations
// Copyright © 2023  Lee Yingtong Li (RunasSudo)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.

use std::io::BufRead;

/// Read a CSV stream consisting of one header row followed by numeric rows.
///
/// Returns `(headers, data)`. `data` holds every row's values back to back in
/// a single flat vector (row after row), so it contains
/// `headers.len() * number_of_rows` elements.
///
/// This custom parser avoids allocating a `String` per data cell: one line
/// buffer is reused for the whole file, and only the header names are copied
/// out of it. Data cells are parsed to `f64` directly from the buffer.
///
/// # Panics
///
/// Panics on an I/O error, if the input is empty (no header row), if a data
/// row does not have the same number of entries as the header row, or if a
/// data cell is not a valid float.
pub fn read_csv<R: BufRead>(mut reader: R) -> (Vec<String>, Vec<f64>) {
    // Single line buffer shared by the header and data readers
    let mut buffer = String::new();

    // Read header
    let headers = read_row_as_strings(&mut reader, &mut buffer);

    // Read data; `row` is emptied by `append` after every line, so it is
    // reused as scratch space without reallocating
    let mut data = Vec::new();
    let mut row = Vec::new();

    while read_row_as_floats(&mut reader, &mut buffer, &mut row) {
        if row.len() != headers.len() {
            panic!("Expected row of {} entries, got {} entries", headers.len(), row.len());
        }

        data.append(&mut row);
    }

    (headers, data)
}

/// Read one line from `reader` into `buffer` and split it into string entries.
///
/// Entries may be wrapped in double quotes, in which case the quotes are
/// removed and commas inside the quotes are kept as part of the entry.
/// Escaped double quotes inside a quoted entry are not supported.
///
/// # Panics
///
/// Panics on an I/O error, on EOF, or if a quoted entry is not closed before
/// the end of the line.
fn read_row_as_strings<R: BufRead>(reader: &mut R, buffer: &mut String) -> Vec<String> {
    buffer.clear();

    let bytes_read = reader.read_line(buffer).expect("IO error");
    if bytes_read == 0 {
        panic!("Unexpected EOF");
    }

    let mut result = Vec::new();

    // Splitting on every comma also splits quoted entries containing commas;
    // those pieces are stitched back together below.
    let mut entries_iter = buffer.trim().split(',');

    while let Some(entry) = entries_iter.next() {
        match entry.strip_prefix('"') {
            // Unquoted entry
            None => result.push(String::from(entry)),
            Some(opened) => match opened.strip_suffix('"') {
                // Quoted entry without embedded commas
                Some(inner) => result.push(String::from(inner)),
                // Opening piece of a quoted entry that contains commas.
                // A lone `"` also lands here (empty first piece), which is
                // what a quoted entry beginning with a comma looks like.
                None => {
                    let mut full_entry = String::from(opened);

                    // Read remainder of quoted entry
                    loop {
                        match entries_iter.next() {
                            Some(entry_part) => {
                                // Restore the comma that `split` consumed
                                // between the previous piece and this one
                                full_entry.push(',');

                                match entry_part.strip_suffix('"') {
                                    Some(last) => {
                                        // End of quoted entry
                                        // TODO: No support for escaping double quotes
                                        full_entry.push_str(last);
                                        result.push(full_entry);
                                        break;
                                    }
                                    // Middle of quoted entry
                                    None => full_entry.push_str(entry_part),
                                }
                            }
                            None => panic!("Unexpected EOL while reading quoted CSV entry"),
                        }
                    }
                }
            },
        }
    }

    result
}

/// Read one line from `reader` into `buffer` and append its entries, parsed
/// as floats, to `row`.
///
/// Each entry may optionally be wrapped in double quotes. Returns `false`
/// (leaving `row` untouched) when the reader is at EOF, `true` otherwise.
///
/// # Panics
///
/// Panics on an I/O error or if any entry is not a valid float, including a
/// quoted entry that is not closed within the same comma-separated cell.
fn read_row_as_floats<R: BufRead>(reader: &mut R, buffer: &mut String, row: &mut Vec<f64>) -> bool {
    buffer.clear();

    let bytes_read = reader.read_line(buffer).expect("IO error");
    if bytes_read == 0 {
        // EOF
        return false;
    }

    row.extend(buffer.trim().split(',').map(|entry| match entry.strip_prefix('"') {
        Some(opened) => match opened.strip_suffix('"') {
            Some(inner) => parse_float(inner),
            // Float cannot have a comma in it, so an unclosed quote is an error
            None => panic!("Malformed float"),
        },
        None => parse_float(entry),
    }));

    true
}

/// Parse a single CSV cell as `f64`, mapping the literal `inf` to infinity.
///
/// # Panics
///
/// Panics with "Malformed float" if `s` cannot be parsed.
fn parse_float(s: &str) -> f64 {
    match s {
        "inf" => f64::INFINITY,
        _ => s.parse().expect("Malformed float"),
    }
}