// 2023-11-11 00:25:19 +11:00
// hpstat: High-performance statistics implementations
// Copyright © 2023 Lee Yingtong Li (RunasSudo)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use std ::io ::BufRead ;
/// Reads a CSV document from `reader` into its column headers and a flat vector of floats.
///
/// The first line is read as the header row with `read_row_as_strings`. Every following line is
/// read as a row of floats with `read_row_as_floats` until that function reports end of input.
///
/// Returns `(headers, data)`. `data` stores the rows back to back in the order they were read, so
/// it holds `headers.len()` values per data row.
///
/// # Panics
///
/// Panics if a data row does not have exactly as many entries as the header row. The two row
/// readers called from here additionally panic on I/O errors, on a missing header line and on
/// malformed entries.
pub fn read_csv<R: BufRead>(mut reader: R) -> (Vec<String>, Vec<f64>) {
    // This custom CSV parser is faster than the csv library because we do not waste time
    // allocating Strings for the data, which will inevitably be parsed to float anyway.
    // A single line buffer is reused to avoid unnecessary allocations: copies are needed only
    // for the headers, the data are parsed directly to float.
    let mut buffer = String::new();

    // Read header
    let headers = read_row_as_strings(&mut reader, &mut buffer);

    // Read data
    let mut data = Vec::new();
    let mut row = Vec::new();
    while read_row_as_floats(&mut reader, &mut buffer, &mut row) {
        if row.len() != headers.len() {
            panic!(
                " Expected row of {} entries, got {} entries ",
                headers.len(),
                row.len()
            );
        }
        // `append` moves the values into `data` and leaves `row` empty for the next line
        data.append(&mut row);
    }

    // The row reader returned false: EOF
    (headers, data)
}
/// Reads one line from `reader` and splits it into CSV entries, returned as owned strings.
///
/// `buffer` is cleared and reused as the line buffer. The line is trimmed of surrounding
/// whitespace and split on commas. An entry wrapped in double quotes is returned without the
/// quotes. An entry that opens with a double quote but does not close it is taken to contain
/// commas: the following fragments are joined back with `,` up to the fragment that ends with a
/// double quote.
///
/// # Panics
///
/// Panics if reading fails, if the reader is already at end of input, or if a quoted entry is
/// still open at the end of the line.
fn read_row_as_strings<R: BufRead>(reader: &mut R, buffer: &mut String) -> Vec<String> {
    buffer.clear();
    let bytes_read = reader.read_line(buffer).expect(" IO error ");
    if bytes_read == 0 {
        panic!(" Unexpected EOF ");
    }

    let mut result = Vec::new();
    // Not a `for` loop: the quoted-entry branch below pulls further fragments from the iterator
    let mut entries_iter = buffer.trim().split(',');
    while let Some(entry) = entries_iter.next() {
        let rest = match entry.strip_prefix('"') {
            Some(rest) => rest,
            None => {
                // Unquoted entry
                result.push(String::from(entry));
                continue;
            }
        };

        // Quoted entry that closes within the same fragment. A lone `"` has nothing left after
        // the opening quote is stripped, so it falls through and is treated as an opening quote.
        if let Some(inner) = rest.strip_suffix('"') {
            result.push(String::from(inner));
            continue;
        }

        // Quoted entry containing commas: read the remainder of the entry
        let mut full_entry = String::from(rest);
        loop {
            let entry_part = match entries_iter.next() {
                Some(part) => part,
                None => panic!(" Unexpected EOL while reading quoted CSV entry "),
            };
            // Put back the comma that `split` consumed in front of this fragment
            full_entry.push(',');
            match entry_part.strip_suffix('"') {
                Some(last) => {
                    // End of quoted entry
                    // TODO: No support for escaping double quotes
                    full_entry.push_str(last);
                    break;
                }
                // Middle of quoted entry
                None => full_entry.push_str(entry_part),
            }
        }
        result.push(full_entry);
    }

    result
}
fn read_row_as_floats < R : BufRead > ( reader : & mut R , buffer : & mut String , row : & mut Vec < f64 > ) -> bool {
buffer . clear ( ) ;
let bytes_read = reader . read_line ( buffer ) . expect ( " IO error " ) ;
if bytes_read = = 0 {
// EOF
return false ;
}
let mut entries_iter = buffer . trim ( ) . split ( ',' ) ;
loop {
if let Some ( entry ) = entries_iter . next ( ) {
if entry . starts_with ( '"' ) {
if entry . ends_with ( '"' ) {
row . push ( parse_float ( & entry [ 1 .. ( entry . len ( ) - 1 ) ] ) ) ;
} else {
// Float cannot have a comma in it
panic! ( " Malformed float " ) ;
}
} else {
row . push ( parse_float ( entry ) ) ;
}
} else {
// EOL
break ;
}
}
return true ;
}
/// Parses one CSV field as an `f64`.
///
/// Whitespace around the field is ignored. The field `inf` maps to positive infinity; anything
/// else is handed to the standard `f64` parser.
///
/// # Panics
///
/// Panics if the field is not a valid float.
fn parse_float(s: &str) -> f64 {
    match s.trim() {
        "inf" => f64::INFINITY,
        field => field.parse().expect(" Malformed float "),
    }
}