first working try, takes 6 seconds for 10e7 entries

This commit is contained in:
Aurel Feer 2024-08-18 01:56:10 +02:00
commit 0baea24eac
5 changed files with 78 additions and 0 deletions

5
.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
measurements
# Added by cargo
/target

7
Cargo.lock generated Normal file
View File

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "onebrc-rust"
version = "0.1.0"

6
Cargo.toml Normal file
View File

@ -0,0 +1,6 @@
[package]
name = "onebrc-rust"
version = "0.1.0"
edition = "2021"
[dependencies]

2
bench.ps1 Normal file
View File

@ -0,0 +1,2 @@
# Build the optimised binary; timing a debug build says little about real speed.
cargo build --release
# Do not time a stale binary if the build failed.
if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
# Time one full run over the 10e7-row measurements file.
Measure-Command {start-process target/release/onebrc-rust.exe measurements/measurements-10e7.txt -Wait}

58
src/main.rs Normal file
View File

@ -0,0 +1,58 @@
use std::env;
use std::fs::File;
use std::io::BufReader;
use std::io::prelude::*;
use std::collections::HashMap;
/// Running aggregate of every temperature reported for one weather station.
///
/// Only the minimum, maximum, sum and count are kept, so memory use grows with
/// the number of distinct stations rather than with the number of input rows.
struct Station {
    min: f64,
    max: f64,
    sum: f64,
    count: u64,
}

impl Station {
    /// Starts an aggregate from the first reading seen for a station.
    fn new(temp: f64) -> Self {
        Station {
            min: temp,
            max: temp,
            sum: temp,
            count: 1,
        }
    }

    /// Folds one more reading into the aggregate.
    fn record(&mut self, temp: f64) {
        self.min = self.min.min(temp);
        self.max = self.max.max(temp);
        self.sum += temp;
        self.count += 1;
    }

    /// Arithmetic mean of all readings recorded so far.
    fn mean(&self) -> f64 {
        self.sum / self.count as f64
    }
}

/// Splits one input line of the form `name;temperature` into its two parts.
///
/// A trailing `\n` or `\r\n` is ignored, so the last line of a file may omit
/// the newline. The split happens at the *last* `;`, which makes the result
/// independent of how many characters the temperature occupies (`9.2`,
/// `-9.2`, `14.4`, `-14.4`, ...).
///
/// Returns `None` when the line has no `;` or when the text after it is not a
/// finite decimal number.
fn parse_measurement(line: &str) -> Option<(&str, f64)> {
    let record = line.trim_end_matches(|c: char| c == '\n' || c == '\r');
    let (name, temp) = record.rsplit_once(';')?;
    let temp: f64 = temp.parse().ok()?;
    if !temp.is_finite() {
        // `f64::from_str` also accepts "inf" and "NaN"; those are not readings.
        return None;
    }
    Some((name, temp))
}

/// Reads the measurements file named by the first command-line argument and
/// prints one `name;min;mean;max` line per station, sorted by station name,
/// with each number rounded to one decimal place.
///
/// Exits with status 2 when no path is given and with status 1 when the file
/// cannot be opened or read. Malformed non-blank lines are reported on stderr
/// and skipped.
fn main() {
    let args: Vec<String> = env::args().collect();
    let filename = match args.get(1) {
        Some(path) => path,
        None => {
            eprintln!("usage: onebrc-rust <measurements-file>");
            std::process::exit(2)
        }
    };
    println!("{filename}");

    let file = File::open(filename).unwrap_or_else(|err| {
        eprintln!("cannot open {filename}: {err}");
        std::process::exit(1)
    });
    let mut buffered_reader = BufReader::with_capacity(1024, file);

    // One line buffer is reused for the whole file to avoid a per-line allocation.
    let mut line = String::new();
    let mut stations: HashMap<String, Station> = HashMap::with_capacity(10_000);
    let mut line_no: u64 = 0;

    loop {
        line.clear();
        let n = buffered_reader.read_line(&mut line).unwrap_or_else(|err| {
            eprintln!("cannot read {filename}: {err}");
            std::process::exit(1)
        });
        if n == 0 {
            break;
        }
        line_no += 1;

        match parse_measurement(&line) {
            Some((name, temp)) => {
                // Look up by `&str` first so that an owned key is allocated
                // only the first time a station is seen.
                if let Some(station) = stations.get_mut(name) {
                    station.record(temp);
                } else {
                    stations.insert(name.to_owned(), Station::new(temp));
                }
            }
            None => {
                // Blank lines are harmless; anything else is worth a warning
                // because silently dropping it would skew the statistics.
                if !line.trim().is_empty() {
                    eprintln!(
                        "line {line_no}: skipping malformed record {:?}",
                        line.trim_end()
                    );
                }
            }
        }
    }

    let mut report: Vec<(&String, &Station)> = stations.iter().collect();
    report.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
    for (name, station) in report {
        // `{:.1}` is a precision of one decimal place (`{:1}` would only be a
        // minimum width and leave the mean at full float precision).
        println!(
            "{};{:.1};{:.1};{:.1}",
            name,
            station.min,
            station.mean(),
            station.max
        );
    }
}