//! One Billion Row Challenge: per-station minimum, mean and maximum temperature.
//!
//! Reads a measurements file whose lines have the form `name;-14.4`, prints the
//! file name, then prints one `name;min;mean;max` row per station, sorted by
//! station name, with every number shown to one decimal place.

use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::prelude::*;
use std::io::BufReader;

/// Capacity of the file read buffer; larger buffers mean fewer read syscalls.
const READ_BUFFER_BYTES: usize = 64 * 1024;

/// Initial capacity of the station table, so it rarely has to rehash.
const EXPECTED_STATIONS: usize = 10_000;

/// Running aggregate of every temperature recorded for one station.
///
/// Only the summary is kept, so memory use depends on the number of stations
/// rather than the number of measurements.
#[derive(Debug, Clone, Copy, PartialEq)]
struct Station {
    min: f64,
    max: f64,
    sum: f64,
    count: u64,
}

impl Station {
    /// Starts an aggregate from the first reading of a station.
    fn new(temp: f64) -> Self {
        Station {
            min: temp,
            max: temp,
            sum: temp,
            count: 1,
        }
    }

    /// Folds one more reading into the aggregate.
    fn record(&mut self, temp: f64) {
        self.min = self.min.min(temp);
        self.max = self.max.max(temp);
        self.sum += temp;
        self.count += 1;
    }

    /// Arithmetic mean of all readings; `count` is at least 1 by construction.
    fn mean(&self) -> f64 {
        self.sum / self.count as f64
    }
}

/// Splits one `name;temperature` record into its station name and reading.
///
/// A trailing `\n` or `\r\n` is ignored. The split happens at the *last* `;`,
/// so the temperature may have any width (`9.2`, `-14.4`, ...). Returns `None`
/// when there is no `;` or the text after it is not a number.
fn parse_line(line: &str) -> Option<(&str, f64)> {
    let record = line.trim_end_matches(['\n', '\r']);
    let (name, temp) = record.rsplit_once(';')?;
    Some((name, temp.parse().ok()?))
}

/// Reads every record from `reader` and aggregates the readings per station.
///
/// Blank lines are skipped.
///
/// # Errors
///
/// Returns any I/O error from `reader`, or an `InvalidData` error naming the
/// 1-based line number of the first record that cannot be parsed.
fn aggregate<R: BufRead>(mut reader: R) -> std::io::Result<HashMap<String, Station>> {
    let mut stations: HashMap<String, Station> = HashMap::with_capacity(EXPECTED_STATIONS);
    // One buffer reused for every line instead of a fresh `String` per line.
    let mut line = String::new();
    let mut line_no: u64 = 0;

    loop {
        line.clear();
        if reader.read_line(&mut line)? == 0 {
            break;
        }
        line_no += 1;

        let record = line.trim_end_matches(['\n', '\r']);
        if record.is_empty() {
            continue;
        }

        match parse_line(record) {
            Some((name, temp)) => {
                // Look up by `&str` so a name is only allocated the first time
                // a station is seen.
                if let Some(station) = stations.get_mut(name) {
                    station.record(temp);
                } else {
                    stations.insert(name.to_owned(), Station::new(temp));
                }
            }
            None => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    format!("line {}: malformed measurement {:?}", line_no, record),
                ));
            }
        }
    }

    Ok(stations)
}

/// Renders one output row as `name;min;mean;max` with one decimal place each.
fn format_row(name: &str, station: &Station) -> String {
    format!(
        "{};{:.1};{:.1};{:.1}",
        name,
        station.min,
        station.mean(),
        station.max
    )
}

/// Aggregates the file at `filename` and writes the sorted rows to stdout.
fn run(filename: &str) -> std::io::Result<()> {
    let file = File::open(filename)?;
    let stations = aggregate(BufReader::with_capacity(READ_BUFFER_BYTES, file))?;

    let mut rows: Vec<(&String, &Station)> = stations.iter().collect();
    rows.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));

    // Lock stdout once and buffer the rows rather than locking per `println!`.
    let stdout = std::io::stdout();
    let mut out = std::io::BufWriter::new(stdout.lock());
    for (name, station) in rows {
        writeln!(out, "{}", format_row(name, station))?;
    }
    out.flush()
}

/// Entry point: expects the measurements file path as the first argument.
///
/// Exits with status 2 when the argument is missing and status 1 when the
/// file cannot be read or contains a malformed record.
fn main() {
    let filename = match env::args().nth(1) {
        Some(path) => path,
        None => {
            eprintln!("usage: onebrc-rust <measurements-file>");
            std::process::exit(2);
        }
    };
    println!("{filename}");

    if let Err(err) = run(&filename) {
        eprintln!("{filename}: {err}");
        std::process::exit(1);
    }
}