From 3a0dcafd21aa2704fe75f81f56f50873ae8bf62c Mon Sep 17 00:00:00 2001
From: Vinzent Steinberg
Date: Tue, 6 Mar 2018 15:54:30 +0100
Subject: [PATCH] Implement histograms

In order for them to have constant size, a macro is provided to create
the histogram type. This should be replaced by const generics once Rust
has them.
---
 src/histogram.rs   | 126 +++++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs         |   1 +
 tests/histogram.rs |  86 +++++++++++++++++++++++++++++++
 3 files changed, 213 insertions(+)
 create mode 100644 src/histogram.rs
 create mode 100644 tests/histogram.rs

diff --git a/src/histogram.rs b/src/histogram.rs
new file mode 100644
index 0000000..7786a9e
--- /dev/null
+++ b/src/histogram.rs
@@ -0,0 +1,126 @@
+/// Define a histogram with a number of bins known at compile time.
+///
+/// Besides the histogram type, the macro defines a private constant `LEN`
+/// holding the number of bins, so it can be invoked only once per scope.
+///
+/// ```
+/// # extern crate core;
+/// # #[macro_use] extern crate average;
+/// # fn main() {
+/// define_histogram!(Histogram, 10);
+/// let mut h = Histogram::with_const_width(0., 100.);
+/// for i in 0..100 {
+///     h.add(i as f64).unwrap();
+/// }
+/// assert_eq!(h.bins(), &[10, 10, 10, 10, 10, 10, 10, 10, 10, 10]);
+/// # }
+/// ```
+#[macro_export]
+macro_rules! define_histogram {
+    ($name:ident, $LEN:expr) => (
+        /// The number of bins of the histogram.
+        const LEN: usize = $LEN;
+
+        /// A histogram with a number of bins known at compile time.
+        #[derive(Debug, Clone)]
+        pub struct $name {
+            range: [f64; LEN + 1],
+            bin: [u64; LEN],
+        }
+
+        impl $name {
+            /// Construct a histogram with constant bin width.
+            ///
+            /// The bin edges are `start + i * (end - start) / n` for `i` in `0..=n`,
+            /// where `n` is the number of bins. `start` and `end` are not checked
+            /// and are expected to be finite.
+            #[inline]
+            pub fn with_const_width(start: f64, end: f64) -> Self {
+                let step = (end - start) / (LEN as f64);
+                let mut range = [0.; LEN + 1];
+                for i in 0..(LEN + 1) {
+                    range[i] = start + step * (i as f64);
+                }
+
+                Self {
+                    range: range,
+                    bin: [0; LEN],
+                }
+            }
+
+            /// Construct a histogram from given ranges.
+            ///
+            /// The ranges are given by an iterator of floats where neighboring
+            /// pairs `(a, b)` define a bin for all `x` where `a <= x < b`.
+            ///
+            /// Fails if the iterator is too short (less than `n + 1` where `n`
+            /// is the number of bins), is not sorted or contains `nan`. `inf`
+            /// and empty ranges are allowed.
+            #[inline]
+            pub fn from_ranges<T>(ranges: T) -> Result<Self, ()>
+                where T: IntoIterator<Item = f64>
+            {
+                let mut range = [0.; LEN + 1];
+                let mut last_i = 0;
+                for (i, r) in ranges.into_iter().enumerate() {
+                    if i > LEN {
+                        break;
+                    }
+                    if r.is_nan() {
+                        return Err(());
+                    }
+                    if i > 0 && range[i - 1] > r {
+                        return Err(());
+                    }
+                    range[i] = r;
+                    last_i = i;
+                }
+                if last_i != LEN {
+                    return Err(());
+                }
+                Ok(Self {
+                    range: range,
+                    bin: [0; LEN],
+                })
+            }
+
+            /// Add a sample to the histogram.
+            ///
+            /// Fails if the sample is `nan` or out of range of the histogram.
+            #[inline]
+            pub fn add(&mut self, x: f64) -> Result<(), ()> {
+                // `nan` belongs to no bin; rejecting it here also keeps the
+                // `unwrap` below from panicking on a `nan` sample.
+                if x.is_nan() {
+                    return Err(());
+                }
+                // We made sure our ranges are valid at construction, so we can
+                // safely unwrap.
+                match self.range.binary_search_by(|p| p.partial_cmp(&x).unwrap()) {
+                    Ok(i) if i < LEN => {
+                        self.bin[i] += 1;
+                    },
+                    Err(i) if i > 0 && i < LEN + 1 => {
+                        self.bin[i - 1] += 1;
+                    },
+                    _ => {
+                        return Err(());
+                    },
+                }
+                Ok(())
+            }
+
+            /// Return the bins of the histogram.
+            #[inline]
+            pub fn bins(&self) -> &[u64] {
+                &self.bin as &[u64]
+            }
+
+            /// Return the ranges of the histogram.
+            #[inline]
+            pub fn ranges(&self) -> &[f64] {
+                &self.range as &[f64]
+            }
+        }
+    );
+}
diff --git a/src/lib.rs b/src/lib.rs
index a857c05..dec830c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -89,6 +89,7 @@ mod weighted_mean;
 mod minmax;
 mod quantile;
 mod traits;
+#[macro_use] mod histogram;
 
 pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError, Moments};
 pub use weighted_mean::{WeightedMean, WeightedMeanWithError};
diff --git a/tests/histogram.rs b/tests/histogram.rs
new file mode 100644
index 0000000..700d4ff
--- /dev/null
+++ b/tests/histogram.rs
@@ -0,0 +1,86 @@
+#[macro_use] extern crate average;
+
+extern crate core;
+
+use core::iter::Iterator;
+
+define_histogram!(Histogram, 10);
+
+#[test]
+fn with_const_width() {
+    let mut h = Histogram::with_const_width(0., 100.);
+    for i in 0..100 {
+        h.add(i as f64).unwrap();
+    }
+    assert_eq!(h.bins(), &[10, 10, 10, 10, 10, 10, 10, 10, 10, 10]);
+}
+
+#[test]
+fn with_const_width_offset() {
+    let mut h = Histogram::with_const_width(100., 200.);
+    assert_eq!(h.ranges()[0], 100.);
+    assert_eq!(h.add(99.9), Err(()));
+    for i in 100..200 {
+        h.add(i as f64).unwrap();
+    }
+    assert_eq!(h.bins(), &[10, 10, 10, 10, 10, 10, 10, 10, 10, 10]);
+}
+
+#[test]
+fn from_ranges() {
+    let mut h = Histogram::from_ranges(
+        [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9, 1.0, 2.0].iter().cloned()).unwrap();
+    for &i in &[0.05, 0.7, 1.0, 1.5] {
+        h.add(i).unwrap();
+    }
+    assert_eq!(h.bins(), &[1, 0, 0, 0, 0, 0, 1, 0, 0, 2]);
+}
+
+#[test]
+fn from_ranges_infinity() {
+    let inf = std::f64::INFINITY;
+    let mut h = Histogram::from_ranges(
+        [-inf, -0.4, -0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4, inf].iter().cloned()).unwrap();
+    for &i in &[-100., -0.45, 0., 0.25, 0.4, 100.] {
+        h.add(i).unwrap();
+    }
+    assert_eq!(h.bins(), &[2, 0, 0, 0, 0, 1, 0, 1, 0, 2]);
+}
+
+#[test]
+fn from_ranges_invalid() {
+    assert!(Histogram::from_ranges([].iter().cloned()).is_err());
+    let valid = vec![0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9, 1.0, 2.0];
+    assert!(Histogram::from_ranges(valid.iter().cloned()).is_ok());
+    let mut invalid_nan = valid.clone();
+    invalid_nan[3] = std::f64::NAN;
+    assert!(Histogram::from_ranges(invalid_nan.iter().cloned()).is_err());
+    let mut invalid_order = valid.clone();
+    invalid_order[10] = 0.9;
+    assert!(Histogram::from_ranges(invalid_order.iter().cloned()).is_err());
+    let mut valid_empty_ranges = valid.clone();
+    valid_empty_ranges[1] = 0.;
+    valid_empty_ranges[10] = 1.;
+    assert!(Histogram::from_ranges(valid_empty_ranges.iter().cloned()).is_ok());
+}
+
+#[test]
+fn from_ranges_empty() {
+    let mut h = Histogram::from_ranges(
+        [0., 0., 0.2, 0.3, 0.4, 0.5, 0.5, 0.8, 0.9, 2.0, 2.0].iter().cloned()).unwrap();
+    for &i in &[0.05, 0.7, 1.0, 1.5] {
+        h.add(i).unwrap();
+    }
+    assert_eq!(h.bins(), &[0, 1, 0, 0, 0, 0, 1, 0, 2, 0]);
+}
+
+#[test]
+fn out_of_range() {
+    let mut h = Histogram::with_const_width(0., 100.);
+    assert_eq!(h.add(-0.1), Err(()));
+    assert_eq!(h.add(0.0), Ok(()));
+    assert_eq!(h.add(1.0), Ok(()));
+    assert_eq!(h.add(100.0), Err(()));
+    assert_eq!(h.add(100.1), Err(()));
+    assert_eq!(h.add(std::f64::NAN), Err(()));
+}