From 7e0637484354845ceedd3b57cdd88ab6dce66983 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Wed, 7 Mar 2018 17:45:38 +0100 Subject: [PATCH] histogram: Implement variance This is useful for error estimates. --- src/traits.rs | 48 ++++++++++++++++++++++++++++++++++++++++++++++ tests/histogram.rs | 20 +++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/src/traits.rs b/src/traits.rs index 47d95e4..639e04f 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -12,6 +12,12 @@ pub trait Merge { fn merge(&mut self, other: &Self); } +/// Calculate the multinomial variance. Relevant for histograms. +#[inline(always)] +fn multinomal_variance(n: f64, n_tot_inv: f64) -> f64 { + n * (1. - n * n_tot_inv) +} + /// Get the bins and ranges from a histogram. pub trait Histogram: where for<'a> &'a Self: IntoIterator @@ -19,6 +25,16 @@ pub trait Histogram: /// Return the bins of the histogram. fn bins(&self) -> &[u64]; + /// Estimate the variance for the given bin. + /// + /// The square root of this estimates the error of the bin count. + #[inline] + fn variance(&self, bin: usize) -> f64 { + let count = self.bins()[bin]; + let sum: u64 = self.bins().iter().sum(); + multinomal_variance(count as f64, 1./(sum as f64)) + } + /// Return an iterator over the bins normalized by the bin widths. #[inline] fn normalized_bins(&self) -> IterNormalized<<&Self as IntoIterator>::IntoIter> { @@ -36,6 +52,18 @@ pub trait Histogram: fn centers(&self) -> IterBinCenters<<&Self as IntoIterator>::IntoIter> { IterBinCenters { histogram_iter: self.into_iter() } } + + /// Return an iterator over the bin variances. + /// + /// This is more efficient than using `variance()` each bin. + #[inline] + fn variances(&self) -> IterVariances<<&Self as IntoIterator>::IntoIter> { + let sum: u64 = self.bins().iter().sum(); + IterVariances { + histogram_iter: self.into_iter(), + sum_inv: 1./(sum as f64) + } + } } /// Iterate over the bins normalized by bin width. @@ -91,3 +119,23 @@ impl Iterator for IterBinCenters self.histogram_iter.next().map(|((a, b), _)| 0.5 * (a + b)) } } + +/// Iterate over the variances. +pub struct IterVariances + where T: Iterator +{ + histogram_iter: T, + sum_inv: f64, +} + +impl Iterator for IterVariances + where T: Iterator +{ + type Item = f64; + + #[inline] + fn next(&mut self) -> Option { + self.histogram_iter.next() + .map(|(_, n)| multinomal_variance(n as f64, self.sum_inv)) + } +} diff --git a/tests/histogram.rs b/tests/histogram.rs index 24b8cf0..3105af7 100644 --- a/tests/histogram.rs +++ b/tests/histogram.rs @@ -1,8 +1,10 @@ #[macro_use] extern crate average; extern crate core; +extern crate rand; use core::iter::Iterator; +use rand::distributions::IndependentSample; use average::Histogram; @@ -181,3 +183,21 @@ fn mul() { assert_eq!(h.bins(), expected.bins()); } + +#[test] +fn variance() { + let mut h = Histogram10::with_const_width(-3., 3.); + let normal = rand::distributions::Normal::new(0., 1.); + let mut rng = rand::weak_rng(); + for _ in 0..1000000 { + let _ = h.add(normal.ind_sample(&mut rng)); + } + println!("{:?}", h); + let sum: u64 = h.bins().iter().sum(); + let sum = sum as f64; + for (i, v) in h.variances().enumerate() { + assert_almost_eq!(v, h.variance(i), 1e-14); + let poissonian_variance = h.bins()[i] as f64; + assert_almost_eq!(v.sqrt() / sum, poissonian_variance.sqrt() / sum, 1e-4); + } +}