histogram: Implement variance

This is useful for error estimates.
This commit is contained in:
Vinzent Steinberg 2018-03-07 17:45:38 +01:00
parent 0259728bb8
commit 7e06374843
2 changed files with 68 additions and 0 deletions

View File

@ -12,6 +12,12 @@ pub trait Merge {
fn merge(&mut self, other: &Self);
}
/// Calculate the multinomial variance. Relevant for histograms.
#[inline(always)]
fn multinomal_variance(n: f64, n_tot_inv: f64) -> f64 {
n * (1. - n * n_tot_inv)
}
/// Get the bins and ranges from a histogram.
pub trait Histogram:
where for<'a> &'a Self: IntoIterator<Item = ((f64, f64), u64)>
@ -19,6 +25,16 @@ pub trait Histogram:
/// Return the bins of the histogram.
fn bins(&self) -> &[u64];
/// Estimate the variance for the given bin.
///
/// The square root of this estimates the error of the bin count.
#[inline]
fn variance(&self, bin: usize) -> f64 {
let count = self.bins()[bin];
let sum: u64 = self.bins().iter().sum();
multinomal_variance(count as f64, 1./(sum as f64))
}
/// Return an iterator over the bins normalized by the bin widths.
#[inline]
fn normalized_bins(&self) -> IterNormalized<<&Self as IntoIterator>::IntoIter> {
@ -36,6 +52,18 @@ pub trait Histogram:
fn centers(&self) -> IterBinCenters<<&Self as IntoIterator>::IntoIter> {
IterBinCenters { histogram_iter: self.into_iter() }
}
/// Return an iterator over the bin variances.
///
/// This is more efficient than using `variance()` each bin.
#[inline]
fn variances(&self) -> IterVariances<<&Self as IntoIterator>::IntoIter> {
let sum: u64 = self.bins().iter().sum();
IterVariances {
histogram_iter: self.into_iter(),
sum_inv: 1./(sum as f64)
}
}
}
/// Iterate over the bins normalized by bin width.
@ -91,3 +119,23 @@ impl<T> Iterator for IterBinCenters<T>
self.histogram_iter.next().map(|((a, b), _)| 0.5 * (a + b))
}
}
/// Iterate over the variances.
pub struct IterVariances<T>
where T: Iterator<Item = ((f64, f64), u64)>
{
histogram_iter: T,
sum_inv: f64,
}
impl<T> Iterator for IterVariances<T>
where T: Iterator<Item = ((f64, f64), u64)>
{
type Item = f64;
#[inline]
fn next(&mut self) -> Option<f64> {
self.histogram_iter.next()
.map(|(_, n)| multinomal_variance(n as f64, self.sum_inv))
}
}

View File

@ -1,8 +1,10 @@
#[macro_use] extern crate average;
extern crate core;
extern crate rand;
use core::iter::Iterator;
use rand::distributions::IndependentSample;
use average::Histogram;
@ -181,3 +183,21 @@ fn mul() {
assert_eq!(h.bins(), expected.bins());
}
#[test]
fn variance() {
let mut h = Histogram10::with_const_width(-3., 3.);
let normal = rand::distributions::Normal::new(0., 1.);
let mut rng = rand::weak_rng();
for _ in 0..1000000 {
let _ = h.add(normal.ind_sample(&mut rng));
}
println!("{:?}", h);
let sum: u64 = h.bins().iter().sum();
let sum = sum as f64;
for (i, v) in h.variances().enumerate() {
assert_almost_eq!(v, h.variance(i), 1e-14);
let poissonian_variance = h.bins()[i] as f64;
assert_almost_eq!(v.sqrt() / sum, poissonian_variance.sqrt() / sum, 1e-4);
}
}