histogram: Implement variance
This is useful for error estimates.
This commit is contained in:
parent
0259728bb8
commit
7e06374843
|
@ -12,6 +12,12 @@ pub trait Merge {
|
||||||
fn merge(&mut self, other: &Self);
|
fn merge(&mut self, other: &Self);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Calculate the multinomial variance. Relevant for histograms.
|
||||||
|
#[inline(always)]
|
||||||
|
fn multinomal_variance(n: f64, n_tot_inv: f64) -> f64 {
|
||||||
|
n * (1. - n * n_tot_inv)
|
||||||
|
}
|
||||||
|
|
||||||
/// Get the bins and ranges from a histogram.
|
/// Get the bins and ranges from a histogram.
|
||||||
pub trait Histogram:
|
pub trait Histogram:
|
||||||
where for<'a> &'a Self: IntoIterator<Item = ((f64, f64), u64)>
|
where for<'a> &'a Self: IntoIterator<Item = ((f64, f64), u64)>
|
||||||
|
@ -19,6 +25,16 @@ pub trait Histogram:
|
||||||
/// Return the bins of the histogram.
|
/// Return the bins of the histogram.
|
||||||
fn bins(&self) -> &[u64];
|
fn bins(&self) -> &[u64];
|
||||||
|
|
||||||
|
/// Estimate the variance for the given bin.
|
||||||
|
///
|
||||||
|
/// The square root of this estimates the error of the bin count.
|
||||||
|
#[inline]
|
||||||
|
fn variance(&self, bin: usize) -> f64 {
|
||||||
|
let count = self.bins()[bin];
|
||||||
|
let sum: u64 = self.bins().iter().sum();
|
||||||
|
multinomal_variance(count as f64, 1./(sum as f64))
|
||||||
|
}
|
||||||
|
|
||||||
/// Return an iterator over the bins normalized by the bin widths.
|
/// Return an iterator over the bins normalized by the bin widths.
|
||||||
#[inline]
|
#[inline]
|
||||||
fn normalized_bins(&self) -> IterNormalized<<&Self as IntoIterator>::IntoIter> {
|
fn normalized_bins(&self) -> IterNormalized<<&Self as IntoIterator>::IntoIter> {
|
||||||
|
@ -36,6 +52,18 @@ pub trait Histogram:
|
||||||
fn centers(&self) -> IterBinCenters<<&Self as IntoIterator>::IntoIter> {
|
fn centers(&self) -> IterBinCenters<<&Self as IntoIterator>::IntoIter> {
|
||||||
IterBinCenters { histogram_iter: self.into_iter() }
|
IterBinCenters { histogram_iter: self.into_iter() }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return an iterator over the bin variances.
|
||||||
|
///
|
||||||
|
/// This is more efficient than using `variance()` each bin.
|
||||||
|
#[inline]
|
||||||
|
fn variances(&self) -> IterVariances<<&Self as IntoIterator>::IntoIter> {
|
||||||
|
let sum: u64 = self.bins().iter().sum();
|
||||||
|
IterVariances {
|
||||||
|
histogram_iter: self.into_iter(),
|
||||||
|
sum_inv: 1./(sum as f64)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterate over the bins normalized by bin width.
|
/// Iterate over the bins normalized by bin width.
|
||||||
|
@ -91,3 +119,23 @@ impl<T> Iterator for IterBinCenters<T>
|
||||||
self.histogram_iter.next().map(|((a, b), _)| 0.5 * (a + b))
|
self.histogram_iter.next().map(|((a, b), _)| 0.5 * (a + b))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Iterate over the variances.
|
||||||
|
pub struct IterVariances<T>
|
||||||
|
where T: Iterator<Item = ((f64, f64), u64)>
|
||||||
|
{
|
||||||
|
histogram_iter: T,
|
||||||
|
sum_inv: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> Iterator for IterVariances<T>
|
||||||
|
where T: Iterator<Item = ((f64, f64), u64)>
|
||||||
|
{
|
||||||
|
type Item = f64;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn next(&mut self) -> Option<f64> {
|
||||||
|
self.histogram_iter.next()
|
||||||
|
.map(|(_, n)| multinomal_variance(n as f64, self.sum_inv))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
#[macro_use] extern crate average;
|
#[macro_use] extern crate average;
|
||||||
|
|
||||||
extern crate core;
|
extern crate core;
|
||||||
|
extern crate rand;
|
||||||
|
|
||||||
use core::iter::Iterator;
|
use core::iter::Iterator;
|
||||||
|
use rand::distributions::IndependentSample;
|
||||||
|
|
||||||
use average::Histogram;
|
use average::Histogram;
|
||||||
|
|
||||||
|
@ -181,3 +183,21 @@ fn mul() {
|
||||||
|
|
||||||
assert_eq!(h.bins(), expected.bins());
|
assert_eq!(h.bins(), expected.bins());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn variance() {
|
||||||
|
let mut h = Histogram10::with_const_width(-3., 3.);
|
||||||
|
let normal = rand::distributions::Normal::new(0., 1.);
|
||||||
|
let mut rng = rand::weak_rng();
|
||||||
|
for _ in 0..1000000 {
|
||||||
|
let _ = h.add(normal.ind_sample(&mut rng));
|
||||||
|
}
|
||||||
|
println!("{:?}", h);
|
||||||
|
let sum: u64 = h.bins().iter().sum();
|
||||||
|
let sum = sum as f64;
|
||||||
|
for (i, v) in h.variances().enumerate() {
|
||||||
|
assert_almost_eq!(v, h.variance(i), 1e-14);
|
||||||
|
let poissonian_variance = h.bins()[i] as f64;
|
||||||
|
assert_almost_eq!(v.sqrt() / sum, poissonian_variance.sqrt() / sum, 1e-4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user