2017-05-05 17:42:21 +02:00
|
|
|
use core;
|
|
|
|
|
|
|
|
use conv::ApproxFrom;
|
|
|
|
|
2017-05-19 17:53:54 +02:00
|
|
|
/// Running estimator of the arithmetic mean and the variance of a sequence of
/// numbers (the "population").
///
/// The variance estimate can be used to obtain the standard error of the mean.
///
/// All quantities are updated incrementally in constant memory, so the numbers
/// may be supplied by an iterator. The algorithms used try to avoid numerical
/// instabilities.
///
/// ## Example
///
/// ```
/// use average::AverageWithError;
///
/// let a: AverageWithError = (1..6).map(Into::into).collect();
/// println!("The average is {} ± {}.", a.mean(), a.error());
/// ```
#[derive(Debug, Clone)]
pub struct AverageWithError {
    /// Running mean of all samples seen so far.
    avg: f64,
    /// How many samples have been accumulated.
    n: u64,
    /// Running sum of squares from which the variance is derived by dividing
    /// by the sample count (or the count minus one).
    v: f64,
}
|
|
|
|
|
2017-05-24 10:24:57 +02:00
|
|
|
impl AverageWithError {
|
2017-05-19 17:53:54 +02:00
|
|
|
/// Create a new average estimator.
|
2017-05-24 10:24:57 +02:00
|
|
|
pub fn new() -> AverageWithError {
|
|
|
|
AverageWithError { avg: 0., n: 0, v: 0. }
|
2017-05-05 17:42:21 +02:00
|
|
|
}
|
|
|
|
|
2017-05-19 17:53:54 +02:00
|
|
|
/// Add an element sampled from the population.
|
2017-05-23 21:10:50 +02:00
|
|
|
#[inline]
|
2017-05-05 19:07:03 +02:00
|
|
|
pub fn add(&mut self, sample: f64) {
|
2017-05-05 17:42:21 +02:00
|
|
|
// This algorithm introduced by Welford in 1962 trades numerical
|
|
|
|
// stability for a division inside the loop.
|
|
|
|
//
|
|
|
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
|
|
|
self.n += 1;
|
2017-05-05 19:07:03 +02:00
|
|
|
let delta = sample - self.avg;
|
2017-05-05 17:42:21 +02:00
|
|
|
self.avg += delta / f64::approx_from(self.n).unwrap();
|
2017-05-05 19:07:03 +02:00
|
|
|
self.v += delta * (sample - self.avg);
|
|
|
|
}
|
|
|
|
|
2017-05-19 17:53:54 +02:00
|
|
|
/// Determine whether the samples are empty.
|
2017-05-05 19:07:03 +02:00
|
|
|
pub fn is_empty(&self) -> bool {
|
|
|
|
self.n == 0
|
2017-05-05 17:42:21 +02:00
|
|
|
}
|
|
|
|
|
2017-05-19 17:53:54 +02:00
|
|
|
/// Estimate the mean of the population.
|
2017-05-05 17:42:21 +02:00
|
|
|
pub fn mean(&self) -> f64 {
|
|
|
|
self.avg
|
|
|
|
}
|
|
|
|
|
2017-05-19 17:53:54 +02:00
|
|
|
/// Return the number of samples.
|
2017-05-05 17:42:21 +02:00
|
|
|
pub fn len(&self) -> u64 {
|
|
|
|
self.n
|
|
|
|
}
|
|
|
|
|
2017-05-19 17:53:54 +02:00
|
|
|
/// Calculate the sample variance.
|
2017-05-05 17:42:21 +02:00
|
|
|
///
|
2017-05-19 17:53:54 +02:00
|
|
|
/// This is an unbiased estimator of the variance of the population.
|
2017-05-05 17:42:21 +02:00
|
|
|
pub fn sample_variance(&self) -> f64 {
|
|
|
|
if self.n < 2 {
|
|
|
|
return 0.;
|
|
|
|
}
|
|
|
|
self.v / f64::approx_from(self.n - 1).unwrap()
|
|
|
|
}
|
|
|
|
|
2017-05-19 17:53:54 +02:00
|
|
|
/// Calculate the population variance of the sample.
|
2017-05-05 17:42:21 +02:00
|
|
|
///
|
2017-05-19 17:53:54 +02:00
|
|
|
/// This is a biased estimator of the variance of the population.
|
2017-05-05 17:42:21 +02:00
|
|
|
pub fn population_variance(&self) -> f64 {
|
|
|
|
if self.n < 2 {
|
|
|
|
return 0.;
|
|
|
|
}
|
|
|
|
self.v / f64::approx_from(self.n).unwrap()
|
|
|
|
}
|
|
|
|
|
2017-05-19 17:53:54 +02:00
|
|
|
/// Estimate the standard error of the mean of the population.
|
2017-05-05 17:42:21 +02:00
|
|
|
pub fn error(&self) -> f64 {
|
|
|
|
if self.n == 0 {
|
|
|
|
return 0.;
|
|
|
|
}
|
|
|
|
(self.sample_variance() / f64::approx_from(self.n).unwrap()).sqrt()
|
|
|
|
}
|
|
|
|
|
2017-05-19 17:53:54 +02:00
|
|
|
/// Merge another sample into this one.
|
|
|
|
///
|
|
|
|
///
|
|
|
|
/// ## Example
|
2017-05-05 17:42:21 +02:00
|
|
|
///
|
|
|
|
/// ```
|
2017-05-24 10:24:57 +02:00
|
|
|
/// use average::AverageWithError;
|
2017-05-05 17:42:21 +02:00
|
|
|
///
|
|
|
|
/// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
|
|
|
/// let (left, right) = sequence.split_at(3);
|
2017-05-24 10:24:57 +02:00
|
|
|
/// let avg_total: AverageWithError = sequence.iter().map(|x| *x).collect();
|
|
|
|
/// let mut avg_left: AverageWithError = left.iter().map(|x| *x).collect();
|
|
|
|
/// let avg_right: AverageWithError = right.iter().map(|x| *x).collect();
|
2017-05-05 17:42:21 +02:00
|
|
|
/// avg_left.merge(&avg_right);
|
|
|
|
/// assert_eq!(avg_total.mean(), avg_left.mean());
|
|
|
|
/// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
|
|
|
|
/// ```
|
2017-05-24 10:24:57 +02:00
|
|
|
pub fn merge(&mut self, other: &AverageWithError) {
|
2017-05-05 17:42:21 +02:00
|
|
|
// This algorithm was proposed by Chan et al. in 1979.
|
|
|
|
//
|
|
|
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
|
|
|
let delta = other.avg - self.avg;
|
|
|
|
let len_self = f64::approx_from(self.n).unwrap();
|
|
|
|
let len_other = f64::approx_from(other.n).unwrap();
|
|
|
|
let len_total = len_self + len_other;
|
|
|
|
self.n += other.n;
|
|
|
|
self.avg = (len_self * self.avg + len_other * other.avg) / len_total;
|
|
|
|
// Chan et al. use
|
|
|
|
//
|
|
|
|
// self.avg += delta * len_other / len_total;
|
|
|
|
//
|
|
|
|
// instead but this results in cancelation if the number of samples are similar.
|
|
|
|
self.v += other.v + delta*delta * len_self * len_other / len_total;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-24 10:24:57 +02:00
|
|
|
impl core::default::Default for AverageWithError {
|
|
|
|
fn default() -> AverageWithError {
|
|
|
|
AverageWithError::new()
|
2017-05-05 17:42:21 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-24 10:24:57 +02:00
|
|
|
impl core::iter::FromIterator<f64> for AverageWithError {
|
|
|
|
fn from_iter<T>(iter: T) -> AverageWithError
|
2017-05-05 17:42:21 +02:00
|
|
|
where T: IntoIterator<Item=f64>
|
|
|
|
{
|
2017-05-24 10:24:57 +02:00
|
|
|
let mut a = AverageWithError::new();
|
2017-05-05 17:42:21 +02:00
|
|
|
for i in iter {
|
|
|
|
a.add(i);
|
|
|
|
}
|
|
|
|
a
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Merging the two parts of any split of a sequence must reproduce the
    /// statistics of the whole sequence, including the splits where the left
    /// or the right part is empty.
    #[test]
    fn merge() {
        let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
        // The reference does not depend on the split point, so build it once.
        let avg_total: AverageWithError = sequence.iter().cloned().collect();
        // `len() + 1` so that the split with an empty right part is covered.
        for mid in 0..(sequence.len() + 1) {
            let (left, right) = sequence.split_at(mid);
            let mut avg_left: AverageWithError = left.iter().cloned().collect();
            let avg_right: AverageWithError = right.iter().cloned().collect();
            avg_left.merge(&avg_right);
            assert_eq!(avg_total.n, avg_left.n);
            assert_eq!(avg_total.avg, avg_left.avg);
            assert_eq!(avg_total.v, avg_left.v);
        }
    }
}
|