Split up moments into one file for each

This anticipates adding skewness.
This commit is contained in:
Vinzent Steinberg 2017-05-28 21:00:04 +02:00
parent 19127cede7
commit 712303b58a
4 changed files with 142 additions and 141 deletions

View File

@ -39,13 +39,13 @@ extern crate conv;
extern crate quickersort; extern crate quickersort;
#[macro_use] mod macros; #[macro_use] mod macros;
mod average; mod moments;
mod weighted_average; mod weighted_average;
mod minmax; mod minmax;
mod reduce; mod reduce;
mod quantile; mod quantile;
pub use average::{Average, AverageWithError}; pub use moments::{Average, AverageWithError};
pub use weighted_average::{WeightedAverage, WeightedAverageWithError}; pub use weighted_average::{WeightedAverage, WeightedAverageWithError};
pub use minmax::{Min, Max}; pub use minmax::{Min, Max};
pub use quantile::Quantile; pub use quantile::Quantile;

132
src/moments/mean.rs Normal file
View File

@ -0,0 +1,132 @@
use core;
use conv::ApproxFrom;
/// Estimate the arithmetic mean of a sequence of numbers ("population").
///
/// Only the running mean and the number of observations are stored, so
/// samples can be added one at a time without keeping them around.
///
/// ## Example
///
/// ```
/// use average::Average;
///
/// let a: Average = (1..6).map(Into::into).collect();
/// println!("The average is {}.", a.mean());
/// ```
#[derive(Debug, Clone)]
pub struct Average {
/// Running mean of the observations added so far (0 for a new estimator).
avg: f64,
/// Sample size, i.e. the number of observations added so far.
n: u64,
}
impl Average {
    /// Create a new, empty average estimator.
    ///
    /// Both the mean and the sample size start out as 0.
    #[inline]
    pub fn new() -> Average {
        Average { avg: 0., n: 0 }
    }

    /// Add an observation sampled from the population.
    ///
    /// The sample size is incremented first; the mean is then moved towards
    /// `sample` by `(sample - mean) / n`, where `n` is the updated size.
    #[inline]
    pub fn add(&mut self, sample: f64) {
        self.increment();
        let delta_n = (sample - self.avg)
            / f64::approx_from(self.n).unwrap();
        self.add_inner(delta_n);
    }

    /// Increment the sample size.
    ///
    /// This does not update anything else.
    #[inline]
    pub fn increment(&mut self) {
        self.n += 1;
    }

    /// Add an observation given an already calculated difference from the mean
    /// divided by the number of samples, assuming the inner count of the sample
    /// size was already updated.
    ///
    /// This is useful for avoiding unnecessary divisions in the inner loop.
    #[inline]
    pub fn add_inner(&mut self, delta_n: f64) {
        // This algorithm, introduced by Welford in 1962, gains numerical
        // stability at the cost of a division inside the loop.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        self.avg += delta_n;
    }

    /// Determine whether the sample is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.n == 0
    }

    /// Estimate the mean of the population.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn mean(&self) -> f64 {
        self.avg
    }

    /// Return the sample size.
    #[inline]
    pub fn len(&self) -> u64 {
        self.n
    }

    /// Merge another sample into this one.
    ///
    /// Merging an empty sample leaves this estimator unchanged, and merging
    /// into an empty estimator takes over the state of `other`. In particular,
    /// merging two empty samples yields an empty sample whose mean is 0.
    ///
    /// ## Example
    ///
    /// ```
    /// use average::Average;
    ///
    /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
    /// let (left, right) = sequence.split_at(3);
    /// let avg_total: Average = sequence.iter().map(|x| *x).collect();
    /// let mut avg_left: Average = left.iter().map(|x| *x).collect();
    /// let avg_right: Average = right.iter().map(|x| *x).collect();
    /// avg_left.merge(&avg_right);
    /// assert_eq!(avg_total.mean(), avg_left.mean());
    /// ```
    #[inline]
    pub fn merge(&mut self, other: &Average) {
        // Nothing to merge. Returning early also keeps the weighted mean
        // below from evaluating 0 / 0 = NaN when both samples are empty.
        if other.is_empty() {
            return;
        }
        // An empty estimator takes over the other sample's state verbatim,
        // which avoids the rounding of `n * x / n`.
        if self.is_empty() {
            self.avg = other.avg;
            self.n = other.n;
            return;
        }
        // This algorithm was proposed by Chan et al. in 1979.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let len_self = f64::approx_from(self.n).unwrap();
        let len_other = f64::approx_from(other.n).unwrap();
        let len_total = len_self + len_other;
        self.n += other.n;
        self.avg = (len_self * self.avg + len_other * other.avg) / len_total;
        // Chan et al. use
        //
        //     self.avg += delta * len_other / len_total;
        //
        // instead, but this results in cancellation if the numbers of samples
        // are similar.
    }
}
impl core::default::Default for Average {
fn default() -> Average {
Average::new()
}
}
/// Build an estimator by adding every item of an iterator in order.
impl core::iter::FromIterator<f64> for Average {
    fn from_iter<T>(iter: T) -> Average
        where T: IntoIterator<Item=f64>
    {
        // Thread the estimator through the iteration as the accumulator.
        iter.into_iter().fold(Average::new(), |mut estimator, sample| {
            estimator.add(sample);
            estimator
        })
    }
}

2
src/moments/mod.rs Normal file
View File

@ -0,0 +1,2 @@
// Each estimator lives in its own source file; the files are included
// textually so that their items are defined directly in this module.
include!("mean.rs");
include!("variance.rs");

View File

@ -1,136 +1,3 @@
use core;
use conv::ApproxFrom;
/// Estimate the arithmetic mean of a sequence of numbers ("population").
///
/// Only the running mean and the number of observations are stored, so
/// samples can be added one at a time without keeping them around.
///
/// ## Example
///
/// ```
/// use average::Average;
///
/// let a: Average = (1..6).map(Into::into).collect();
/// println!("The average is {}.", a.mean());
/// ```
#[derive(Debug, Clone)]
pub struct Average {
/// Running mean of the observations added so far (0 for a new estimator).
avg: f64,
/// Sample size, i.e. the number of observations added so far.
n: u64,
}
impl Average {
    /// Create a new, empty average estimator.
    ///
    /// Both the mean and the sample size start out as 0.
    #[inline]
    pub fn new() -> Average {
        Average { avg: 0., n: 0 }
    }

    /// Add an observation sampled from the population.
    ///
    /// The sample size is incremented first; the mean is then moved towards
    /// `sample` by `(sample - mean) / n`, where `n` is the updated size.
    #[inline]
    pub fn add(&mut self, sample: f64) {
        self.increment();
        let delta_n = (sample - self.avg)
            / f64::approx_from(self.n).unwrap();
        self.add_inner(delta_n);
    }

    /// Increment the sample size.
    ///
    /// This does not update anything else.
    #[inline]
    pub fn increment(&mut self) {
        self.n += 1;
    }

    /// Add an observation given an already calculated difference from the mean
    /// divided by the number of samples, assuming the inner count of the sample
    /// size was already updated.
    ///
    /// This is useful for avoiding unnecessary divisions in the inner loop.
    #[inline]
    pub fn add_inner(&mut self, delta_n: f64) {
        // This algorithm, introduced by Welford in 1962, gains numerical
        // stability at the cost of a division inside the loop.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        self.avg += delta_n;
    }

    /// Determine whether the sample is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.n == 0
    }

    /// Estimate the mean of the population.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn mean(&self) -> f64 {
        self.avg
    }

    /// Return the sample size.
    #[inline]
    pub fn len(&self) -> u64 {
        self.n
    }

    /// Merge another sample into this one.
    ///
    /// Merging an empty sample leaves this estimator unchanged, and merging
    /// into an empty estimator takes over the state of `other`. In particular,
    /// merging two empty samples yields an empty sample whose mean is 0.
    ///
    /// ## Example
    ///
    /// ```
    /// use average::Average;
    ///
    /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
    /// let (left, right) = sequence.split_at(3);
    /// let avg_total: Average = sequence.iter().map(|x| *x).collect();
    /// let mut avg_left: Average = left.iter().map(|x| *x).collect();
    /// let avg_right: Average = right.iter().map(|x| *x).collect();
    /// avg_left.merge(&avg_right);
    /// assert_eq!(avg_total.mean(), avg_left.mean());
    /// ```
    #[inline]
    pub fn merge(&mut self, other: &Average) {
        // Nothing to merge. Returning early also keeps the weighted mean
        // below from evaluating 0 / 0 = NaN when both samples are empty.
        if other.is_empty() {
            return;
        }
        // An empty estimator takes over the other sample's state verbatim,
        // which avoids the rounding of `n * x / n`.
        if self.is_empty() {
            self.avg = other.avg;
            self.n = other.n;
            return;
        }
        // This algorithm was proposed by Chan et al. in 1979.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let len_self = f64::approx_from(self.n).unwrap();
        let len_other = f64::approx_from(other.n).unwrap();
        let len_total = len_self + len_other;
        self.n += other.n;
        self.avg = (len_self * self.avg + len_other * other.avg) / len_total;
        // Chan et al. use
        //
        //     self.avg += delta * len_other / len_total;
        //
        // instead, but this results in cancellation if the numbers of samples
        // are similar.
    }
}
impl core::default::Default for Average {
fn default() -> Average {
Average::new()
}
}
/// Build an estimator by adding every item of an iterator in order.
impl core::iter::FromIterator<f64> for Average {
    fn from_iter<T>(iter: T) -> Average
        where T: IntoIterator<Item=f64>
    {
        // Thread the estimator through the iteration as the accumulator.
        iter.into_iter().fold(Average::new(), |mut estimator, sample| {
            estimator.add(sample);
            estimator
        })
    }
}
/// Estimate the arithmetic mean and the variance of a sequence of numbers /// Estimate the arithmetic mean and the variance of a sequence of numbers
/// ("population"). /// ("population").
/// ///
@ -150,13 +17,13 @@ pub struct AverageWithError {
/// Estimator of average. /// Estimator of average.
avg: Average, avg: Average,
/// Intermediate sum of squares for calculating the variance. /// Intermediate sum of squares for calculating the variance.
v: f64, sum_2: f64,
} }
impl AverageWithError { impl AverageWithError {
/// Create a new average estimator. /// Create a new average estimator.
pub fn new() -> AverageWithError { pub fn new() -> AverageWithError {
AverageWithError { avg: Average::new(), v: 0. } AverageWithError { avg: Average::new(), sum_2: 0. }
} }
/// Add an observation sampled from the population. /// Add an observation sampled from the population.
@ -188,7 +55,7 @@ impl AverageWithError {
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance. // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
let n = f64::approx_from(self.avg.len()).unwrap(); let n = f64::approx_from(self.avg.len()).unwrap();
self.avg.add_inner(delta_n); self.avg.add_inner(delta_n);
self.v += delta_n * delta_n * n * (n - 1.); self.sum_2 += delta_n * delta_n * n * (n - 1.);
} }
/// Determine whether the sample is empty. /// Determine whether the sample is empty.
@ -219,7 +86,7 @@ impl AverageWithError {
if self.avg.len() < 2 { if self.avg.len() < 2 {
return 0.; return 0.;
} }
self.v / f64::approx_from(self.avg.len() - 1).unwrap() self.sum_2 / f64::approx_from(self.avg.len() - 1).unwrap()
} }
/// Calculate the population variance of the sample. /// Calculate the population variance of the sample.
@ -231,7 +98,7 @@ impl AverageWithError {
if n < 2 { if n < 2 {
return 0.; return 0.;
} }
self.v / f64::approx_from(n).unwrap() self.sum_2 / f64::approx_from(n).unwrap()
} }
/// Estimate the standard error of the mean of the population. /// Estimate the standard error of the mean of the population.
@ -271,7 +138,7 @@ impl AverageWithError {
let len_total = len_self + len_other; let len_total = len_self + len_other;
let delta = other.mean() - self.mean(); let delta = other.mean() - self.mean();
self.avg.merge(&other.avg); self.avg.merge(&other.avg);
self.v += other.v + delta*delta * len_self * len_other / len_total; self.sum_2 += other.sum_2 + delta*delta * len_self * len_other / len_total;
} }
} }