Split up moments into one file for each
This anticipates adding skewness.
This commit is contained in:
parent
19127cede7
commit
712303b58a
@ -39,13 +39,13 @@ extern crate conv;
|
|||||||
extern crate quickersort;
|
extern crate quickersort;
|
||||||
|
|
||||||
#[macro_use] mod macros;
|
#[macro_use] mod macros;
|
||||||
mod average;
|
mod moments;
|
||||||
mod weighted_average;
|
mod weighted_average;
|
||||||
mod minmax;
|
mod minmax;
|
||||||
mod reduce;
|
mod reduce;
|
||||||
mod quantile;
|
mod quantile;
|
||||||
|
|
||||||
pub use average::{Average, AverageWithError};
|
pub use moments::{Average, AverageWithError};
|
||||||
pub use weighted_average::{WeightedAverage, WeightedAverageWithError};
|
pub use weighted_average::{WeightedAverage, WeightedAverageWithError};
|
||||||
pub use minmax::{Min, Max};
|
pub use minmax::{Min, Max};
|
||||||
pub use quantile::Quantile;
|
pub use quantile::Quantile;
|
||||||
|
132
src/moments/mean.rs
Normal file
132
src/moments/mean.rs
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
use core;
|
||||||
|
|
||||||
|
use conv::ApproxFrom;
|
||||||
|
|
||||||
|
|
||||||
|
/// Estimate the arithmetic mean of a sequence of numbers ("population").
|
||||||
|
///
|
||||||
|
///
|
||||||
|
/// ## Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use average::Average;
|
||||||
|
///
|
||||||
|
/// let a: Average = (1..6).map(Into::into).collect();
|
||||||
|
/// println!("The average is {}.", a.mean());
|
||||||
|
/// ```
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Average {
|
||||||
|
/// Average value.
|
||||||
|
avg: f64,
|
||||||
|
/// Sample size.
|
||||||
|
n: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Average {
|
||||||
|
/// Create a new average estimator.
|
||||||
|
#[inline]
|
||||||
|
pub fn new() -> Average {
|
||||||
|
Average { avg: 0., n: 0 }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add an observation sampled from the population.
|
||||||
|
#[inline]
|
||||||
|
pub fn add(&mut self, sample: f64) {
|
||||||
|
self.increment();
|
||||||
|
let delta_n = (sample - self.avg)
|
||||||
|
/ f64::approx_from(self.n).unwrap();
|
||||||
|
self.add_inner(delta_n);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Increment the sample size.
|
||||||
|
///
|
||||||
|
/// This does not update anything else.
|
||||||
|
#[inline]
|
||||||
|
pub fn increment(&mut self) {
|
||||||
|
self.n += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add an observation given its already calculated difference from the mean
|
||||||
|
/// divided by the number of samples, assuming the internal sample count was
|
||||||
|
/// already incremented.
|
||||||
|
///
|
||||||
|
/// This is useful for avoiding unnecessary divisions in the inner loop.
|
||||||
|
pub fn add_inner(&mut self, delta_n: f64) {
|
||||||
|
// This algorithm, introduced by Welford in 1962, gains numerical
|
||||||
|
// stability at the cost of a division inside the loop.
|
||||||
|
//
|
||||||
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||||
|
self.avg += delta_n;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Determine whether the sample is empty.
|
||||||
|
#[inline]
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.n == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Estimate the mean of the population.
|
||||||
|
///
|
||||||
|
/// Returns 0 for an empty sample.
|
||||||
|
#[inline]
|
||||||
|
pub fn mean(&self) -> f64 {
|
||||||
|
self.avg
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the sample size.
|
||||||
|
#[inline]
|
||||||
|
pub fn len(&self) -> u64 {
|
||||||
|
self.n
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Merge another sample into this one.
|
||||||
|
///
|
||||||
|
///
|
||||||
|
/// ## Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use average::Average;
|
||||||
|
///
|
||||||
|
/// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
||||||
|
/// let (left, right) = sequence.split_at(3);
|
||||||
|
/// let avg_total: Average = sequence.iter().map(|x| *x).collect();
|
||||||
|
/// let mut avg_left: Average = left.iter().map(|x| *x).collect();
|
||||||
|
/// let avg_right: Average = right.iter().map(|x| *x).collect();
|
||||||
|
/// avg_left.merge(&avg_right);
|
||||||
|
/// assert_eq!(avg_total.mean(), avg_left.mean());
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
pub fn merge(&mut self, other: &Average) {
|
||||||
|
// This algorithm was proposed by Chan et al. in 1979.
|
||||||
|
//
|
||||||
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||||
|
let len_self = f64::approx_from(self.n).unwrap();
|
||||||
|
let len_other = f64::approx_from(other.n).unwrap();
|
||||||
|
let len_total = len_self + len_other;
|
||||||
|
self.n += other.n;
|
||||||
|
self.avg = (len_self * self.avg + len_other * other.avg) / len_total;
|
||||||
|
// Chan et al. use
|
||||||
|
//
|
||||||
|
// self.avg += delta * len_other / len_total;
|
||||||
|
//
|
||||||
|
// instead, but this results in cancellation if the two sample sizes are similar.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::default::Default for Average {
|
||||||
|
fn default() -> Average {
|
||||||
|
Average::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::iter::FromIterator<f64> for Average {
|
||||||
|
fn from_iter<T>(iter: T) -> Average
|
||||||
|
where T: IntoIterator<Item=f64>
|
||||||
|
{
|
||||||
|
let mut a = Average::new();
|
||||||
|
for i in iter {
|
||||||
|
a.add(i);
|
||||||
|
}
|
||||||
|
a
|
||||||
|
}
|
||||||
|
}
|
2
src/moments/mod.rs
Normal file
2
src/moments/mod.rs
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
include!("mean.rs");
|
||||||
|
include!("variance.rs");
|
@ -1,136 +1,3 @@
|
|||||||
use core;
|
|
||||||
|
|
||||||
use conv::ApproxFrom;
|
|
||||||
|
|
||||||
|
|
||||||
/// Estimate the arithmetic mean of a sequence of numbers ("population").
|
|
||||||
///
|
|
||||||
///
|
|
||||||
/// ## Example
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use average::Average;
|
|
||||||
///
|
|
||||||
/// let a: Average = (1..6).map(Into::into).collect();
|
|
||||||
/// println!("The average is {}.", a.mean());
|
|
||||||
/// ```
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct Average {
|
|
||||||
/// Average value.
|
|
||||||
avg: f64,
|
|
||||||
/// Sample size.
|
|
||||||
n: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Average {
|
|
||||||
/// Create a new average estimator.
|
|
||||||
#[inline]
|
|
||||||
pub fn new() -> Average {
|
|
||||||
Average { avg: 0., n: 0 }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Add an observation sampled from the population.
|
|
||||||
#[inline]
|
|
||||||
pub fn add(&mut self, sample: f64) {
|
|
||||||
self.increment();
|
|
||||||
let delta_n = (sample - self.avg)
|
|
||||||
/ f64::approx_from(self.n).unwrap();
|
|
||||||
self.add_inner(delta_n);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Increment the sample size.
|
|
||||||
///
|
|
||||||
/// This does not update anything else.
|
|
||||||
#[inline]
|
|
||||||
pub fn increment(&mut self) {
|
|
||||||
self.n += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Add an observation given its already calculated difference from the mean
|
|
||||||
/// divided by the number of samples, assuming the internal sample count was
|
|
||||||
/// already incremented.
|
|
||||||
///
|
|
||||||
/// This is useful for avoiding unnecessary divisions in the inner loop.
|
|
||||||
pub fn add_inner(&mut self, delta_n: f64) {
|
|
||||||
// This algorithm, introduced by Welford in 1962, gains numerical
|
|
||||||
// stability at the cost of a division inside the loop.
|
|
||||||
//
|
|
||||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
|
||||||
self.avg += delta_n;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Determine whether the sample is empty.
|
|
||||||
#[inline]
|
|
||||||
pub fn is_empty(&self) -> bool {
|
|
||||||
self.n == 0
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Estimate the mean of the population.
|
|
||||||
///
|
|
||||||
/// Returns 0 for an empty sample.
|
|
||||||
#[inline]
|
|
||||||
pub fn mean(&self) -> f64 {
|
|
||||||
self.avg
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return the sample size.
|
|
||||||
#[inline]
|
|
||||||
pub fn len(&self) -> u64 {
|
|
||||||
self.n
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Merge another sample into this one.
|
|
||||||
///
|
|
||||||
///
|
|
||||||
/// ## Example
|
|
||||||
///
|
|
||||||
/// ```
|
|
||||||
/// use average::Average;
|
|
||||||
///
|
|
||||||
/// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
|
||||||
/// let (left, right) = sequence.split_at(3);
|
|
||||||
/// let avg_total: Average = sequence.iter().map(|x| *x).collect();
|
|
||||||
/// let mut avg_left: Average = left.iter().map(|x| *x).collect();
|
|
||||||
/// let avg_right: Average = right.iter().map(|x| *x).collect();
|
|
||||||
/// avg_left.merge(&avg_right);
|
|
||||||
/// assert_eq!(avg_total.mean(), avg_left.mean());
|
|
||||||
/// ```
|
|
||||||
#[inline]
|
|
||||||
pub fn merge(&mut self, other: &Average) {
|
|
||||||
// This algorithm was proposed by Chan et al. in 1979.
|
|
||||||
//
|
|
||||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
|
||||||
let len_self = f64::approx_from(self.n).unwrap();
|
|
||||||
let len_other = f64::approx_from(other.n).unwrap();
|
|
||||||
let len_total = len_self + len_other;
|
|
||||||
self.n += other.n;
|
|
||||||
self.avg = (len_self * self.avg + len_other * other.avg) / len_total;
|
|
||||||
// Chan et al. use
|
|
||||||
//
|
|
||||||
// self.avg += delta * len_other / len_total;
|
|
||||||
//
|
|
||||||
// instead, but this results in cancellation if the two sample sizes are similar.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl core::default::Default for Average {
|
|
||||||
fn default() -> Average {
|
|
||||||
Average::new()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl core::iter::FromIterator<f64> for Average {
|
|
||||||
fn from_iter<T>(iter: T) -> Average
|
|
||||||
where T: IntoIterator<Item=f64>
|
|
||||||
{
|
|
||||||
let mut a = Average::new();
|
|
||||||
for i in iter {
|
|
||||||
a.add(i);
|
|
||||||
}
|
|
||||||
a
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Estimate the arithmetic mean and the variance of a sequence of numbers
|
/// Estimate the arithmetic mean and the variance of a sequence of numbers
|
||||||
/// ("population").
|
/// ("population").
|
||||||
///
|
///
|
||||||
@ -150,13 +17,13 @@ pub struct AverageWithError {
|
|||||||
/// Estimator of average.
|
/// Estimator of average.
|
||||||
avg: Average,
|
avg: Average,
|
||||||
/// Intermediate sum of squares for calculating the variance.
|
/// Intermediate sum of squares for calculating the variance.
|
||||||
v: f64,
|
sum_2: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AverageWithError {
|
impl AverageWithError {
|
||||||
/// Create a new average estimator.
|
/// Create a new average estimator.
|
||||||
pub fn new() -> AverageWithError {
|
pub fn new() -> AverageWithError {
|
||||||
AverageWithError { avg: Average::new(), v: 0. }
|
AverageWithError { avg: Average::new(), sum_2: 0. }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add an observation sampled from the population.
|
/// Add an observation sampled from the population.
|
||||||
@ -188,7 +55,7 @@ impl AverageWithError {
|
|||||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||||
let n = f64::approx_from(self.avg.len()).unwrap();
|
let n = f64::approx_from(self.avg.len()).unwrap();
|
||||||
self.avg.add_inner(delta_n);
|
self.avg.add_inner(delta_n);
|
||||||
self.v += delta_n * delta_n * n * (n - 1.);
|
self.sum_2 += delta_n * delta_n * n * (n - 1.);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine whether the sample is empty.
|
/// Determine whether the sample is empty.
|
||||||
@ -219,7 +86,7 @@ impl AverageWithError {
|
|||||||
if self.avg.len() < 2 {
|
if self.avg.len() < 2 {
|
||||||
return 0.;
|
return 0.;
|
||||||
}
|
}
|
||||||
self.v / f64::approx_from(self.avg.len() - 1).unwrap()
|
self.sum_2 / f64::approx_from(self.avg.len() - 1).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate the population variance of the sample.
|
/// Calculate the population variance of the sample.
|
||||||
@ -231,7 +98,7 @@ impl AverageWithError {
|
|||||||
if n < 2 {
|
if n < 2 {
|
||||||
return 0.;
|
return 0.;
|
||||||
}
|
}
|
||||||
self.v / f64::approx_from(n).unwrap()
|
self.sum_2 / f64::approx_from(n).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the standard error of the mean of the population.
|
/// Estimate the standard error of the mean of the population.
|
||||||
@ -271,7 +138,7 @@ impl AverageWithError {
|
|||||||
let len_total = len_self + len_other;
|
let len_total = len_self + len_other;
|
||||||
let delta = other.mean() - self.mean();
|
let delta = other.mean() - self.mean();
|
||||||
self.avg.merge(&other.avg);
|
self.avg.merge(&other.avg);
|
||||||
self.v += other.v + delta*delta * len_self * len_other / len_total;
|
self.sum_2 += other.sum_2 + delta*delta * len_self * len_other / len_total;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user