Split up moments into one file for each

This anticipates adding skewness.
2017-05-28 21:00:04 +02:00 · 2017-05-28 21:00:04 +02:00 · 712303b58a
commit 712303b58a
parent 19127cede7
4 changed files with 142 additions and 141 deletions
--- a/src/lib.rs
+++ b/src/lib.rs
@ -39,13 +39,13 @@ extern crate conv;
 extern crate quickersort;
 #[macro_use] mod macros;
-mod average;
+mod moments;
 mod weighted_average;
 mod minmax;
 mod reduce;
 mod quantile;
-pub use average::{Average, AverageWithError};
+pub use moments::{Average, AverageWithError};
 pub use weighted_average::{WeightedAverage, WeightedAverageWithError};
 pub use minmax::{Min, Max};
 pub use quantile::Quantile;
--- a/src/moments/mean.rs
+++ b/src/moments/mean.rs
@ -0,0 +1,132 @@
 use core;
 use conv::ApproxFrom;
 /// Estimate the arithmetic mean of a sequence of numbers ("population").
 ///
 /// The estimator keeps only a running mean and a sample count; the
 /// observations themselves are not stored.
 ///
 /// ## Example
 ///
 /// ```
 /// use average::Average;
 ///
 /// let a: Average = (1..6).map(Into::into).collect();
 /// println!("The average is {}.", a.mean());
 /// ```
 #[derive(Debug, Clone)]
 pub struct Average {
    /// Running estimate of the mean (0 for a freshly created estimator).
    avg: f64,
    /// Number of observations counted so far.
    n: u64,
 }
 impl Average {
    /// Create a new average estimator.
    ///
    /// The estimator starts out empty: the sample size is 0 and the mean is 0.
    #[inline]
    pub fn new() -> Average {
        Average { avg: 0., n: 0 }
    }
    /// Add an observation sampled from the population.
    ///
    /// This increments the sample size and then shifts the running mean by
    /// `(sample - mean) / n`, where `n` is the updated sample size.
    #[inline]
    pub fn add(&mut self, sample: f64) {
        self.increment();
        let delta_n = (sample - self.avg)
            / f64::approx_from(self.n).unwrap();
        self.add_inner(delta_n);
    }
    /// Increment the sample size.
    ///
    /// This does not update anything else.
    #[inline]
    pub fn increment(&mut self) {
        self.n += 1;
    }
    /// Add an observation given an already calculated difference from the mean
    /// divided by the number of samples, assuming the inner count of the sample
    /// size was already updated.
    ///
    /// This is useful for avoiding unnecessary divisions in the inner loop.
    #[inline]
    pub fn add_inner(&mut self, delta_n: f64) {
        // This algorithm, introduced by Welford in 1962, gains numerical
        // stability at the cost of a division inside the loop.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        self.avg += delta_n;
    }
    /// Determine whether the sample is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.n == 0
    }
    /// Estimate the mean of the population.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn mean(&self) -> f64 {
        self.avg
    }
    /// Return the sample size.
    #[inline]
    pub fn len(&self) -> u64 {
        self.n
    }
    /// Merge another sample into this one.
    ///
    /// Merging an empty sample is a no-op, and merging into an empty sample
    /// makes this estimator a copy of `other`. In particular, merging two
    /// empty samples leaves the mean at 0 instead of producing NaN.
    ///
    ///
    /// ## Example
    ///
    /// ```
    /// use average::Average;
    ///
    /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
    /// let (left, right) = sequence.split_at(3);
    /// let avg_total: Average = sequence.iter().map(|x| *x).collect();
    /// let mut avg_left: Average = left.iter().map(|x| *x).collect();
    /// let avg_right: Average = right.iter().map(|x| *x).collect();
    /// avg_left.merge(&avg_right);
    /// assert_eq!(avg_total.mean(), avg_left.mean());
    /// ```
    #[inline]
    pub fn merge(&mut self, other: &Average) {
        // An empty `other` contributes nothing. Returning early also avoids
        // the 0/0 division below when both samples are empty.
        if other.is_empty() {
            return;
        }
        // An empty `self` simply takes over the other sample unchanged.
        if self.is_empty() {
            *self = other.clone();
            return;
        }
        // This algorithm was proposed by Chan et al. in 1979.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let len_self = f64::approx_from(self.n).unwrap();
        let len_other = f64::approx_from(other.n).unwrap();
        let len_total = len_self + len_other;
        self.n += other.n;
        self.avg = (len_self * self.avg + len_other * other.avg) / len_total;
        // Chan et al. use
        //
        //     self.avg += delta * len_other / len_total;
        //
        // instead, but this results in cancellation if the sample sizes are
        // similar.
    }
 }
 impl core::default::Default for Average {
    fn default() -> Average {
        Average::new()
    }
 }
 /// Build an estimator by feeding it every item of an iterator, in order.
 impl core::iter::FromIterator<f64> for Average {
    fn from_iter<T>(iter: T) -> Average
        where T: IntoIterator<Item=f64>
    {
        // Start from an empty estimator and add each sample in turn.
        iter.into_iter().fold(Average::new(), |mut estimator, sample| {
            estimator.add(sample);
            estimator
        })
    }
 }
--- a/src/moments/mod.rs
+++ b/src/moments/mod.rs
@ -0,0 +1,2 @@
 include!("mean.rs");
 include!("variance.rs");
--- a/src/moments/variance.rs
+++ b/src/moments/variance.rs
@ -1,136 +1,3 @@
 use core;
 use conv::ApproxFrom;
 /// Estimate the arithmetic mean of a sequence of numbers ("population").
 ///
 /// The estimator keeps only a running mean and a sample count; the
 /// observations themselves are not stored.
 ///
 /// ## Example
 ///
 /// ```
 /// use average::Average;
 ///
 /// let a: Average = (1..6).map(Into::into).collect();
 /// println!("The average is {}.", a.mean());
 /// ```
 #[derive(Debug, Clone)]
 pub struct Average {
    /// Running estimate of the mean (0 for a freshly created estimator).
    avg: f64,
    /// Number of observations counted so far.
    n: u64,
 }
 impl Average {
    /// Create a new average estimator.
    ///
    /// The estimator starts out empty: the sample size is 0 and the mean is 0.
    #[inline]
    pub fn new() -> Average {
        Average { avg: 0., n: 0 }
    }
    /// Add an observation sampled from the population.
    ///
    /// This increments the sample size and then shifts the running mean by
    /// `(sample - mean) / n`, where `n` is the updated sample size.
    #[inline]
    pub fn add(&mut self, sample: f64) {
        self.increment();
        let delta_n = (sample - self.avg)
            / f64::approx_from(self.n).unwrap();
        self.add_inner(delta_n);
    }
    /// Increment the sample size.
    ///
    /// This does not update anything else.
    #[inline]
    pub fn increment(&mut self) {
        self.n += 1;
    }
    /// Add an observation given an already calculated difference from the mean
    /// divided by the number of samples, assuming the inner count of the sample
    /// size was already updated.
    ///
    /// This is useful for avoiding unnecessary divisions in the inner loop.
    #[inline]
    pub fn add_inner(&mut self, delta_n: f64) {
        // This algorithm, introduced by Welford in 1962, gains numerical
        // stability at the cost of a division inside the loop.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        self.avg += delta_n;
    }
    /// Determine whether the sample is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.n == 0
    }
    /// Estimate the mean of the population.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn mean(&self) -> f64 {
        self.avg
    }
    /// Return the sample size.
    #[inline]
    pub fn len(&self) -> u64 {
        self.n
    }
    /// Merge another sample into this one.
    ///
    /// Merging an empty sample is a no-op, and merging into an empty sample
    /// makes this estimator a copy of `other`. In particular, merging two
    /// empty samples leaves the mean at 0 instead of producing NaN.
    ///
    ///
    /// ## Example
    ///
    /// ```
    /// use average::Average;
    ///
    /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
    /// let (left, right) = sequence.split_at(3);
    /// let avg_total: Average = sequence.iter().map(|x| *x).collect();
    /// let mut avg_left: Average = left.iter().map(|x| *x).collect();
    /// let avg_right: Average = right.iter().map(|x| *x).collect();
    /// avg_left.merge(&avg_right);
    /// assert_eq!(avg_total.mean(), avg_left.mean());
    /// ```
    #[inline]
    pub fn merge(&mut self, other: &Average) {
        // An empty `other` contributes nothing. Returning early also avoids
        // the 0/0 division below when both samples are empty.
        if other.is_empty() {
            return;
        }
        // An empty `self` simply takes over the other sample unchanged.
        if self.is_empty() {
            *self = other.clone();
            return;
        }
        // This algorithm was proposed by Chan et al. in 1979.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let len_self = f64::approx_from(self.n).unwrap();
        let len_other = f64::approx_from(other.n).unwrap();
        let len_total = len_self + len_other;
        self.n += other.n;
        self.avg = (len_self * self.avg + len_other * other.avg) / len_total;
        // Chan et al. use
        //
        //     self.avg += delta * len_other / len_total;
        //
        // instead, but this results in cancellation if the sample sizes are
        // similar.
    }
 }
 impl core::default::Default for Average {
    fn default() -> Average {
        Average::new()
    }
 }
 /// Build an estimator by feeding it every item of an iterator, in order.
 impl core::iter::FromIterator<f64> for Average {
    fn from_iter<T>(iter: T) -> Average
        where T: IntoIterator<Item=f64>
    {
        // Start from an empty estimator and add each sample in turn.
        iter.into_iter().fold(Average::new(), |mut estimator, sample| {
            estimator.add(sample);
            estimator
        })
    }
 }
 /// Estimate the arithmetic mean and the variance of a sequence of numbers
 /// ("population").
 ///
@ -150,13 +17,13 @@ pub struct AverageWithError {
    /// Estimator of average.
    avg: Average,
    /// Intermediate sum of squares for calculating the variance.
-    v: f64,
+    sum_2: f64,
 }
 impl AverageWithError {
    /// Create a new average estimator.
    pub fn new() -> AverageWithError {
-        AverageWithError { avg: Average::new(), v: 0. }
+        AverageWithError { avg: Average::new(), sum_2: 0. }
    }
    /// Add an observation sampled from the population.
@ -188,7 +55,7 @@ impl AverageWithError {
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let n = f64::approx_from(self.avg.len()).unwrap();
        self.avg.add_inner(delta_n);
-        self.v += delta_n * delta_n * n * (n - 1.);
+        self.sum_2 += delta_n * delta_n * n * (n - 1.);
    }
    /// Determine whether the sample is empty.
@ -219,7 +86,7 @@ impl AverageWithError {
        if self.avg.len() < 2 {
            return 0.;
        }
-        self.v / f64::approx_from(self.avg.len() - 1).unwrap()
+        self.sum_2 / f64::approx_from(self.avg.len() - 1).unwrap()
    }
    /// Calculate the population variance of the sample.
@ -231,7 +98,7 @@ impl AverageWithError {
        if n < 2 {
            return 0.;
        }
-        self.v / f64::approx_from(n).unwrap()
+        self.sum_2 / f64::approx_from(n).unwrap()
    }
    /// Estimate the standard error of the mean of the population.
@ -271,7 +138,7 @@ impl AverageWithError {
        let len_total = len_self + len_other;
        let delta = other.mean() - self.mean();
        self.avg.merge(&other.avg);
-        self.v += other.v + delta*delta * len_self * len_other / len_total;
+        self.sum_2 += other.sum_2 + delta*delta * len_self * len_other / len_total;
    }
 }
		`@ -0,0 +1,2 @@`
							`include!("mean.rs");`
							`include!("variance.rs");`