variance

2019-12-02 12:49:54 +01:00 · 2019-12-02 12:49:54 +01:00 · be1bd4c5c6
commit be1bd4c5c6
parent 27a14185ee
3 changed files with 201 additions and 26 deletions
--- a/src/moments/mod.rs
+++ b/src/moments/mod.rs
@ -6,12 +6,12 @@ use conv::ApproxFrom;
 use super::{Estimate, Merge};

 include!("mean.rs");
-include!("variance.rs");
+include!("variance1.rs");
 include!("skewness.rs");
 include!("kurtosis.rs");

-/// Alias for `Variance`.
-pub type MeanWithError = Variance;
+/// Alias for `Variance1`.
+pub type MeanWithError = Variance1;

 #[doc(hidden)]
 #[macro_export]
@ -301,7 +301,7 @@ macro_rules! define_moments_inner {
 /// Define an estimator of all moments up to a number given at compile time.
 ///
 /// This uses a [general algorithm][paper] and is slightly less efficient than
-/// the specialized implementations (such as [`Mean`], [`Variance`],
+/// the specialized implementations (such as [`Mean`], [`Variance1`],
 /// [`Skewness`] and [`Kurtosis`]), but it works for any number of moments >= 4.
 ///
 /// (In practise, there is an upper limit due to integer overflow and possibly
@ -309,7 +309,7 @@ macro_rules! define_moments_inner {
 ///
 /// [paper]: https://doi.org/10.1007/s00180-015-0637-z.
 /// [`Mean`]: ./struct.Mean.html
-/// [`Variance`]: ./struct.Variance.html
+/// [`Variance1`]: ./struct.Variance1.html
 /// [`Skewness`]: ./struct.Skewness.html
 /// [`Kurtosis`]: ./struct.Kurtosis.html
 ///
--- a/src/moments/variance0.rs
+++ b/src/moments/variance0.rs
@ -1,3 +1,9 @@
+use crate::matrix_variance_mean::Mean;
+use average::Estimate;
+use average::Merge;
+
+const ddof: f64 = 0.;
+
 /// Estimate the arithmetic mean and the variance of a sequence of numbers
 /// ("population").
 ///
@ -7,25 +13,25 @@
 /// ## Example
 ///
 /// ```
-/// use average::Variance;
+/// use average::Variance0;
 ///
-/// let a: Variance = (1..6).map(f64::from).collect();
+/// let a: Variance0 = (1..6).map(f64::from).collect();
 /// println!("The mean is {} ± {}.", a.mean(), a.error());
 /// ```
 #[derive(Debug, Clone)]
 #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
-pub struct Variance {
+pub struct Variance0 {
    /// Estimator of average.
    avg: Mean,
    /// Intermediate sum of squares for calculating the variance.
    sum_2: f64,
 }

-impl Variance {
+impl Variance0 {
    /// Create a new variance estimator.
    #[inline]
-    pub fn new() -> Variance {
-        Variance { avg: Mean::new(), sum_2: 0. }
+    pub fn new() -> Variance0 {
+        Variance0 { avg: Mean::new(), sum_2: 0. }
    }

    /// Increment the sample size.
@ -49,7 +55,7 @@ impl Variance {
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let n = f64::approx_from(self.avg.len()).unwrap();
        self.avg.add_inner(delta_n);
-        self.sum_2 += delta_n * delta_n * n * (n - 1.);
+        self.sum_2 += delta_n * delta_n * n * (n - ddof);
    }

    /// Determine whether the sample is empty.
@ -77,10 +83,10 @@ impl Variance {
    /// This is an unbiased estimator of the variance of the population.
    #[inline]
    pub fn sample_variance(&self) -> f64 {
-        if self.avg.len() < 2 {
+        if self.avg.len() < 1 + ddof {
            return 0.;
        }
-        self.sum_2 / f64::approx_from(self.avg.len() - 1).unwrap()
+        self.sum_2 / f64::approx_from(self.avg.len() - ddof).unwrap()
    }

    /// Calculate the population variance of the sample.
@ -89,7 +95,7 @@ impl Variance {
    #[inline]
    pub fn population_variance(&self) -> f64 {
        let n = self.avg.len();
-        if n < 2 {
+        if n < 1 + ddof {
            return 0.;
        }
        self.sum_2 / f64::approx_from(n).unwrap()
@ -107,13 +113,13 @@ impl Variance {

 }

-impl core::default::Default for Variance {
-    fn default() -> Variance {
-        Variance::new()
+impl core::default::Default for Variance0 {
+    fn default() -> Variance0 {
+        Variance0::new()
    }
 }

-impl Estimate for Variance {
+impl Estimate for Variance0 {
    #[inline]
    fn add(&mut self, sample: f64) {
        self.increment();
@ -128,26 +134,26 @@ impl Estimate for Variance {
    }
 }

-impl Merge for Variance {
+impl Merge for Variance0 {
    /// Merge another sample into this one.
    ///
    ///
    /// ## Example
    ///
    /// ```
-    /// use average::{Variance, Merge};
+    /// use average::{Variance0, Merge};
    ///
    /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
    /// let (left, right) = sequence.split_at(3);
-    /// let avg_total: Variance = sequence.iter().collect();
-    /// let mut avg_left: Variance = left.iter().collect();
-    /// let avg_right: Variance = right.iter().collect();
+    /// let avg_total: Variance0 = sequence.iter().collect();
+    /// let mut avg_left: Variance0 = left.iter().collect();
+    /// let avg_right: Variance0 = right.iter().collect();
    /// avg_left.merge(&avg_right);
    /// assert_eq!(avg_total.mean(), avg_left.mean());
    /// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
    /// ```
    #[inline]
-    fn merge(&mut self, other: &Variance) {
+    fn merge(&mut self, other: &Variance0) {
        // This algorithm was proposed by Chan et al. in 1979.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
@ -160,4 +166,4 @@ impl Merge for Variance {
    }
 }

-impl_from_iterator!(Variance);
+impl_from_iterator!(Variance0);
--- a/src/moments/variance1.rs
+++ b/src/moments/variance1.rs
@ -0,0 +1,169 @@
+use crate::matrix_variance_mean::Mean;
+use average::Estimate;
+use average::Merge;
+
+const ddof: f64 = 1.;
+
+/// Estimate the arithmetic mean and the variance of a sequence of numbers
+/// ("population").
+///
+/// This can be used to estimate the standard error of the mean.
+///
+///
+/// ## Example
+///
+/// ```
+/// use average::Variance1;
+///
+/// let a: Variance1 = (1..6).map(f64::from).collect();
+/// println!("The mean is {} ± {}.", a.mean(), a.error());
+/// ```
+#[derive(Debug, Clone)]
+#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
+pub struct Variance1 {
+    /// Estimator of average.
+    avg: Mean,
+    /// Intermediate sum of squares for calculating the variance.
+    sum_2: f64,
+}
+
+impl Variance1 {
+    /// Create a new variance estimator.
+    #[inline]
+    pub fn new() -> Variance1 {
+        Variance1 { avg: Mean::new(), sum_2: 0. }
+    }
+
+    /// Increment the sample size.
+    ///
+    /// This does not update anything else.
+    #[inline]
+    fn increment(&mut self) {
+        self.avg.increment();
+    }
+
+    /// Add an observation given an already calculated difference from the mean
+    /// divided by the number of samples, assuming the inner count of the sample
+    /// size was already updated.
+    ///
+    /// This is useful for avoiding unnecessary divisions in the inner loop.
+    #[inline]
+    fn add_inner(&mut self, delta_n: f64) {
+        // This algorithm introduced by Welford in 1962 trades numerical
+        // stability for a division inside the loop.
+        //
+        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
+        let n = f64::approx_from(self.avg.len()).unwrap();
+        self.avg.add_inner(delta_n);
+        self.sum_2 += delta_n * delta_n * n * (n - ddof);
+    }
+
+    /// Determine whether the sample is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.avg.is_empty()
+    }
+
+    /// Estimate the mean of the population.
+    ///
+    /// Returns 0 for an empty sample.
+    #[inline]
+    pub fn mean(&self) -> f64 {
+        self.avg.mean()
+    }
+
+    /// Return the sample size.
+    #[inline]
+    pub fn len(&self) -> u64 {
+        self.avg.len()
+    }
+
+    /// Calculate the sample variance.
+    ///
+    /// This is an unbiased estimator of the variance of the population.
+    #[inline]
+    pub fn sample_variance(&self) -> f64 {
+        if self.avg.len() < 1 + ddof {
+            return 0.;
+        }
+        self.sum_2 / f64::approx_from(self.avg.len() - ddof).unwrap()
+    }
+
+    /// Calculate the population variance of the sample.
+    ///
+    /// This is a biased estimator of the variance of the population.
+    #[inline]
+    pub fn population_variance(&self) -> f64 {
+        let n = self.avg.len();
+        if n < 1 + ddof {
+            return 0.;
+        }
+        self.sum_2 / f64::approx_from(n).unwrap()
+    }
+
+    /// Estimate the standard error of the mean of the population.
+    #[inline]
+    pub fn error(&self) -> f64 {
+        let n = self.avg.len();
+        if n == 0 {
+            return 0.;
+        }
+        (self.sample_variance() / f64::approx_from(n).unwrap()).sqrt()
+    }
+
+}
+
+impl core::default::Default for Variance1 {
+    fn default() -> Variance1 {
+        Variance1::new()
+    }
+}
+
+impl Estimate for Variance1 {
+    #[inline]
+    fn add(&mut self, sample: f64) {
+        self.increment();
+        let delta_n = (sample - self.avg.mean())
+            / f64::approx_from(self.len()).unwrap();
+        self.add_inner(delta_n);
+    }
+
+    #[inline]
+    fn estimate(&self) -> f64 {
+        self.population_variance()
+    }
+}
+
+impl Merge for Variance1 {
+    /// Merge another sample into this one.
+    ///
+    ///
+    /// ## Example
+    ///
+    /// ```
+    /// use average::{Variance1, Merge};
+    ///
+    /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
+    /// let (left, right) = sequence.split_at(3);
+    /// let avg_total: Variance1 = sequence.iter().collect();
+    /// let mut avg_left: Variance1 = left.iter().collect();
+    /// let avg_right: Variance1 = right.iter().collect();
+    /// avg_left.merge(&avg_right);
+    /// assert_eq!(avg_total.mean(), avg_left.mean());
+    /// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
+    /// ```
+    #[inline]
+    fn merge(&mut self, other: &Variance1) {
+        // This algorithm was proposed by Chan et al. in 1979.
+        //
+        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
+        let len_self = f64::approx_from(self.len()).unwrap();
+        let len_other = f64::approx_from(other.len()).unwrap();
+        let len_total = len_self + len_other;
+        let delta = other.mean() - self.mean();
+        self.avg.merge(&other.avg);
+        self.sum_2 += other.sum_2 + delta*delta * len_self * len_other / len_total;
+    }
+}
+
+impl_from_iterator!(Variance1);