From b8e980d6ce432ed8813465201a18a31f1f2a60eb Mon Sep 17 00:00:00 2001
From: Vinzent Steinberg <Vinzent.Steinberg@gmail.com>
Date: Sun, 28 May 2017 23:33:16 +0200
Subject: [PATCH] Implement kurtosis

Also improve skewness tests.
---
 src/lib.rs              |   2 +-
 src/moments/kurtosis.rs | 145 ++++++++++++++++++++++++++++++++++++++++
 src/moments/mod.rs      |   1 +
 src/moments/skewness.rs |   8 ++-
 tests/kurtosis.rs       |  76 +++++++++++++++++++++
 tests/skewness.rs       |   8 +--
 6 files changed, 232 insertions(+), 8 deletions(-)
 create mode 100644 src/moments/kurtosis.rs
 create mode 100644 tests/kurtosis.rs

diff --git a/src/lib.rs b/src/lib.rs
index 20fb090..a77ea08 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -45,7 +45,7 @@ mod minmax;
 mod reduce;
 mod quantile;
 
-pub use moments::{Mean, Variance, Skewness, MeanWithError};
+pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError};
 pub use weighted_mean::{WeightedMean, WeightedMeanWithError};
 pub use minmax::{Min, Max};
 pub use quantile::Quantile;
diff --git a/src/moments/kurtosis.rs b/src/moments/kurtosis.rs
new file mode 100644
index 0000000..2106c5d
--- /dev/null
+++ b/src/moments/kurtosis.rs
@@ -0,0 +1,145 @@
+/// Estimate the arithmetic mean, the variance, the skewness and the kurtosis of
+/// a sequence of numbers ("population").
+///
+/// This can be used to estimate the standard error of the mean.
+#[derive(Debug, Clone)]
+pub struct Kurtosis {
+    /// Estimator of mean, variance and skewness.
+    avg: Skewness,
+    /// Intermediate sum of terms to the fourth for calculating the skewness.
+    sum_4: f64,
+}
+
+impl Kurtosis {
+    /// Create a new skewness estimator.
+    #[inline]
+    pub fn new() -> Kurtosis {
+        Kurtosis {
+            avg: Skewness::new(),
+            sum_4: 0.,
+        }
+    }
+
+    /// Add an observation sampled from the population.
+    #[inline]
+    pub fn add(&mut self, x: f64) {
+        let delta = x - self.mean();
+        self.increment();
+        let n = f64::approx_from(self.len()).unwrap();
+        self.add_inner(delta, delta/n);
+    }
+
+    /// Increment the sample size.
+    ///
+    /// This does not update anything else.
+    #[inline]
+    fn increment(&mut self) {
+        self.avg.increment();
+    }
+
+    /// Add an observation given an already calculated difference from the mean
+    /// divided by the number of samples, assuming the inner count of the sample
+    /// size was already updated.
+    ///
+    /// This is useful for avoiding unnecessary divisions in the inner loop.
+    #[inline]
+    fn add_inner(&mut self, delta: f64, delta_n: f64) {
+        // This algorithm was suggested by Terriberry.
+        //
+        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
+        let n = f64::approx_from(self.len()).unwrap();
+        let term = delta * delta_n * (n - 1.);
+        let delta_n_sq = delta_n*delta_n;
+        self.sum_4 += term * delta_n_sq * (n*n - 3.*n + 3.)
+            + 6. * delta_n_sq * self.avg.avg.sum_2
+            - 4. * delta_n * self.avg.sum_3;
+        self.avg.add_inner(delta, delta_n);
+    }
+
+    /// Determine whether the sample is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.avg.is_empty()
+    }
+
+    /// Estimate the mean of the population.
+    ///
+    /// Returns 0 for an empty sample.
+    #[inline]
+    pub fn mean(&self) -> f64 {
+        self.avg.mean()
+    }
+
+    /// Return the sample size.
+    #[inline]
+    pub fn len(&self) -> u64 {
+        self.avg.len()
+    }
+
+    /// Calculate the sample variance.
+    ///
+    /// This is an unbiased estimator of the variance of the population.
+    #[inline]
+    pub fn sample_variance(&self) -> f64 {
+        self.avg.sample_variance()
+    }
+
+    /// Calculate the population variance of the sample.
+    ///
+    /// This is a biased estimator of the variance of the population.
+    #[inline]
+    pub fn population_variance(&self) -> f64 {
+        self.avg.population_variance()
+    }
+
+    /// Estimate the standard error of the mean of the population.
+    #[inline]
+    pub fn error_mean(&self) -> f64 {
+        self.avg.error_mean()
+    }
+
+    /// Estimate the skewness of the population.
+    #[inline]
+    pub fn skewness(&self) -> f64 {
+        self.avg.skewness()
+    }
+
+    /// Estimate the kurtosis of the population.
+    #[inline]
+    pub fn kurtosis(&self) -> f64 {
+        if self.sum_4 == 0. {
+            return 0.;
+        }
+        let n = f64::approx_from(self.len()).unwrap();
+        n * self.sum_4 / (self.avg.avg.sum_2 * self.avg.avg.sum_2) - 3.
+    }
+
+    /// Merge another sample into this one.
+    #[inline]
+    pub fn merge(&mut self, other: &Kurtosis) {
+        let len_self = f64::approx_from(self.len()).unwrap();
+        let len_other = f64::approx_from(other.len()).unwrap();
+        let len_total = len_self + len_other;
+        let delta = other.mean() - self.mean();
+        let delta_n = delta / len_total;
+        let delta_n_sq = delta_n * delta_n;
+        self.sum_4 += other.sum_4
+            + delta * delta_n*delta_n_sq * len_self*len_other
+              * (len_self*len_self - len_self*len_other + len_other*len_other)
+            + 6.*delta_n_sq * (len_self*len_self * other.avg.avg.sum_2 + len_other*len_other * self.avg.avg.sum_2)
+            + 4.*delta_n * (len_self * other.avg.sum_3 - len_other * self.avg.sum_3);
+        self.avg.merge(&other.avg);
+    }
+}
+
+impl core::iter::FromIterator<f64> for Kurtosis {
+    fn from_iter<T>(iter: T) -> Kurtosis
+        where T: IntoIterator<Item=f64>
+    {
+        let mut a = Kurtosis::new();
+        for i in iter {
+            a.add(i);
+        }
+        a
+    }
+}
diff --git a/src/moments/mod.rs b/src/moments/mod.rs
index 5e7aa87..d96921d 100644
--- a/src/moments/mod.rs
+++ b/src/moments/mod.rs
@@ -1,5 +1,6 @@
 include!("mean.rs");
 include!("variance.rs");
 include!("skewness.rs");
+include!("kurtosis.rs");
 
 pub type MeanWithError = Variance;
diff --git a/src/moments/skewness.rs b/src/moments/skewness.rs
index 3245231..63ac47d 100644
--- a/src/moments/skewness.rs
+++ b/src/moments/skewness.rs
@@ -48,7 +48,8 @@ impl Skewness {
         //
         // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
         let n = f64::approx_from(self.len()).unwrap();
-        self.sum_3 += delta*delta_n*delta_n * (n - 1.)*(n - 2.)
+        let term = delta * delta_n * (n - 1.);
+        self.sum_3 += term * delta_n * (n - 2.)
             - 3.*delta_n * self.avg.sum_2;
         self.avg.add_inner(delta_n);
     }
@@ -114,9 +115,10 @@ impl Skewness {
         let len_other = f64::approx_from(other.len()).unwrap();
         let len_total = len_self + len_other;
         let delta = other.mean() - self.mean();
+        let delta_n = delta / len_total;
         self.sum_3 += other.sum_3
-            + delta*delta*delta * len_self*len_other*(len_self - len_other) / (len_total*len_total)
-            + 3.*delta * (len_self*other.avg.sum_2 - len_other*self.avg.sum_2) / len_total;
+            + delta*delta_n*delta_n * len_self*len_other*(len_self - len_other)
+            + 3.*delta_n * (len_self * other.avg.sum_2 - len_other * self.avg.sum_2);
         self.avg.merge(&other.avg);
     }
 }
diff --git a/tests/kurtosis.rs b/tests/kurtosis.rs
new file mode 100644
index 0000000..0632464
--- /dev/null
+++ b/tests/kurtosis.rs
@@ -0,0 +1,76 @@
+#[macro_use] extern crate average;
+
+extern crate core;
+
+extern crate rand;
+
+use core::iter::Iterator;
+
+use average::Kurtosis;
+
+#[test]
+fn trivial() {
+    let mut a = Kurtosis::new();
+    assert_eq!(a.len(), 0);
+    a.add(1.0);
+    assert_eq!(a.mean(), 1.0);
+    assert_eq!(a.len(), 1);
+    assert_eq!(a.sample_variance(), 0.0);
+    assert_eq!(a.population_variance(), 0.0);
+    assert_eq!(a.error_mean(), 0.0);
+    assert_eq!(a.skewness(), 0.0);
+    a.add(1.0);
+    assert_eq!(a.mean(), 1.0);
+    assert_eq!(a.len(), 2);
+    assert_eq!(a.sample_variance(), 0.0);
+    assert_eq!(a.population_variance(), 0.0);
+    assert_eq!(a.error_mean(), 0.0);
+    assert_eq!(a.skewness(), 0.0);
+}
+
+#[test]
+fn simple() {
+    let mut a: Kurtosis = (1..6).map(f64::from).collect();
+    assert_eq!(a.mean(), 3.0);
+    assert_eq!(a.len(), 5);
+    assert_eq!(a.sample_variance(), 2.5);
+    assert_almost_eq!(a.error_mean(), f64::sqrt(0.5), 1e-16);
+    assert_eq!(a.skewness(), 0.0);
+    a.add(1.0);
+    assert_almost_eq!(a.skewness(), 0.2795084971874741, 1e-15);
+    assert_almost_eq!(a.kurtosis(), -1.365, 1e-15);
+}
+
+#[test]
+fn merge() {
+    let sequence: &[f64] = &[1., 2., 3., -4., 5.1, 6.3, 7.3, -8., 9., 1.];
+    for mid in 0..sequence.len() {
+        let (left, right) = sequence.split_at(mid);
+        let avg_total: Kurtosis = sequence.iter().map(|x| *x).collect();
+        let mut avg_left: Kurtosis = left.iter().map(|x| *x).collect();
+        let avg_right: Kurtosis = right.iter().map(|x| *x).collect();
+        avg_left.merge(&avg_right);
+        assert_eq!(avg_total.len(), avg_left.len());
+        assert_almost_eq!(avg_total.mean(), avg_left.mean(), 1e-14);
+        assert_almost_eq!(avg_total.sample_variance(), avg_left.sample_variance(), 1e-14);
+        assert_almost_eq!(avg_total.skewness(), avg_left.skewness(), 1e-14);
+        assert_almost_eq!(avg_total.kurtosis(), avg_left.kurtosis(), 1e-14);
+    }
+}
+
+#[test]
+fn exponential_distribution() {
+    use rand::distributions::{Exp, IndependentSample};
+    let lambda = 2.0;
+    let normal = Exp::new(lambda);
+    let mut a = Kurtosis::new();
+    for _ in 0..6_000_000 {
+        a.add(normal.ind_sample(&mut ::rand::thread_rng()));
+    }
+    assert_almost_eq!(a.mean(), 1./lambda, 1e-2);
+    assert_almost_eq!(a.sample_variance().sqrt(), 1./lambda, 1e-2);
+    assert_almost_eq!(a.population_variance().sqrt(), 1./lambda, 1e-2);
+    assert_almost_eq!(a.error_mean(), 0.0, 1e-2);
+    assert_almost_eq!(a.skewness(), 2.0, 1e-2);
+    assert_almost_eq!(a.kurtosis(), 6.0, 4e-2);
+}
diff --git a/tests/skewness.rs b/tests/skewness.rs
index 80d41b2..a8979d2 100644
--- a/tests/skewness.rs
+++ b/tests/skewness.rs
@@ -42,7 +42,7 @@ fn simple() {
 
 #[test]
 fn merge() {
-    let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
+    let sequence: &[f64] = &[1., 2., 3., -4., 5., 6., 7., 8., 9., 1.];
     for mid in 0..sequence.len() {
         let (left, right) = sequence.split_at(mid);
         let avg_total: Skewness = sequence.iter().map(|x| *x).collect();
@@ -50,9 +50,9 @@ fn merge() {
         let avg_right: Skewness = right.iter().map(|x| *x).collect();
         avg_left.merge(&avg_right);
         assert_eq!(avg_total.len(), avg_left.len());
-        assert_eq!(avg_total.mean(), avg_left.mean());
-        assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
-        assert_eq!(avg_total.skewness(), avg_left.skewness());
+        assert_almost_eq!(avg_total.mean(), avg_left.mean(), 1e-14);
+        assert_almost_eq!(avg_total.sample_variance(), avg_left.sample_variance(), 1e-14);
+        assert_almost_eq!(avg_total.skewness(), avg_left.skewness(), 1e-14);
     }
 }