From 19127cede7c0556e0fad5d984b0713a6644b1560 Mon Sep 17 00:00:00 2001
From: Vinzent Steinberg <Vinzent.Steinberg@gmail.com>
Date: Sun, 28 May 2017 20:30:05 +0200
Subject: [PATCH] Calculate average in terms of delta/n

This will avoid divisions in the inner loop when calculating higher
moments.
---
 src/average.rs           | 50 +++++++++++++++++++++++++++++++++++-----
 tests/streaming_stats.rs |  2 +-
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/src/average.rs b/src/average.rs
index 02996f4..d53acb3 100644
--- a/src/average.rs
+++ b/src/average.rs
@@ -32,13 +32,31 @@ impl Average {
     /// Add an observation sampled from the population.
     #[inline]
     pub fn add(&mut self, sample: f64) {
+        self.increment();
+        let delta_n = (sample - self.avg)
+            / f64::approx_from(self.n).unwrap();
+        self.add_inner(delta_n);
+    }
+
+    /// Increment the sample size.
+    ///
+    /// This does not update anything else.
+    #[inline]
+    pub fn increment(&mut self) {
+        self.n += 1;
+    }
+
+    /// Add an observation given an already calculated difference from the mean
+    /// divided by the number of samples, assuming the inner count of the sample
+    /// size was already updated.
+    ///
+    /// This is useful for avoiding unnecessary divisions in the inner loop.
+    pub fn add_inner(&mut self, delta_n: f64) {
         // This algorithm introduced by Welford in 1962 trades numerical
         // stability for a division inside the loop.
         //
         // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
-        self.n += 1;
-        let delta = sample - self.avg;
-        self.avg += delta / f64::approx_from(self.n).unwrap();
+        self.avg += delta_n;
     }
 
     /// Determine whether the sample is empty.
@@ -144,13 +162,33 @@ impl AverageWithError {
     /// Add an observation sampled from the population.
     #[inline]
     pub fn add(&mut self, sample: f64) {
+        self.increment();
+        let delta_n = (sample - self.avg.mean())
+            / f64::approx_from(self.len()).unwrap();
+        self.add_inner(delta_n);
+    }
+
+    /// Increment the sample size.
+    ///
+    /// This does not update anything else.
+    #[inline]
+    pub fn increment(&mut self) {
+        self.avg.increment();
+    }
+
+    /// Add an observation given an already calculated difference from the mean
+    /// divided by the number of samples, assuming the inner count of the sample
+    /// size was already updated.
+    ///
+    /// This is useful for avoiding unnecessary divisions in the inner loop.
+    pub fn add_inner(&mut self, delta_n: f64) {
         // This algorithm introduced by Welford in 1962 trades numerical
         // stability for a division inside the loop.
         //
         // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
-        let delta = sample - self.avg.mean();
-        self.avg.add(sample);
-        self.v += delta * (sample - self.avg.mean());
+        let n = f64::approx_from(self.avg.len()).unwrap();
+        self.avg.add_inner(delta_n);
+        self.v += delta_n * delta_n * n * (n - 1.);
     }
 
     /// Determine whether the sample is empty.
diff --git a/tests/streaming_stats.rs b/tests/streaming_stats.rs
index a278f7e..74b3680 100644
--- a/tests/streaming_stats.rs
+++ b/tests/streaming_stats.rs
@@ -22,7 +22,7 @@ fn average_vs_streaming_stats_small() {
     let a: average::AverageWithError = values.iter().map(|x| *x).collect();
     let b: stats::OnlineStats = values.iter().map(|x| *x).collect();
     assert_almost_eq!(a.mean(), b.mean(), 1e-16);
-    assert_almost_eq!(a.population_variance(), b.variance(), 1e-16);
+    assert_almost_eq!(a.population_variance(), b.variance(), 1e-14);
 }
 
 #[test]