From 19127cede7c0556e0fad5d984b0713a6644b1560 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Sun, 28 May 2017 20:30:05 +0200 Subject: [PATCH] Calculate average in terms of delta/n This will avoid divisions in the inner loop when calculating higher moments. --- src/average.rs | 50 +++++++++++++++++++++++++++++++++++----- tests/streaming_stats.rs | 2 +- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/average.rs b/src/average.rs index 02996f4..d53acb3 100644 --- a/src/average.rs +++ b/src/average.rs @@ -32,13 +32,31 @@ impl Average { /// Add an observation sampled from the population. #[inline] pub fn add(&mut self, sample: f64) { + self.increment(); + let delta_n = (sample - self.avg) + / f64::approx_from(self.n).unwrap(); + self.add_inner(delta_n); + } + + /// Increment the sample size. + /// + /// This does not update anything else. + #[inline] + pub fn increment(&mut self) { + self.n += 1; + } + + /// Add an observation given an already calculated difference from the mean + /// divided by the number of samples, assuming the inner count of the sample + /// size was already updated. + /// + /// This is useful for avoiding unnecessary divisions in the inner loop. + pub fn add_inner(&mut self, delta_n: f64) { // This algorithm introduced by Welford in 1962 trades numerical // stability for a division inside the loop. // // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance. - self.n += 1; - let delta = sample - self.avg; - self.avg += delta / f64::approx_from(self.n).unwrap(); + self.avg += delta_n; } /// Determine whether the sample is empty. @@ -144,13 +162,33 @@ impl AverageWithError { /// Add an observation sampled from the population. #[inline] pub fn add(&mut self, sample: f64) { + self.increment(); + let delta_n = (sample - self.avg.mean()) + / f64::approx_from(self.len()).unwrap(); + self.add_inner(delta_n); + } + + /// Increment the sample size. + /// + /// This does not update anything else. + #[inline] + pub fn increment(&mut self) { + self.avg.increment(); + } + + /// Add an observation given an already calculated difference from the mean + /// divided by the number of samples, assuming the inner count of the sample + /// size was already updated. + /// + /// This is useful for avoiding unnecessary divisions in the inner loop. + pub fn add_inner(&mut self, delta_n: f64) { // This algorithm introduced by Welford in 1962 trades numerical // stability for a division inside the loop. // // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance. - let delta = sample - self.avg.mean(); - self.avg.add(sample); - self.v += delta * (sample - self.avg.mean()); + let n = f64::approx_from(self.avg.len()).unwrap(); + self.avg.add_inner(delta_n); + self.v += delta_n * delta_n * n * (n - 1.); } /// Determine whether the sample is empty. diff --git a/tests/streaming_stats.rs b/tests/streaming_stats.rs index a278f7e..74b3680 100644 --- a/tests/streaming_stats.rs +++ b/tests/streaming_stats.rs @@ -22,7 +22,7 @@ fn average_vs_streaming_stats_small() { let a: average::AverageWithError = values.iter().map(|x| *x).collect(); let b: stats::OnlineStats = values.iter().map(|x| *x).collect(); assert_almost_eq!(a.mean(), b.mean(), 1e-16); - assert_almost_eq!(a.population_variance(), b.variance(), 1e-16); + assert_almost_eq!(a.population_variance(), b.variance(), 1e-14); } #[test]