Implement kurtosis

Also improve skewness tests.
This commit is contained in:
Vinzent Steinberg 2017-05-28 23:33:16 +02:00
parent 4f5ddbd6e9
commit b8e980d6ce
6 changed files with 232 additions and 8 deletions

View File

@ -45,7 +45,7 @@ mod minmax;
mod reduce;
mod quantile;
pub use moments::{Mean, Variance, Skewness, MeanWithError};
pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError};
pub use weighted_mean::{WeightedMean, WeightedMeanWithError};
pub use minmax::{Min, Max};
pub use quantile::Quantile;

145
src/moments/kurtosis.rs Normal file
View File

@ -0,0 +1,145 @@
/// Estimate the arithmetic mean, the variance, the skewness and the kurtosis of
/// a sequence of numbers ("population").
///
/// This can be used to estimate the standard error of the mean.
#[derive(Debug, Clone)]
pub struct Kurtosis {
/// Estimator of mean, variance and skewness.
avg: Skewness,
/// Intermediate sum of terms to the fourth for calculating the skewness.
sum_4: f64,
}
impl Kurtosis {
/// Create a new skewness estimator.
#[inline]
pub fn new() -> Kurtosis {
Kurtosis {
avg: Skewness::new(),
sum_4: 0.,
}
}
/// Add an observation sampled from the population.
#[inline]
pub fn add(&mut self, x: f64) {
let delta = x - self.mean();
self.increment();
let n = f64::approx_from(self.len()).unwrap();
self.add_inner(delta, delta/n);
}
/// Increment the sample size.
///
/// This does not update anything else.
#[inline]
fn increment(&mut self) {
self.avg.increment();
}
/// Add an observation given an already calculated difference from the mean
/// divided by the number of samples, assuming the inner count of the sample
/// size was already updated.
///
/// This is useful for avoiding unnecessary divisions in the inner loop.
#[inline]
fn add_inner(&mut self, delta: f64, delta_n: f64) {
// This algorithm was suggested by Terriberry.
//
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
let n = f64::approx_from(self.len()).unwrap();
let term = delta * delta_n * (n - 1.);
let delta_n_sq = delta_n*delta_n;
self.sum_4 += term * delta_n_sq * (n*n - 3.*n + 3.)
+ 6. * delta_n_sq * self.avg.avg.sum_2
- 4. * delta_n * self.avg.sum_3;
self.avg.add_inner(delta, delta_n);
}
/// Determine whether the sample is empty.
#[inline]
pub fn is_empty(&self) -> bool {
self.avg.is_empty()
}
/// Estimate the mean of the population.
///
/// Returns 0 for an empty sample.
#[inline]
pub fn mean(&self) -> f64 {
self.avg.mean()
}
/// Return the sample size.
#[inline]
pub fn len(&self) -> u64 {
self.avg.len()
}
/// Calculate the sample variance.
///
/// This is an unbiased estimator of the variance of the population.
#[inline]
pub fn sample_variance(&self) -> f64 {
self.avg.sample_variance()
}
/// Calculate the population variance of the sample.
///
/// This is a biased estimator of the variance of the population.
#[inline]
pub fn population_variance(&self) -> f64 {
self.avg.population_variance()
}
/// Estimate the standard error of the mean of the population.
#[inline]
pub fn error_mean(&self) -> f64 {
self.avg.error_mean()
}
/// Estimate the skewness of the population.
#[inline]
pub fn skewness(&self) -> f64 {
self.avg.skewness()
}
/// Estimate the kurtosis of the population.
#[inline]
pub fn kurtosis(&self) -> f64 {
if self.sum_4 == 0. {
return 0.;
}
let n = f64::approx_from(self.len()).unwrap();
n * self.sum_4 / (self.avg.avg.sum_2 * self.avg.avg.sum_2) - 3.
}
/// Merge another sample into this one.
#[inline]
pub fn merge(&mut self, other: &Kurtosis) {
let len_self = f64::approx_from(self.len()).unwrap();
let len_other = f64::approx_from(other.len()).unwrap();
let len_total = len_self + len_other;
let delta = other.mean() - self.mean();
let delta_n = delta / len_total;
let delta_n_sq = delta_n * delta_n;
self.sum_4 += other.sum_4
+ delta * delta_n*delta_n_sq * len_self*len_other
* (len_self*len_self - len_self*len_other + len_other*len_other)
+ 6.*delta_n_sq * (len_self*len_self * other.avg.avg.sum_2 + len_other*len_other * self.avg.avg.sum_2)
+ 4.*delta_n * (len_self * other.avg.sum_3 - len_other * self.avg.sum_3);
self.avg.merge(&other.avg);
}
}
impl core::iter::FromIterator<f64> for Kurtosis {
fn from_iter<T>(iter: T) -> Kurtosis
where T: IntoIterator<Item=f64>
{
let mut a = Kurtosis::new();
for i in iter {
a.add(i);
}
a
}
}

View File

@ -1,5 +1,6 @@
include!("mean.rs");
include!("variance.rs");
include!("skewness.rs");
include!("kurtosis.rs");
pub type MeanWithError = Variance;

View File

@ -48,7 +48,8 @@ impl Skewness {
//
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
let n = f64::approx_from(self.len()).unwrap();
self.sum_3 += delta*delta_n*delta_n * (n - 1.)*(n - 2.)
let term = delta * delta_n * (n - 1.);
self.sum_3 += term * delta_n * (n - 2.)
- 3.*delta_n * self.avg.sum_2;
self.avg.add_inner(delta_n);
}
@ -114,9 +115,10 @@ impl Skewness {
let len_other = f64::approx_from(other.len()).unwrap();
let len_total = len_self + len_other;
let delta = other.mean() - self.mean();
let delta_n = delta / len_total;
self.sum_3 += other.sum_3
+ delta*delta*delta * len_self*len_other*(len_self - len_other) / (len_total*len_total)
+ 3.*delta * (len_self*other.avg.sum_2 - len_other*self.avg.sum_2) / len_total;
+ delta*delta_n*delta_n * len_self*len_other*(len_self - len_other)
+ 3.*delta_n * (len_self * other.avg.sum_2 - len_other * self.avg.sum_2);
self.avg.merge(&other.avg);
}
}

76
tests/kurtosis.rs Normal file
View File

@ -0,0 +1,76 @@
#[macro_use] extern crate average;
extern crate core;
extern crate rand;
use core::iter::Iterator;
use average::Kurtosis;
#[test]
fn trivial() {
let mut a = Kurtosis::new();
assert_eq!(a.len(), 0);
a.add(1.0);
assert_eq!(a.mean(), 1.0);
assert_eq!(a.len(), 1);
assert_eq!(a.sample_variance(), 0.0);
assert_eq!(a.population_variance(), 0.0);
assert_eq!(a.error_mean(), 0.0);
assert_eq!(a.skewness(), 0.0);
a.add(1.0);
assert_eq!(a.mean(), 1.0);
assert_eq!(a.len(), 2);
assert_eq!(a.sample_variance(), 0.0);
assert_eq!(a.population_variance(), 0.0);
assert_eq!(a.error_mean(), 0.0);
assert_eq!(a.skewness(), 0.0);
}
#[test]
fn simple() {
let mut a: Kurtosis = (1..6).map(f64::from).collect();
assert_eq!(a.mean(), 3.0);
assert_eq!(a.len(), 5);
assert_eq!(a.sample_variance(), 2.5);
assert_almost_eq!(a.error_mean(), f64::sqrt(0.5), 1e-16);
assert_eq!(a.skewness(), 0.0);
a.add(1.0);
assert_almost_eq!(a.skewness(), 0.2795084971874741, 1e-15);
assert_almost_eq!(a.kurtosis(), -1.365, 1e-15);
}
#[test]
fn merge() {
let sequence: &[f64] = &[1., 2., 3., -4., 5.1, 6.3, 7.3, -8., 9., 1.];
for mid in 0..sequence.len() {
let (left, right) = sequence.split_at(mid);
let avg_total: Kurtosis = sequence.iter().map(|x| *x).collect();
let mut avg_left: Kurtosis = left.iter().map(|x| *x).collect();
let avg_right: Kurtosis = right.iter().map(|x| *x).collect();
avg_left.merge(&avg_right);
assert_eq!(avg_total.len(), avg_left.len());
assert_almost_eq!(avg_total.mean(), avg_left.mean(), 1e-14);
assert_almost_eq!(avg_total.sample_variance(), avg_left.sample_variance(), 1e-14);
assert_almost_eq!(avg_total.skewness(), avg_left.skewness(), 1e-14);
assert_almost_eq!(avg_total.kurtosis(), avg_left.kurtosis(), 1e-14);
}
}
#[test]
fn exponential_distribution() {
use rand::distributions::{Exp, IndependentSample};
let lambda = 2.0;
let normal = Exp::new(lambda);
let mut a = Kurtosis::new();
for _ in 0..6_000_000 {
a.add(normal.ind_sample(&mut ::rand::thread_rng()));
}
assert_almost_eq!(a.mean(), 1./lambda, 1e-2);
assert_almost_eq!(a.sample_variance().sqrt(), 1./lambda, 1e-2);
assert_almost_eq!(a.population_variance().sqrt(), 1./lambda, 1e-2);
assert_almost_eq!(a.error_mean(), 0.0, 1e-2);
assert_almost_eq!(a.skewness(), 2.0, 1e-2);
assert_almost_eq!(a.kurtosis(), 6.0, 4e-2);
}

View File

@ -42,7 +42,7 @@ fn simple() {
#[test]
fn merge() {
let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
let sequence: &[f64] = &[1., 2., 3., -4., 5., 6., 7., 8., 9., 1.];
for mid in 0..sequence.len() {
let (left, right) = sequence.split_at(mid);
let avg_total: Skewness = sequence.iter().map(|x| *x).collect();
@ -50,9 +50,9 @@ fn merge() {
let avg_right: Skewness = right.iter().map(|x| *x).collect();
avg_left.merge(&avg_right);
assert_eq!(avg_total.len(), avg_left.len());
assert_eq!(avg_total.mean(), avg_left.mean());
assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
assert_eq!(avg_total.skewness(), avg_left.skewness());
assert_almost_eq!(avg_total.mean(), avg_left.mean(), 1e-14);
assert_almost_eq!(avg_total.sample_variance(), avg_left.sample_variance(), 1e-14);
assert_almost_eq!(avg_total.skewness(), avg_left.skewness(), 1e-14);
}
}