Implement kurtosis
Also improve skewness tests.
This commit is contained in:
parent
4f5ddbd6e9
commit
b8e980d6ce
@ -45,7 +45,7 @@ mod minmax;
|
||||
mod reduce;
|
||||
mod quantile;
|
||||
|
||||
pub use moments::{Mean, Variance, Skewness, MeanWithError};
|
||||
pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError};
|
||||
pub use weighted_mean::{WeightedMean, WeightedMeanWithError};
|
||||
pub use minmax::{Min, Max};
|
||||
pub use quantile::Quantile;
|
||||
|
145
src/moments/kurtosis.rs
Normal file
145
src/moments/kurtosis.rs
Normal file
@ -0,0 +1,145 @@
|
||||
/// Estimate the arithmetic mean, the variance, the skewness and the kurtosis of
|
||||
/// a sequence of numbers ("population").
|
||||
///
|
||||
/// This can be used to estimate the standard error of the mean.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Kurtosis {
|
||||
/// Estimator of mean, variance and skewness.
|
||||
avg: Skewness,
|
||||
/// Intermediate sum of terms to the fourth for calculating the skewness.
|
||||
sum_4: f64,
|
||||
}
|
||||
|
||||
impl Kurtosis {
|
||||
/// Create a new skewness estimator.
|
||||
#[inline]
|
||||
pub fn new() -> Kurtosis {
|
||||
Kurtosis {
|
||||
avg: Skewness::new(),
|
||||
sum_4: 0.,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add an observation sampled from the population.
|
||||
#[inline]
|
||||
pub fn add(&mut self, x: f64) {
|
||||
let delta = x - self.mean();
|
||||
self.increment();
|
||||
let n = f64::approx_from(self.len()).unwrap();
|
||||
self.add_inner(delta, delta/n);
|
||||
}
|
||||
|
||||
/// Increment the sample size.
|
||||
///
|
||||
/// This does not update anything else.
|
||||
#[inline]
|
||||
fn increment(&mut self) {
|
||||
self.avg.increment();
|
||||
}
|
||||
|
||||
/// Add an observation given an already calculated difference from the mean
|
||||
/// divided by the number of samples, assuming the inner count of the sample
|
||||
/// size was already updated.
|
||||
///
|
||||
/// This is useful for avoiding unnecessary divisions in the inner loop.
|
||||
#[inline]
|
||||
fn add_inner(&mut self, delta: f64, delta_n: f64) {
|
||||
// This algorithm was suggested by Terriberry.
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||
let n = f64::approx_from(self.len()).unwrap();
|
||||
let term = delta * delta_n * (n - 1.);
|
||||
let delta_n_sq = delta_n*delta_n;
|
||||
self.sum_4 += term * delta_n_sq * (n*n - 3.*n + 3.)
|
||||
+ 6. * delta_n_sq * self.avg.avg.sum_2
|
||||
- 4. * delta_n * self.avg.sum_3;
|
||||
self.avg.add_inner(delta, delta_n);
|
||||
}
|
||||
|
||||
/// Determine whether the sample is empty.
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.avg.is_empty()
|
||||
}
|
||||
|
||||
/// Estimate the mean of the population.
|
||||
///
|
||||
/// Returns 0 for an empty sample.
|
||||
#[inline]
|
||||
pub fn mean(&self) -> f64 {
|
||||
self.avg.mean()
|
||||
}
|
||||
|
||||
/// Return the sample size.
|
||||
#[inline]
|
||||
pub fn len(&self) -> u64 {
|
||||
self.avg.len()
|
||||
}
|
||||
|
||||
/// Calculate the sample variance.
|
||||
///
|
||||
/// This is an unbiased estimator of the variance of the population.
|
||||
#[inline]
|
||||
pub fn sample_variance(&self) -> f64 {
|
||||
self.avg.sample_variance()
|
||||
}
|
||||
|
||||
/// Calculate the population variance of the sample.
|
||||
///
|
||||
/// This is a biased estimator of the variance of the population.
|
||||
#[inline]
|
||||
pub fn population_variance(&self) -> f64 {
|
||||
self.avg.population_variance()
|
||||
}
|
||||
|
||||
/// Estimate the standard error of the mean of the population.
|
||||
#[inline]
|
||||
pub fn error_mean(&self) -> f64 {
|
||||
self.avg.error_mean()
|
||||
}
|
||||
|
||||
/// Estimate the skewness of the population.
|
||||
#[inline]
|
||||
pub fn skewness(&self) -> f64 {
|
||||
self.avg.skewness()
|
||||
}
|
||||
|
||||
/// Estimate the kurtosis of the population.
|
||||
#[inline]
|
||||
pub fn kurtosis(&self) -> f64 {
|
||||
if self.sum_4 == 0. {
|
||||
return 0.;
|
||||
}
|
||||
let n = f64::approx_from(self.len()).unwrap();
|
||||
n * self.sum_4 / (self.avg.avg.sum_2 * self.avg.avg.sum_2) - 3.
|
||||
}
|
||||
|
||||
/// Merge another sample into this one.
|
||||
#[inline]
|
||||
pub fn merge(&mut self, other: &Kurtosis) {
|
||||
let len_self = f64::approx_from(self.len()).unwrap();
|
||||
let len_other = f64::approx_from(other.len()).unwrap();
|
||||
let len_total = len_self + len_other;
|
||||
let delta = other.mean() - self.mean();
|
||||
let delta_n = delta / len_total;
|
||||
let delta_n_sq = delta_n * delta_n;
|
||||
self.sum_4 += other.sum_4
|
||||
+ delta * delta_n*delta_n_sq * len_self*len_other
|
||||
* (len_self*len_self - len_self*len_other + len_other*len_other)
|
||||
+ 6.*delta_n_sq * (len_self*len_self * other.avg.avg.sum_2 + len_other*len_other * self.avg.avg.sum_2)
|
||||
+ 4.*delta_n * (len_self * other.avg.sum_3 - len_other * self.avg.sum_3);
|
||||
self.avg.merge(&other.avg);
|
||||
}
|
||||
}
|
||||
|
||||
impl core::iter::FromIterator<f64> for Kurtosis {
|
||||
fn from_iter<T>(iter: T) -> Kurtosis
|
||||
where T: IntoIterator<Item=f64>
|
||||
{
|
||||
let mut a = Kurtosis::new();
|
||||
for i in iter {
|
||||
a.add(i);
|
||||
}
|
||||
a
|
||||
}
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
include!("mean.rs");
|
||||
include!("variance.rs");
|
||||
include!("skewness.rs");
|
||||
include!("kurtosis.rs");
|
||||
|
||||
pub type MeanWithError = Variance;
|
||||
|
@ -48,7 +48,8 @@ impl Skewness {
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||
let n = f64::approx_from(self.len()).unwrap();
|
||||
self.sum_3 += delta*delta_n*delta_n * (n - 1.)*(n - 2.)
|
||||
let term = delta * delta_n * (n - 1.);
|
||||
self.sum_3 += term * delta_n * (n - 2.)
|
||||
- 3.*delta_n * self.avg.sum_2;
|
||||
self.avg.add_inner(delta_n);
|
||||
}
|
||||
@ -114,9 +115,10 @@ impl Skewness {
|
||||
let len_other = f64::approx_from(other.len()).unwrap();
|
||||
let len_total = len_self + len_other;
|
||||
let delta = other.mean() - self.mean();
|
||||
let delta_n = delta / len_total;
|
||||
self.sum_3 += other.sum_3
|
||||
+ delta*delta*delta * len_self*len_other*(len_self - len_other) / (len_total*len_total)
|
||||
+ 3.*delta * (len_self*other.avg.sum_2 - len_other*self.avg.sum_2) / len_total;
|
||||
+ delta*delta_n*delta_n * len_self*len_other*(len_self - len_other)
|
||||
+ 3.*delta_n * (len_self * other.avg.sum_2 - len_other * self.avg.sum_2);
|
||||
self.avg.merge(&other.avg);
|
||||
}
|
||||
}
|
||||
|
76
tests/kurtosis.rs
Normal file
76
tests/kurtosis.rs
Normal file
@ -0,0 +1,76 @@
|
||||
#[macro_use] extern crate average;
|
||||
|
||||
extern crate core;
|
||||
|
||||
extern crate rand;
|
||||
|
||||
use core::iter::Iterator;
|
||||
|
||||
use average::Kurtosis;
|
||||
|
||||
#[test]
|
||||
fn trivial() {
|
||||
let mut a = Kurtosis::new();
|
||||
assert_eq!(a.len(), 0);
|
||||
a.add(1.0);
|
||||
assert_eq!(a.mean(), 1.0);
|
||||
assert_eq!(a.len(), 1);
|
||||
assert_eq!(a.sample_variance(), 0.0);
|
||||
assert_eq!(a.population_variance(), 0.0);
|
||||
assert_eq!(a.error_mean(), 0.0);
|
||||
assert_eq!(a.skewness(), 0.0);
|
||||
a.add(1.0);
|
||||
assert_eq!(a.mean(), 1.0);
|
||||
assert_eq!(a.len(), 2);
|
||||
assert_eq!(a.sample_variance(), 0.0);
|
||||
assert_eq!(a.population_variance(), 0.0);
|
||||
assert_eq!(a.error_mean(), 0.0);
|
||||
assert_eq!(a.skewness(), 0.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple() {
|
||||
let mut a: Kurtosis = (1..6).map(f64::from).collect();
|
||||
assert_eq!(a.mean(), 3.0);
|
||||
assert_eq!(a.len(), 5);
|
||||
assert_eq!(a.sample_variance(), 2.5);
|
||||
assert_almost_eq!(a.error_mean(), f64::sqrt(0.5), 1e-16);
|
||||
assert_eq!(a.skewness(), 0.0);
|
||||
a.add(1.0);
|
||||
assert_almost_eq!(a.skewness(), 0.2795084971874741, 1e-15);
|
||||
assert_almost_eq!(a.kurtosis(), -1.365, 1e-15);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merge() {
|
||||
let sequence: &[f64] = &[1., 2., 3., -4., 5.1, 6.3, 7.3, -8., 9., 1.];
|
||||
for mid in 0..sequence.len() {
|
||||
let (left, right) = sequence.split_at(mid);
|
||||
let avg_total: Kurtosis = sequence.iter().map(|x| *x).collect();
|
||||
let mut avg_left: Kurtosis = left.iter().map(|x| *x).collect();
|
||||
let avg_right: Kurtosis = right.iter().map(|x| *x).collect();
|
||||
avg_left.merge(&avg_right);
|
||||
assert_eq!(avg_total.len(), avg_left.len());
|
||||
assert_almost_eq!(avg_total.mean(), avg_left.mean(), 1e-14);
|
||||
assert_almost_eq!(avg_total.sample_variance(), avg_left.sample_variance(), 1e-14);
|
||||
assert_almost_eq!(avg_total.skewness(), avg_left.skewness(), 1e-14);
|
||||
assert_almost_eq!(avg_total.kurtosis(), avg_left.kurtosis(), 1e-14);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn exponential_distribution() {
|
||||
use rand::distributions::{Exp, IndependentSample};
|
||||
let lambda = 2.0;
|
||||
let normal = Exp::new(lambda);
|
||||
let mut a = Kurtosis::new();
|
||||
for _ in 0..6_000_000 {
|
||||
a.add(normal.ind_sample(&mut ::rand::thread_rng()));
|
||||
}
|
||||
assert_almost_eq!(a.mean(), 1./lambda, 1e-2);
|
||||
assert_almost_eq!(a.sample_variance().sqrt(), 1./lambda, 1e-2);
|
||||
assert_almost_eq!(a.population_variance().sqrt(), 1./lambda, 1e-2);
|
||||
assert_almost_eq!(a.error_mean(), 0.0, 1e-2);
|
||||
assert_almost_eq!(a.skewness(), 2.0, 1e-2);
|
||||
assert_almost_eq!(a.kurtosis(), 6.0, 4e-2);
|
||||
}
|
@ -42,7 +42,7 @@ fn simple() {
|
||||
|
||||
#[test]
|
||||
fn merge() {
|
||||
let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
||||
let sequence: &[f64] = &[1., 2., 3., -4., 5., 6., 7., 8., 9., 1.];
|
||||
for mid in 0..sequence.len() {
|
||||
let (left, right) = sequence.split_at(mid);
|
||||
let avg_total: Skewness = sequence.iter().map(|x| *x).collect();
|
||||
@ -50,9 +50,9 @@ fn merge() {
|
||||
let avg_right: Skewness = right.iter().map(|x| *x).collect();
|
||||
avg_left.merge(&avg_right);
|
||||
assert_eq!(avg_total.len(), avg_left.len());
|
||||
assert_eq!(avg_total.mean(), avg_left.mean());
|
||||
assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
|
||||
assert_eq!(avg_total.skewness(), avg_left.skewness());
|
||||
assert_almost_eq!(avg_total.mean(), avg_left.mean(), 1e-14);
|
||||
assert_almost_eq!(avg_total.sample_variance(), avg_left.sample_variance(), 1e-14);
|
||||
assert_almost_eq!(avg_total.skewness(), avg_left.skewness(), 1e-14);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user