Implement skewness and add missing inline annotations
This commit is contained in:
parent
8c6c55190e
commit
4f5ddbd6e9
@ -45,7 +45,7 @@ mod minmax;
|
||||
mod reduce;
|
||||
mod quantile;
|
||||
|
||||
pub use moments::{Mean, Variance, MeanWithError};
|
||||
pub use moments::{Mean, Variance, Skewness, MeanWithError};
|
||||
pub use weighted_mean::{WeightedMean, WeightedMeanWithError};
|
||||
pub use minmax::{Min, Max};
|
||||
pub use quantile::Quantile;
|
||||
|
@ -51,6 +51,7 @@ impl Mean {
|
||||
/// size was already updated.
|
||||
///
|
||||
/// This is useful for avoiding unnecessary divisions in the inner loop.
|
||||
#[inline]
|
||||
fn add_inner(&mut self, delta_n: f64) {
|
||||
// This algorithm introduced by Welford in 1962 trades numerical
|
||||
// stability for a division inside the loop.
|
||||
|
@ -1,4 +1,5 @@
|
||||
include!("mean.rs");
|
||||
include!("variance.rs");
|
||||
include!("skewness.rs");
|
||||
|
||||
pub type MeanWithError = Variance;
|
||||
|
134
src/moments/skewness.rs
Normal file
134
src/moments/skewness.rs
Normal file
@ -0,0 +1,134 @@
|
||||
/// Estimate the arithmetic mean, the variance and the skewness of a sequence of
|
||||
/// numbers ("population").
|
||||
///
|
||||
/// This can be used to estimate the standard error of the mean.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Skewness {
|
||||
/// Estimator of mean and variance.
|
||||
avg: MeanWithError,
|
||||
/// Intermediate sum of cubes for calculating the skewness.
|
||||
sum_3: f64,
|
||||
}
|
||||
|
||||
impl Skewness {
|
||||
/// Create a new skewness estimator.
|
||||
#[inline]
|
||||
pub fn new() -> Skewness {
|
||||
Skewness {
|
||||
avg: MeanWithError::new(),
|
||||
sum_3: 0.,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add an observation sampled from the population.
|
||||
#[inline]
|
||||
pub fn add(&mut self, x: f64) {
|
||||
let delta = x - self.mean();
|
||||
self.increment();
|
||||
let n = f64::approx_from(self.len()).unwrap();
|
||||
self.add_inner(delta, delta/n);
|
||||
}
|
||||
|
||||
/// Increment the sample size.
|
||||
///
|
||||
/// This does not update anything else.
|
||||
#[inline]
|
||||
fn increment(&mut self) {
|
||||
self.avg.increment();
|
||||
}
|
||||
|
||||
/// Add an observation given an already calculated difference from the mean
|
||||
/// divided by the number of samples, assuming the inner count of the sample
|
||||
/// size was already updated.
|
||||
///
|
||||
/// This is useful for avoiding unnecessary divisions in the inner loop.
|
||||
#[inline]
|
||||
fn add_inner(&mut self, delta: f64, delta_n: f64) {
|
||||
// This algorithm was suggested by Terriberry.
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||
let n = f64::approx_from(self.len()).unwrap();
|
||||
self.sum_3 += delta*delta_n*delta_n * (n - 1.)*(n - 2.)
|
||||
- 3.*delta_n * self.avg.sum_2;
|
||||
self.avg.add_inner(delta_n);
|
||||
}
|
||||
|
||||
/// Determine whether the sample is empty.
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.avg.is_empty()
|
||||
}
|
||||
|
||||
/// Estimate the mean of the population.
|
||||
///
|
||||
/// Returns 0 for an empty sample.
|
||||
#[inline]
|
||||
pub fn mean(&self) -> f64 {
|
||||
self.avg.mean()
|
||||
}
|
||||
|
||||
/// Return the sample size.
|
||||
#[inline]
|
||||
pub fn len(&self) -> u64 {
|
||||
self.avg.len()
|
||||
}
|
||||
|
||||
/// Calculate the sample variance.
|
||||
///
|
||||
/// This is an unbiased estimator of the variance of the population.
|
||||
#[inline]
|
||||
pub fn sample_variance(&self) -> f64 {
|
||||
self.avg.sample_variance()
|
||||
}
|
||||
|
||||
/// Calculate the population variance of the sample.
|
||||
///
|
||||
/// This is a biased estimator of the variance of the population.
|
||||
#[inline]
|
||||
pub fn population_variance(&self) -> f64 {
|
||||
self.avg.population_variance()
|
||||
}
|
||||
|
||||
/// Estimate the standard error of the mean of the population.
|
||||
#[inline]
|
||||
pub fn error_mean(&self) -> f64 {
|
||||
self.avg.error()
|
||||
}
|
||||
|
||||
/// Estimate the skewness of the population.
|
||||
#[inline]
|
||||
pub fn skewness(&self) -> f64 {
|
||||
if self.sum_3 == 0. {
|
||||
return 0.;
|
||||
}
|
||||
let n = f64::approx_from(self.len()).unwrap();
|
||||
let sum_2 = self.avg.sum_2;
|
||||
debug_assert!(sum_2 != 0.);
|
||||
n.sqrt() * self.sum_3 / (sum_2*sum_2*sum_2).sqrt()
|
||||
}
|
||||
|
||||
/// Merge another sample into this one.
|
||||
#[inline]
|
||||
pub fn merge(&mut self, other: &Skewness) {
|
||||
let len_self = f64::approx_from(self.len()).unwrap();
|
||||
let len_other = f64::approx_from(other.len()).unwrap();
|
||||
let len_total = len_self + len_other;
|
||||
let delta = other.mean() - self.mean();
|
||||
self.sum_3 += other.sum_3
|
||||
+ delta*delta*delta * len_self*len_other*(len_self - len_other) / (len_total*len_total)
|
||||
+ 3.*delta * (len_self*other.avg.sum_2 - len_other*self.avg.sum_2) / len_total;
|
||||
self.avg.merge(&other.avg);
|
||||
}
|
||||
}
|
||||
|
||||
impl core::iter::FromIterator<f64> for Skewness {
|
||||
fn from_iter<T>(iter: T) -> Skewness
|
||||
where T: IntoIterator<Item=f64>
|
||||
{
|
||||
let mut a = Skewness::new();
|
||||
for i in iter {
|
||||
a.add(i);
|
||||
}
|
||||
a
|
||||
}
|
||||
}
|
@ -21,7 +21,8 @@ pub struct Variance {
|
||||
}
|
||||
|
||||
impl Variance {
|
||||
/// Create a new average estimator.
|
||||
/// Create a new variance estimator.
|
||||
#[inline]
|
||||
pub fn new() -> Variance {
|
||||
Variance { avg: Mean::new(), sum_2: 0. }
|
||||
}
|
||||
@ -48,6 +49,7 @@ impl Variance {
|
||||
/// size was already updated.
|
||||
///
|
||||
/// This is useful for avoiding unnecessary divisions in the inner loop.
|
||||
#[inline]
|
||||
fn add_inner(&mut self, delta_n: f64) {
|
||||
// This algorithm introduced by Welford in 1962 trades numerical
|
||||
// stability for a division inside the loop.
|
||||
|
73
tests/skewness.rs
Normal file
73
tests/skewness.rs
Normal file
@ -0,0 +1,73 @@
|
||||
#[macro_use] extern crate average;
|
||||
|
||||
extern crate core;
|
||||
|
||||
extern crate rand;
|
||||
|
||||
use core::iter::Iterator;
|
||||
|
||||
use average::Skewness;
|
||||
|
||||
#[test]
|
||||
fn trivial() {
|
||||
let mut a = Skewness::new();
|
||||
assert_eq!(a.len(), 0);
|
||||
a.add(1.0);
|
||||
assert_eq!(a.mean(), 1.0);
|
||||
assert_eq!(a.len(), 1);
|
||||
assert_eq!(a.sample_variance(), 0.0);
|
||||
assert_eq!(a.population_variance(), 0.0);
|
||||
assert_eq!(a.error_mean(), 0.0);
|
||||
assert_eq!(a.skewness(), 0.0);
|
||||
a.add(1.0);
|
||||
assert_eq!(a.mean(), 1.0);
|
||||
assert_eq!(a.len(), 2);
|
||||
assert_eq!(a.sample_variance(), 0.0);
|
||||
assert_eq!(a.population_variance(), 0.0);
|
||||
assert_eq!(a.error_mean(), 0.0);
|
||||
assert_eq!(a.skewness(), 0.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple() {
|
||||
let mut a: Skewness = (1..6).map(f64::from).collect();
|
||||
assert_eq!(a.mean(), 3.0);
|
||||
assert_eq!(a.len(), 5);
|
||||
assert_eq!(a.sample_variance(), 2.5);
|
||||
assert_almost_eq!(a.error_mean(), f64::sqrt(0.5), 1e-16);
|
||||
assert_eq!(a.skewness(), 0.0);
|
||||
a.add(1.0);
|
||||
assert_almost_eq!(a.skewness(), 0.2795084971874741, 1e-15);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merge() {
|
||||
let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
||||
for mid in 0..sequence.len() {
|
||||
let (left, right) = sequence.split_at(mid);
|
||||
let avg_total: Skewness = sequence.iter().map(|x| *x).collect();
|
||||
let mut avg_left: Skewness = left.iter().map(|x| *x).collect();
|
||||
let avg_right: Skewness = right.iter().map(|x| *x).collect();
|
||||
avg_left.merge(&avg_right);
|
||||
assert_eq!(avg_total.len(), avg_left.len());
|
||||
assert_eq!(avg_total.mean(), avg_left.mean());
|
||||
assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
|
||||
assert_eq!(avg_total.skewness(), avg_left.skewness());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn exponential_distribution() {
|
||||
use rand::distributions::{Exp, IndependentSample};
|
||||
let lambda = 2.0;
|
||||
let normal = Exp::new(lambda);
|
||||
let mut a = Skewness::new();
|
||||
for _ in 0..2_000_000 {
|
||||
a.add(normal.ind_sample(&mut ::rand::thread_rng()));
|
||||
}
|
||||
assert_almost_eq!(a.mean(), 1./lambda, 1e-2);
|
||||
assert_almost_eq!(a.sample_variance().sqrt(), 1./lambda, 1e-2);
|
||||
assert_almost_eq!(a.population_variance().sqrt(), 1./lambda, 1e-2);
|
||||
assert_almost_eq!(a.error_mean(), 0.0, 1e-2);
|
||||
assert_almost_eq!(a.skewness(), 2.0, 1e-2);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user