rust-average/src/moments/skewness.rs

141 lines
3.9 KiB
Rust

/// Estimate the arithmetic mean, the variance and the skewness of a sequence of
/// numbers ("population").
///
/// Observations are fed in one at a time through `add`, and two estimators
/// can be combined with `merge`; neither requires storing the observations
/// themselves, only the running sums held in this struct.
///
/// This can be used to estimate the standard error of the mean.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct Skewness {
/// Estimator of mean and variance.
///
/// All count, mean and variance queries on `Skewness` are forwarded to this
/// inner estimator; its `sum_2` field is also read when updating `sum_3`.
avg: MeanWithError,
/// Intermediate sum of cubes for calculating the skewness.
///
/// Starts at zero and is updated incrementally in `add_inner` and `merge`.
/// Presumably this is the sum of cubed deviations from the mean (the
/// formula in `skewness` is consistent with that reading).
sum_3: f64,
}
impl Skewness {
    /// Construct an estimator that has not seen any observations yet.
    #[inline]
    pub fn new() -> Self {
        let avg = MeanWithError::new();
        Self { avg, sum_3: 0. }
    }

    /// Bump the sample size by one.
    ///
    /// Nothing else is updated by this call.
    #[inline]
    fn increment(&mut self) {
        self.avg.increment();
    }

    /// Fold one observation into the running sums.
    ///
    /// `delta` is the observation's difference from the mean and `delta_n` is
    /// that difference divided by the number of samples. The caller must have
    /// already incremented the sample size (see `increment`).
    ///
    /// Taking both values pre-computed keeps divisions out of the inner loop.
    #[inline]
    fn add_inner(&mut self, delta: f64, delta_n: f64) {
        // Incremental update suggested by Terriberry; see
        // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let count = f64::approx_from(self.len()).unwrap();
        let scaled_square = delta * delta_n * (count - 1.);
        let cubic_part = scaled_square * delta_n * (count - 2.);
        // `self.avg.sum_2` is read here, before the inner estimator is handed
        // this observation, so the inner `add_inner` call has to stay last.
        let correction = 3. * delta_n * self.avg.sum_2;
        self.sum_3 += cubic_part - correction;
        self.avg.add_inner(delta_n);
    }

    /// Report whether no observations have been added so far.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.avg.is_empty()
    }

    /// Estimate the mean of the population.
    ///
    /// An empty sample yields 0.
    #[inline]
    pub fn mean(&self) -> f64 {
        self.avg.mean()
    }

    /// Number of observations added so far.
    #[inline]
    pub fn len(&self) -> u64 {
        self.avg.len()
    }

    /// Calculate the sample variance.
    ///
    /// This is an unbiased estimator of the population's variance.
    #[inline]
    pub fn sample_variance(&self) -> f64 {
        self.avg.sample_variance()
    }

    /// Calculate the population variance of the sample.
    ///
    /// This is a biased estimator of the population's variance.
    #[inline]
    pub fn population_variance(&self) -> f64 {
        self.avg.population_variance()
    }

    /// Estimate the standard error of the mean of the population.
    #[inline]
    pub fn error_mean(&self) -> f64 {
        self.avg.error()
    }

    /// Estimate the skewness of the population.
    ///
    /// Returns 0 whenever the accumulated third-order sum is exactly zero
    /// (which includes the freshly created estimator). Otherwise the result is
    /// `sqrt(n) * sum_3 / sum_2^(3/2)`.
    #[inline]
    pub fn skewness(&self) -> f64 {
        if self.sum_3 != 0. {
            let count = f64::approx_from(self.len()).unwrap();
            let second = self.avg.sum_2;
            // A non-zero third-order sum with a zero second-order sum would
            // make the division below meaningless.
            debug_assert_ne!(second, 0.);
            let numerator = count.sqrt() * self.sum_3;
            let denominator = (second * second * second).sqrt();
            numerator / denominator
        } else {
            0.
        }
    }
}
impl Default for Skewness {
fn default() -> Skewness {
Skewness::new()
}
}
impl Estimate for Skewness {
    /// Add the observation `x` to the sample.
    #[inline]
    fn add(&mut self, x: f64) {
        // The deviation is taken against the mean as it was before this
        // observation is counted.
        let deviation = x - self.mean();
        self.increment();
        // `add_inner` expects the sample size to already include `x`.
        let count = f64::approx_from(self.len()).unwrap();
        let deviation_per_sample = deviation / count;
        self.add_inner(deviation, deviation_per_sample);
    }

    /// Return the current skewness estimate (see `Skewness::skewness`).
    #[inline]
    fn estimate(&self) -> f64 {
        self.skewness()
    }
}
impl Merge for Skewness {
    /// Merge the observations summarized by `other` into `self`.
    ///
    /// Afterwards `self` describes the union of both samples; `other` is left
    /// untouched.
    ///
    /// Empty operands are handled explicitly:
    ///
    /// * If `other` is empty, `self` is left unchanged.
    /// * If `self` is empty, it becomes an exact copy of `other`.
    ///
    /// Without these guards, merging two empty estimators would compute
    /// `0.0 / 0.0` for the scaled mean difference and permanently store NaN in
    /// the third-order sum.
    #[inline]
    fn merge(&mut self, other: &Skewness) {
        if other.is_empty() {
            // Nothing to add; also avoids dividing by a total length of zero
            // when `self` is empty as well.
            return;
        }
        if self.is_empty() {
            // The union is exactly `other`; copy it instead of running the
            // update formula with zero-weighted terms.
            *self = other.clone();
            return;
        }
        // Pairwise combination of the third-order sums; see
        // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let len_self = f64::approx_from(self.len()).unwrap();
        let len_other = f64::approx_from(other.len()).unwrap();
        let len_total = len_self + len_other;
        let delta = other.mean() - self.mean();
        let delta_n = delta / len_total;
        // This update reads both operands' `sum_2` values and means, so it is
        // performed before the inner estimators are merged below.
        self.sum_3 += other.sum_3
            + delta*delta_n*delta_n * len_self*len_other*(len_self - len_other)
            + 3.*delta_n * (len_self * other.avg.sum_2 - len_other * self.avg.sum_2);
        self.avg.merge(&other.avg);
    }
}
// Invoke the `impl_from_iterator!` macro for `Skewness`. The macro is defined
// outside this file; presumably it generates `FromIterator` implementations so
// that an estimator can be collected from an iterator of numbers -- confirm
// against the macro definition.
impl_from_iterator!(Skewness);