141 lines
3.9 KiB
Rust
141 lines
3.9 KiB
Rust
/// Estimate the arithmetic mean, the variance and the skewness of a sequence of
|
|
/// numbers ("population").
|
|
///
|
|
/// This can be used to estimate the standard error of the mean.
|
|
#[derive(Debug, Clone)]
|
|
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
|
|
pub struct Skewness {
|
|
/// Estimator of mean and variance.
|
|
avg: MeanWithError,
|
|
/// Intermediate sum of cubes for calculating the skewness.
|
|
sum_3: f64,
|
|
}
|
|
|
|
impl Skewness {
|
|
/// Create a new skewness estimator.
|
|
#[inline]
|
|
pub fn new() -> Skewness {
|
|
Skewness {
|
|
avg: MeanWithError::new(),
|
|
sum_3: 0.,
|
|
}
|
|
}
|
|
|
|
/// Increment the sample size.
|
|
///
|
|
/// This does not update anything else.
|
|
#[inline]
|
|
fn increment(&mut self) {
|
|
self.avg.increment();
|
|
}
|
|
|
|
/// Add an observation given an already calculated difference from the mean
|
|
/// divided by the number of samples, assuming the inner count of the sample
|
|
/// size was already updated.
|
|
///
|
|
/// This is useful for avoiding unnecessary divisions in the inner loop.
|
|
#[inline]
|
|
fn add_inner(&mut self, delta: f64, delta_n: f64) {
|
|
// This algorithm was suggested by Terriberry.
|
|
//
|
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
|
let n = f64::approx_from(self.len()).unwrap();
|
|
let term = delta * delta_n * (n - 1.);
|
|
self.sum_3 += term * delta_n * (n - 2.)
|
|
- 3.*delta_n * self.avg.sum_2;
|
|
self.avg.add_inner(delta_n);
|
|
}
|
|
|
|
/// Determine whether the sample is empty.
|
|
#[inline]
|
|
pub fn is_empty(&self) -> bool {
|
|
self.avg.is_empty()
|
|
}
|
|
|
|
/// Estimate the mean of the population.
|
|
///
|
|
/// Returns 0 for an empty sample.
|
|
#[inline]
|
|
pub fn mean(&self) -> f64 {
|
|
self.avg.mean()
|
|
}
|
|
|
|
/// Return the sample size.
|
|
#[inline]
|
|
pub fn len(&self) -> u64 {
|
|
self.avg.len()
|
|
}
|
|
|
|
/// Calculate the sample variance.
|
|
///
|
|
/// This is an unbiased estimator of the variance of the population.
|
|
#[inline]
|
|
pub fn sample_variance(&self) -> f64 {
|
|
self.avg.sample_variance()
|
|
}
|
|
|
|
/// Calculate the population variance of the sample.
|
|
///
|
|
/// This is a biased estimator of the variance of the population.
|
|
#[inline]
|
|
pub fn population_variance(&self) -> f64 {
|
|
self.avg.population_variance()
|
|
}
|
|
|
|
/// Estimate the standard error of the mean of the population.
|
|
#[inline]
|
|
pub fn error_mean(&self) -> f64 {
|
|
self.avg.error()
|
|
}
|
|
|
|
/// Estimate the skewness of the population.
|
|
#[inline]
|
|
pub fn skewness(&self) -> f64 {
|
|
if self.sum_3 == 0. {
|
|
return 0.;
|
|
}
|
|
let n = f64::approx_from(self.len()).unwrap();
|
|
let sum_2 = self.avg.sum_2;
|
|
debug_assert_ne!(sum_2, 0.);
|
|
n.sqrt() * self.sum_3 / (sum_2*sum_2*sum_2).sqrt()
|
|
}
|
|
}
|
|
|
|
impl Default for Skewness {
|
|
fn default() -> Skewness {
|
|
Skewness::new()
|
|
}
|
|
}
|
|
|
|
impl Estimate for Skewness {
|
|
#[inline]
|
|
fn add(&mut self, x: f64) {
|
|
let delta = x - self.mean();
|
|
self.increment();
|
|
let n = f64::approx_from(self.len()).unwrap();
|
|
self.add_inner(delta, delta/n);
|
|
}
|
|
|
|
#[inline]
|
|
fn estimate(&self) -> f64 {
|
|
self.skewness()
|
|
}
|
|
}
|
|
|
|
impl Merge for Skewness {
    /// Merge the observations summarized by `other` into `self`.
    ///
    /// Merging an empty `other` leaves `self` untouched, and merging into an
    /// empty `self` copies `other`. Handling these cases up front matters:
    /// with both samples empty the combined length is zero, so the scaled
    /// difference of the means would be `0 / 0 = NaN` and would poison
    /// `sum_3`.
    #[inline]
    fn merge(&mut self, other: &Skewness) {
        if other.is_empty() {
            // Nothing to add; also covers the case where both are empty.
            return;
        }
        if self.is_empty() {
            // The merged state is exactly `other`; no arithmetic is needed.
            *self = other.clone();
            return;
        }
        let len_self = f64::approx_from(self.len()).unwrap();
        let len_other = f64::approx_from(other.len()).unwrap();
        // Both samples are non-empty here, so `len_total` is non-zero.
        let len_total = len_self + len_other;
        let delta = other.mean() - self.mean();
        let delta_n = delta / len_total;
        // `self.avg.sum_2` and `self.mean()` are read before `self.avg` is
        // merged below; keep this order.
        self.sum_3 += other.sum_3
            + delta*delta_n*delta_n * len_self*len_other*(len_self - len_other)
            + 3.*delta_n * (len_self * other.avg.sum_2 - len_other * self.avg.sum_2);
        self.avg.merge(&other.avg);
    }
}
|
|
|
|
// NOTE(review): `impl_from_iterator!` is defined outside this file; presumably
// it generates `FromIterator` impls so that a `Skewness` can be collected from
// an iterator of numbers — confirm against the macro definition.
impl_from_iterator!(Skewness);
|