Implement incremental calculation of arbitrary moments

This commit is contained in:
Vinzent Steinberg 2018-01-11 18:29:43 +01:00
parent d9f0b056c6
commit c1fab4722c
5 changed files with 330 additions and 20 deletions

View File

@ -1,30 +1,39 @@
[package] [package]
authors = ["Vinzent Steinberg <Vinzent.Steinberg@gmail.com>"] authors = ["Vinzent Steinberg <Vinzent.Steinberg@gmail.com>"]
name = "average" categories = ["science", "no-std"]
version = "0.7.0"
license = "MIT/Apache-2.0"
repository = "https://github.com/vks/average"
description = "Calculate statistics iteratively" description = "Calculate statistics iteratively"
documentation = "https://docs.rs/average" documentation = "https://docs.rs/average"
readme = "README.md"
categories = ["science", "no-std"]
keywords = ["stats", "mean", "skewness", "kurtosis", "quantile"] keywords = ["stats", "mean", "skewness", "kurtosis", "quantile"]
license = "MIT/Apache-2.0"
name = "average"
readme = "README.md"
repository = "https://github.com/vks/average"
version = "0.7.0"
[[bench]]
harness = false
name = "mean"
[[bench]]
harness = false
name = "min"
[dependencies] [dependencies]
conv = { version = "0.3", default-features = false } num-traits = "0.1"
num-integer = "0.1"
quickersort = "3" quickersort = "3"
serde = { version = "1", optional = true, features = ["derive"]}
[dependencies.conv]
default-features = false
version = "0.3"
[dependencies.serde]
features = ["derive"]
optional = true
version = "1"
[dev-dependencies] [dev-dependencies]
bencher = "0.1" bencher = "0.1"
rand = "0.3" rand = "0.3"
streaming-stats = "0.1"
serde_json = "1" serde_json = "1"
streaming-stats = "0.1"
[[bench]]
name = "mean"
harness = false
[[bench]]
name = "min"
harness = false

View File

@ -70,16 +70,20 @@
//! [`Min`]: ./struct.Min.html //! [`Min`]: ./struct.Min.html
//! [`Max`]: ./struct.Max.html //! [`Max`]: ./struct.Max.html
//! [`concatenate`]: ./macro.concatenate.html //! [`concatenate`]: ./macro.concatenate.html
#![feature(inclusive_range_syntax)]
#![cfg_attr(feature = "cargo-clippy", allow(float_cmp))] #![cfg_attr(feature = "cargo-clippy", allow(float_cmp))]
#![no_std] //#![no_std]
extern crate core;
extern crate conv; extern crate conv;
extern crate quickersort; extern crate quickersort;
#[cfg(feature = "serde")] #[cfg(feature = "serde")]
#[macro_use] #[macro_use]
extern crate serde; extern crate serde;
extern crate num_traits;
extern crate num_integer;
#[macro_use] mod macros; #[macro_use] mod macros;
mod moments; mod moments;
@ -88,7 +92,7 @@ mod minmax;
mod quantile; mod quantile;
mod traits; mod traits;
pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError}; pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError, Moments};
pub use weighted_mean::{WeightedMean, WeightedMeanWithError}; pub use weighted_mean::{WeightedMean, WeightedMeanWithError};
pub use minmax::{Min, Max}; pub use minmax::{Min, Max};
pub use quantile::Quantile; pub use quantile::Quantile;

View File

@ -46,6 +46,7 @@ impl Kurtosis {
+ 6. * delta_n_sq * self.avg.avg.sum_2 + 6. * delta_n_sq * self.avg.avg.sum_2
- 4. * delta_n * self.avg.sum_3; - 4. * delta_n * self.avg.sum_3;
self.avg.add_inner(delta, delta_n); self.avg.add_inner(delta, delta_n);
println!("skewness={} kurtosis={}", self.skewness(), self.kurtosis());
} }
/// Determine whether the sample is empty. /// Determine whether the sample is empty.
@ -96,7 +97,7 @@ impl Kurtosis {
self.avg.skewness() self.avg.skewness()
} }
/// Estimate the kurtosis of the population. /// Estimate the excess kurtosis of the population.
#[inline] #[inline]
pub fn kurtosis(&self) -> f64 { pub fn kurtosis(&self) -> f64 {
if self.sum_4 == 0. { if self.sum_4 == 0. {

View File

@ -1,6 +1,8 @@
use core; use core;
use conv::ApproxFrom; use conv::ApproxFrom;
use num_traits::pow;
use num_integer::binomial;
use super::{Estimate, Merge}; use super::{Estimate, Merge};
@ -14,3 +16,210 @@ include!("kurtosis.rs");
/// Alias for `Variance`. /// Alias for `Variance`.
pub type MeanWithError = Variance; pub type MeanWithError = Variance;
const MAX_P: usize = 4;
pub struct Moments {
/// Number of samples.
///
/// Technically, this is the same as m_0, but we want this to be an integer
/// to avoid numerical issues, so we store it separately.
n: u64,
/// Average.
avg: f64,
/// Moments times `n`.
///
/// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition.
m: [f64; MAX_P - 1],
}
impl Moments {
#[inline]
pub fn new() -> Moments {
Moments {
n: 0,
avg: 0.,
m: [0.; MAX_P - 1],
}
}
#[inline]
pub fn len(&self) -> u64 {
self.n
}
#[inline]
pub fn mean(&self) -> f64 {
self.avg
}
#[inline]
pub fn central_moment(&self, p: usize) -> f64 {
let n = f64::approx_from(self.n).unwrap();
match p {
0 => 1.,
1 => 0.,
_ => self.m[p - 2] / n
}
}
#[inline]
pub fn standardized_moment(&self, p: usize) -> f64 {
let variance = self.central_moment(2);
assert_ne!(variance, 0.);
let n = f64::approx_from(self.n).unwrap();
match p {
0 => n,
1 => 0.,
2 => 1.,
_ => self.central_moment(p) / pow(variance.sqrt(), p),
}
}
#[inline]
pub fn sample_variance(&self) -> f64 {
if self.n < 2 {
return 0.;
}
self.m[0] / f64::approx_from(self.n - 1).unwrap()
}
#[inline]
pub fn sample_skewness(&self) -> f64 {
if self.n < 2 {
return 0.;
}
let n = f64::approx_from(self.n).unwrap();
if self.n < 3 {
// Method of moments
return self.central_moment(3) /
(n * (self.central_moment(2) / (n - 1.)).powf(1.5))
}
// Adjusted Fisher-Pearson standardized moment coefficient
(n * (n - 1.)).sqrt() / (n * (n - 2.)) *
self.central_moment(3) / (self.central_moment(2) / n).powf(1.5)
}
#[inline]
pub fn sample_kurtosis(&self) -> f64 {
if self.n < 4 {
return 0.;
}
let n = f64::approx_from(self.n).unwrap();
(n + 1.) * n * self.central_moment(4) /
((n - 1.) * (n - 2.) * (n - 3.) * pow(self.central_moment(2), 2)) -
3. * pow(n - 1., 2) / ((n - 2.) * (n - 3.))
}
#[inline]
pub fn add(&mut self, x: f64) {
let mut result = Moments::new();
result.n = self.n + 1;
let delta = x - self.avg;
let n = f64::approx_from(result.n).unwrap();
result.avg = self.avg + delta / n;
let mut coeff_delta = delta;
let over_n = 1. / n;
let mut term1 = (n - 1.) * (-over_n);
let factor1 = -over_n;
let mut term2 = (n - 1.) * over_n;
let factor2 = (n - 1.) * over_n;
let factor_coeff = -delta * over_n;
for p in 2..=MAX_P {
term1 *= factor1;
term2 *= factor2;
coeff_delta *= delta;
result.m[p - 2] = self.m[p - 2] + (term1 + term2) * coeff_delta;
//println!("before: p={} m={:?}", p, self.m);
let mut coeff = 1.;
for k in 1..=(p - 2) {
coeff *= factor_coeff;
result.m[p - 2] += f64::approx_from(binomial(p, k)).unwrap() *
self.m[p - 2 - k] * coeff;
}
//println!("after: p={} m={:?}", p, self.m);
}
*self = result;
/*
self.n += 1;
let delta = x - self.avg;
let n = f64::approx_from(self.n).unwrap();
self.avg += delta / n;
let mut coeff_delta = delta;
let over_n = 1. / n;
let mut term1 = (n - 1.) * (-over_n);
let factor1 = -over_n;
let mut term2 = (n - 1.) * over_n;
let factor2 = (n - 1.) * over_n;
let factor_coeff = -delta * over_n;
for p in 2..=MAX_P {
term1 *= factor1;
term2 *= factor2;
coeff_delta *= delta;
self.m[p - 2] += (term1 + term2) * coeff_delta;
println!("before: p={} m={:?}", p, self.m);
let mut coeff = 1.;
for k in 1..=(p - 2) {
coeff *= factor_coeff;
self.m[p - 2] += f64::approx_from(binomial(p, k)).unwrap() *
self.m[p - 2 - k] * coeff;
}
println!("after: p={} m={:?}", p, self.m);
}
*/
}
#[inline]
pub fn merge(&mut self, other: &Moments) {
let mut result = Moments::new();
result.n = self.n + other.n;
if result.n == 0 {
return;
}
for i in 0..result.m.len() {
result.m[i] = self.m[i] + other.m[i];
}
let n = f64::approx_from(result.n).unwrap();
let n_a = f64::approx_from(self.n).unwrap();
let n_b = f64::approx_from(other.n).unwrap();
let n_a_over_n = n_a / n;
let n_b_over_n = n_b / n;
let delta = other.avg - self.avg;
result.avg = self.avg + n_b_over_n * delta;
let factor_a = -n_b_over_n * delta;
let factor_b = n_a_over_n * delta;
let mut term_a = n_a * factor_a;
let mut term_b = n_b * factor_b;
for p in 2..=MAX_P {
term_a *= factor_a;
term_b *= factor_b;
result.m[p - 2] += term_a + term_b;
let mut coeff_a = 1.;
let mut coeff_b = 1.;
let mut coeff_delta = 1.;
for k in 1..=(p - 2) {
coeff_a *= -n_b_over_n;
coeff_b *= n_a_over_n;
coeff_delta *= delta;
result.m[p - 2] += f64::approx_from(binomial(p, k)).unwrap() *
coeff_delta *
(self.m[p - 2 - k] * coeff_a + other.m[p - 2 - k] * coeff_b);
// TODO: use IterBinomial
}
}
*self = result;
}
}
impl_from_iterator!(Moments);

87
tests/moments.rs Normal file
View File

@ -0,0 +1,87 @@
#![cfg_attr(feature = "cargo-clippy", allow(float_cmp, map_clone))]
#[macro_use] extern crate average;
extern crate core;
#[cfg(feature = "serde")]
extern crate serde_json;
use core::iter::Iterator;
use average::{Moments};
#[test]
fn trivial() {
let mut a = Moments::new();
assert_eq!(a.len(), 0);
a.add(1.0);
assert_eq!(a.len(), 1);
assert_eq!(a.mean(), 1.0);
assert_eq!(a.central_moment(0), 1.0);
assert_eq!(a.central_moment(1), 0.0);
assert_eq!(a.central_moment(2), 0.0);
assert_eq!(a.central_moment(3), 0.0);
a.add(1.0);
assert_eq!(a.len(), 2);
assert_eq!(a.mean(), 1.0);
assert_eq!(a.central_moment(0), 1.0);
assert_eq!(a.central_moment(1), 0.0);
assert_eq!(a.central_moment(2), 0.0);
assert_eq!(a.central_moment(3), 0.0);
}
#[test]
fn simple() {
let mut a: Moments = (1..6).map(f64::from).collect();
assert_eq!(a.len(), 5);
assert_eq!(a.mean(), 3.0);
assert_eq!(a.central_moment(0), 1.0);
assert_eq!(a.central_moment(1), 0.0);
// variance
assert_eq!(a.central_moment(2), 2.0);
assert_eq!(a.standardized_moment(0), 5.0);
assert_eq!(a.standardized_moment(1), 0.0);
assert_eq!(a.standardized_moment(2), 1.0);
assert_almost_eq!(a.sample_skewness(), 0.0, 1e-15);
a.add(1.0);
// skewness
assert_almost_eq!(a.standardized_moment(3), 0.2795084971874741, 1e-15);
// kurtosis
assert_almost_eq!(a.standardized_moment(4), -1.365 + 3.0, 1e-14);
}
/*
#[cfg(feature = "serde")]
#[test]
fn simple_serde() {
let a: Kurtosis = (1..6).map(f64::from).collect();
let b = serde_json::to_string(&a).unwrap();
assert_eq!(&b, "{\"avg\":{\"avg\":{\"avg\":{\"avg\":3.0,\"n\":5},\"sum_2\":10.0},\"sum_3\":0.0},\"sum_4\":34.0}");
let mut c: Kurtosis = serde_json::from_str(&b).unwrap();
assert_eq!(c.mean(), 3.0);
assert_eq!(c.len(), 5);
assert_eq!(c.sample_variance(), 2.5);
assert_almost_eq!(c.error_mean(), f64::sqrt(0.5), 1e-16);
assert_eq!(c.skewness(), 0.0);
c.add(1.0);
assert_almost_eq!(c.skewness(), 0.2795084971874741, 1e-15);
assert_almost_eq!(c.kurtosis(), -1.365, 1e-15);
}
#[test]
fn merge() {
let sequence: &[f64] = &[1., 2., 3., -4., 5.1, 6.3, 7.3, -8., 9., 1.];
for mid in 0..sequence.len() {
let (left, right) = sequence.split_at(mid);
let avg_total: Kurtosis = sequence.iter().map(|x| *x).collect();
let mut avg_left: Kurtosis = left.iter().map(|x| *x).collect();
let avg_right: Kurtosis = right.iter().map(|x| *x).collect();
avg_left.merge(&avg_right);
assert_eq!(avg_total.len(), avg_left.len());
assert_almost_eq!(avg_total.mean(), avg_left.mean(), 1e-14);
assert_almost_eq!(avg_total.sample_variance(), avg_left.sample_variance(), 1e-14);
assert_almost_eq!(avg_total.skewness(), avg_left.skewness(), 1e-14);
assert_almost_eq!(avg_total.kurtosis(), avg_left.kurtosis(), 1e-14);
}
}
*/