Implement incremental calculation of arbitrary moments
This commit is contained in:
parent
d9f0b056c6
commit
c1fab4722c
43
Cargo.toml
43
Cargo.toml
@ -1,30 +1,39 @@
|
||||
[package]
|
||||
authors = ["Vinzent Steinberg <Vinzent.Steinberg@gmail.com>"]
|
||||
name = "average"
|
||||
version = "0.7.0"
|
||||
license = "MIT/Apache-2.0"
|
||||
repository = "https://github.com/vks/average"
|
||||
categories = ["science", "no-std"]
|
||||
description = "Calculate statistics iteratively"
|
||||
documentation = "https://docs.rs/average"
|
||||
readme = "README.md"
|
||||
categories = ["science", "no-std"]
|
||||
keywords = ["stats", "mean", "skewness", "kurtosis", "quantile"]
|
||||
license = "MIT/Apache-2.0"
|
||||
name = "average"
|
||||
readme = "README.md"
|
||||
repository = "https://github.com/vks/average"
|
||||
version = "0.7.0"
|
||||
|
||||
[[bench]]
|
||||
harness = false
|
||||
name = "mean"
|
||||
|
||||
[[bench]]
|
||||
harness = false
|
||||
name = "min"
|
||||
|
||||
[dependencies]
|
||||
conv = { version = "0.3", default-features = false }
|
||||
num-traits = "0.1"
|
||||
num-integer = "0.1"
|
||||
quickersort = "3"
|
||||
serde = { version = "1", optional = true, features = ["derive"]}
|
||||
|
||||
[dependencies.conv]
|
||||
default-features = false
|
||||
version = "0.3"
|
||||
|
||||
[dependencies.serde]
|
||||
features = ["derive"]
|
||||
optional = true
|
||||
version = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
bencher = "0.1"
|
||||
rand = "0.3"
|
||||
streaming-stats = "0.1"
|
||||
serde_json = "1"
|
||||
|
||||
[[bench]]
|
||||
name = "mean"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "min"
|
||||
harness = false
|
||||
streaming-stats = "0.1"
|
||||
|
@ -70,16 +70,20 @@
|
||||
//! [`Min`]: ./struct.Min.html
|
||||
//! [`Max`]: ./struct.Max.html
|
||||
//! [`concatenate`]: ./macro.concatenate.html
|
||||
#![feature(inclusive_range_syntax)]
|
||||
|
||||
#![cfg_attr(feature = "cargo-clippy", allow(float_cmp))]
|
||||
|
||||
#![no_std]
|
||||
//#![no_std]
|
||||
extern crate core;
|
||||
|
||||
extern crate conv;
|
||||
extern crate quickersort;
|
||||
#[cfg(feature = "serde")]
|
||||
#[macro_use]
|
||||
extern crate serde;
|
||||
extern crate num_traits;
|
||||
extern crate num_integer;
|
||||
|
||||
#[macro_use] mod macros;
|
||||
mod moments;
|
||||
@ -88,7 +92,7 @@ mod minmax;
|
||||
mod quantile;
|
||||
mod traits;
|
||||
|
||||
pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError};
|
||||
pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError, Moments};
|
||||
pub use weighted_mean::{WeightedMean, WeightedMeanWithError};
|
||||
pub use minmax::{Min, Max};
|
||||
pub use quantile::Quantile;
|
||||
|
@ -46,6 +46,7 @@ impl Kurtosis {
|
||||
+ 6. * delta_n_sq * self.avg.avg.sum_2
|
||||
- 4. * delta_n * self.avg.sum_3;
|
||||
self.avg.add_inner(delta, delta_n);
|
||||
println!("skewness={} kurtosis={}", self.skewness(), self.kurtosis());
|
||||
}
|
||||
|
||||
/// Determine whether the sample is empty.
|
||||
@ -96,7 +97,7 @@ impl Kurtosis {
|
||||
self.avg.skewness()
|
||||
}
|
||||
|
||||
/// Estimate the kurtosis of the population.
|
||||
/// Estimate the excess kurtosis of the population.
|
||||
#[inline]
|
||||
pub fn kurtosis(&self) -> f64 {
|
||||
if self.sum_4 == 0. {
|
||||
|
@ -1,6 +1,8 @@
|
||||
use core;
|
||||
|
||||
use conv::ApproxFrom;
|
||||
use num_traits::pow;
|
||||
use num_integer::binomial;
|
||||
|
||||
use super::{Estimate, Merge};
|
||||
|
||||
@ -14,3 +16,210 @@ include!("kurtosis.rs");
|
||||
|
||||
/// Alias for `Variance`.
|
||||
pub type MeanWithError = Variance;
|
||||
|
||||
const MAX_P: usize = 4;
|
||||
|
||||
pub struct Moments {
|
||||
/// Number of samples.
|
||||
///
|
||||
/// Technically, this is the same as m_0, but we want this to be an integer
|
||||
/// to avoid numerical issues, so we store it separately.
|
||||
n: u64,
|
||||
/// Average.
|
||||
avg: f64,
|
||||
/// Moments times `n`.
|
||||
///
|
||||
/// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition.
|
||||
m: [f64; MAX_P - 1],
|
||||
}
|
||||
|
||||
impl Moments {
|
||||
#[inline]
|
||||
pub fn new() -> Moments {
|
||||
Moments {
|
||||
n: 0,
|
||||
avg: 0.,
|
||||
m: [0.; MAX_P - 1],
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn len(&self) -> u64 {
|
||||
self.n
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn mean(&self) -> f64 {
|
||||
self.avg
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn central_moment(&self, p: usize) -> f64 {
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
match p {
|
||||
0 => 1.,
|
||||
1 => 0.,
|
||||
_ => self.m[p - 2] / n
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn standardized_moment(&self, p: usize) -> f64 {
|
||||
let variance = self.central_moment(2);
|
||||
assert_ne!(variance, 0.);
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
match p {
|
||||
0 => n,
|
||||
1 => 0.,
|
||||
2 => 1.,
|
||||
_ => self.central_moment(p) / pow(variance.sqrt(), p),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn sample_variance(&self) -> f64 {
|
||||
if self.n < 2 {
|
||||
return 0.;
|
||||
}
|
||||
self.m[0] / f64::approx_from(self.n - 1).unwrap()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn sample_skewness(&self) -> f64 {
|
||||
if self.n < 2 {
|
||||
return 0.;
|
||||
}
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
if self.n < 3 {
|
||||
// Method of moments
|
||||
return self.central_moment(3) /
|
||||
(n * (self.central_moment(2) / (n - 1.)).powf(1.5))
|
||||
}
|
||||
// Adjusted Fisher-Pearson standardized moment coefficient
|
||||
(n * (n - 1.)).sqrt() / (n * (n - 2.)) *
|
||||
self.central_moment(3) / (self.central_moment(2) / n).powf(1.5)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn sample_kurtosis(&self) -> f64 {
|
||||
if self.n < 4 {
|
||||
return 0.;
|
||||
}
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
(n + 1.) * n * self.central_moment(4) /
|
||||
((n - 1.) * (n - 2.) * (n - 3.) * pow(self.central_moment(2), 2)) -
|
||||
3. * pow(n - 1., 2) / ((n - 2.) * (n - 3.))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn add(&mut self, x: f64) {
|
||||
let mut result = Moments::new();
|
||||
result.n = self.n + 1;
|
||||
let delta = x - self.avg;
|
||||
let n = f64::approx_from(result.n).unwrap();
|
||||
result.avg = self.avg + delta / n;
|
||||
|
||||
let mut coeff_delta = delta;
|
||||
let over_n = 1. / n;
|
||||
let mut term1 = (n - 1.) * (-over_n);
|
||||
let factor1 = -over_n;
|
||||
let mut term2 = (n - 1.) * over_n;
|
||||
let factor2 = (n - 1.) * over_n;
|
||||
|
||||
let factor_coeff = -delta * over_n;
|
||||
|
||||
for p in 2..=MAX_P {
|
||||
term1 *= factor1;
|
||||
term2 *= factor2;
|
||||
coeff_delta *= delta;
|
||||
result.m[p - 2] = self.m[p - 2] + (term1 + term2) * coeff_delta;
|
||||
|
||||
//println!("before: p={} m={:?}", p, self.m);
|
||||
let mut coeff = 1.;
|
||||
for k in 1..=(p - 2) {
|
||||
coeff *= factor_coeff;
|
||||
result.m[p - 2] += f64::approx_from(binomial(p, k)).unwrap() *
|
||||
self.m[p - 2 - k] * coeff;
|
||||
}
|
||||
//println!("after: p={} m={:?}", p, self.m);
|
||||
}
|
||||
*self = result;
|
||||
/*
|
||||
self.n += 1;
|
||||
let delta = x - self.avg;
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
self.avg += delta / n;
|
||||
|
||||
let mut coeff_delta = delta;
|
||||
let over_n = 1. / n;
|
||||
let mut term1 = (n - 1.) * (-over_n);
|
||||
let factor1 = -over_n;
|
||||
let mut term2 = (n - 1.) * over_n;
|
||||
let factor2 = (n - 1.) * over_n;
|
||||
|
||||
let factor_coeff = -delta * over_n;
|
||||
|
||||
for p in 2..=MAX_P {
|
||||
term1 *= factor1;
|
||||
term2 *= factor2;
|
||||
coeff_delta *= delta;
|
||||
self.m[p - 2] += (term1 + term2) * coeff_delta;
|
||||
|
||||
println!("before: p={} m={:?}", p, self.m);
|
||||
let mut coeff = 1.;
|
||||
for k in 1..=(p - 2) {
|
||||
coeff *= factor_coeff;
|
||||
self.m[p - 2] += f64::approx_from(binomial(p, k)).unwrap() *
|
||||
self.m[p - 2 - k] * coeff;
|
||||
}
|
||||
println!("after: p={} m={:?}", p, self.m);
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn merge(&mut self, other: &Moments) {
|
||||
let mut result = Moments::new();
|
||||
result.n = self.n + other.n;
|
||||
if result.n == 0 {
|
||||
return;
|
||||
}
|
||||
for i in 0..result.m.len() {
|
||||
result.m[i] = self.m[i] + other.m[i];
|
||||
}
|
||||
let n = f64::approx_from(result.n).unwrap();
|
||||
let n_a = f64::approx_from(self.n).unwrap();
|
||||
let n_b = f64::approx_from(other.n).unwrap();
|
||||
let n_a_over_n = n_a / n;
|
||||
let n_b_over_n = n_b / n;
|
||||
let delta = other.avg - self.avg;
|
||||
result.avg = self.avg + n_b_over_n * delta;
|
||||
|
||||
let factor_a = -n_b_over_n * delta;
|
||||
let factor_b = n_a_over_n * delta;
|
||||
let mut term_a = n_a * factor_a;
|
||||
let mut term_b = n_b * factor_b;
|
||||
for p in 2..=MAX_P {
|
||||
term_a *= factor_a;
|
||||
term_b *= factor_b;
|
||||
result.m[p - 2] += term_a + term_b;
|
||||
|
||||
let mut coeff_a = 1.;
|
||||
let mut coeff_b = 1.;
|
||||
let mut coeff_delta = 1.;
|
||||
for k in 1..=(p - 2) {
|
||||
coeff_a *= -n_b_over_n;
|
||||
coeff_b *= n_a_over_n;
|
||||
coeff_delta *= delta;
|
||||
result.m[p - 2] += f64::approx_from(binomial(p, k)).unwrap() *
|
||||
coeff_delta *
|
||||
(self.m[p - 2 - k] * coeff_a + other.m[p - 2 - k] * coeff_b);
|
||||
// TODO: use IterBinomial
|
||||
}
|
||||
}
|
||||
|
||||
*self = result;
|
||||
}
|
||||
}
|
||||
|
||||
impl_from_iterator!(Moments);
|
||||
|
87
tests/moments.rs
Normal file
87
tests/moments.rs
Normal file
@ -0,0 +1,87 @@
|
||||
#![cfg_attr(feature = "cargo-clippy", allow(float_cmp, map_clone))]
|
||||
|
||||
#[macro_use] extern crate average;
|
||||
|
||||
extern crate core;
|
||||
#[cfg(feature = "serde")]
|
||||
extern crate serde_json;
|
||||
|
||||
use core::iter::Iterator;
|
||||
|
||||
use average::{Moments};
|
||||
|
||||
#[test]
|
||||
fn trivial() {
|
||||
let mut a = Moments::new();
|
||||
assert_eq!(a.len(), 0);
|
||||
a.add(1.0);
|
||||
assert_eq!(a.len(), 1);
|
||||
assert_eq!(a.mean(), 1.0);
|
||||
assert_eq!(a.central_moment(0), 1.0);
|
||||
assert_eq!(a.central_moment(1), 0.0);
|
||||
assert_eq!(a.central_moment(2), 0.0);
|
||||
assert_eq!(a.central_moment(3), 0.0);
|
||||
a.add(1.0);
|
||||
assert_eq!(a.len(), 2);
|
||||
assert_eq!(a.mean(), 1.0);
|
||||
assert_eq!(a.central_moment(0), 1.0);
|
||||
assert_eq!(a.central_moment(1), 0.0);
|
||||
assert_eq!(a.central_moment(2), 0.0);
|
||||
assert_eq!(a.central_moment(3), 0.0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple() {
|
||||
let mut a: Moments = (1..6).map(f64::from).collect();
|
||||
assert_eq!(a.len(), 5);
|
||||
assert_eq!(a.mean(), 3.0);
|
||||
assert_eq!(a.central_moment(0), 1.0);
|
||||
assert_eq!(a.central_moment(1), 0.0);
|
||||
// variance
|
||||
assert_eq!(a.central_moment(2), 2.0);
|
||||
assert_eq!(a.standardized_moment(0), 5.0);
|
||||
assert_eq!(a.standardized_moment(1), 0.0);
|
||||
assert_eq!(a.standardized_moment(2), 1.0);
|
||||
assert_almost_eq!(a.sample_skewness(), 0.0, 1e-15);
|
||||
a.add(1.0);
|
||||
// skewness
|
||||
assert_almost_eq!(a.standardized_moment(3), 0.2795084971874741, 1e-15);
|
||||
// kurtosis
|
||||
assert_almost_eq!(a.standardized_moment(4), -1.365 + 3.0, 1e-14);
|
||||
}
|
||||
|
||||
/*
|
||||
#[cfg(feature = "serde")]
|
||||
#[test]
|
||||
fn simple_serde() {
|
||||
let a: Kurtosis = (1..6).map(f64::from).collect();
|
||||
let b = serde_json::to_string(&a).unwrap();
|
||||
assert_eq!(&b, "{\"avg\":{\"avg\":{\"avg\":{\"avg\":3.0,\"n\":5},\"sum_2\":10.0},\"sum_3\":0.0},\"sum_4\":34.0}");
|
||||
let mut c: Kurtosis = serde_json::from_str(&b).unwrap();
|
||||
assert_eq!(c.mean(), 3.0);
|
||||
assert_eq!(c.len(), 5);
|
||||
assert_eq!(c.sample_variance(), 2.5);
|
||||
assert_almost_eq!(c.error_mean(), f64::sqrt(0.5), 1e-16);
|
||||
assert_eq!(c.skewness(), 0.0);
|
||||
c.add(1.0);
|
||||
assert_almost_eq!(c.skewness(), 0.2795084971874741, 1e-15);
|
||||
assert_almost_eq!(c.kurtosis(), -1.365, 1e-15);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merge() {
|
||||
let sequence: &[f64] = &[1., 2., 3., -4., 5.1, 6.3, 7.3, -8., 9., 1.];
|
||||
for mid in 0..sequence.len() {
|
||||
let (left, right) = sequence.split_at(mid);
|
||||
let avg_total: Kurtosis = sequence.iter().map(|x| *x).collect();
|
||||
let mut avg_left: Kurtosis = left.iter().map(|x| *x).collect();
|
||||
let avg_right: Kurtosis = right.iter().map(|x| *x).collect();
|
||||
avg_left.merge(&avg_right);
|
||||
assert_eq!(avg_total.len(), avg_left.len());
|
||||
assert_almost_eq!(avg_total.mean(), avg_left.mean(), 1e-14);
|
||||
assert_almost_eq!(avg_total.sample_variance(), avg_left.sample_variance(), 1e-14);
|
||||
assert_almost_eq!(avg_total.skewness(), avg_left.skewness(), 1e-14);
|
||||
assert_almost_eq!(avg_total.kurtosis(), avg_left.kurtosis(), 1e-14);
|
||||
}
|
||||
}
|
||||
*/
|
Loading…
Reference in New Issue
Block a user