diff --git a/Cargo.toml b/Cargo.toml index c826900..d823ef0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,30 +1,39 @@ [package] authors = ["Vinzent Steinberg "] -name = "average" -version = "0.7.0" -license = "MIT/Apache-2.0" -repository = "https://github.com/vks/average" +categories = ["science", "no-std"] description = "Calculate statistics iteratively" documentation = "https://docs.rs/average" -readme = "README.md" -categories = ["science", "no-std"] keywords = ["stats", "mean", "skewness", "kurtosis", "quantile"] +license = "MIT/Apache-2.0" +name = "average" +readme = "README.md" +repository = "https://github.com/vks/average" +version = "0.7.0" + +[[bench]] +harness = false +name = "mean" + +[[bench]] +harness = false +name = "min" [dependencies] -conv = { version = "0.3", default-features = false } +num-traits = "0.1" +num-integer = "0.1" quickersort = "3" -serde = { version = "1", optional = true, features = ["derive"]} + +[dependencies.conv] +default-features = false +version = "0.3" + +[dependencies.serde] +features = ["derive"] +optional = true +version = "1" [dev-dependencies] bencher = "0.1" rand = "0.3" -streaming-stats = "0.1" serde_json = "1" - -[[bench]] -name = "mean" -harness = false - -[[bench]] -name = "min" -harness = false +streaming-stats = "0.1" diff --git a/src/lib.rs b/src/lib.rs index 623a248..e348cff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -70,16 +70,20 @@ //! [`Min`]: ./struct.Min.html //! [`Max`]: ./struct.Max.html //! [`concatenate`]: ./macro.concatenate.html +#![feature(inclusive_range_syntax)] #![cfg_attr(feature = "cargo-clippy", allow(float_cmp))] -#![no_std] +//#![no_std] +extern crate core; extern crate conv; extern crate quickersort; #[cfg(feature = "serde")] #[macro_use] extern crate serde; +extern crate num_traits; +extern crate num_integer; #[macro_use] mod macros; mod moments; @@ -88,7 +92,7 @@ mod minmax; mod quantile; mod traits; -pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError}; +pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError, Moments}; pub use weighted_mean::{WeightedMean, WeightedMeanWithError}; pub use minmax::{Min, Max}; pub use quantile::Quantile; diff --git a/src/moments/kurtosis.rs b/src/moments/kurtosis.rs index 5f4f1a5..9cb1a24 100644 --- a/src/moments/kurtosis.rs +++ b/src/moments/kurtosis.rs @@ -46,6 +46,7 @@ impl Kurtosis { + 6. * delta_n_sq * self.avg.avg.sum_2 - 4. * delta_n * self.avg.sum_3; self.avg.add_inner(delta, delta_n); + println!("skewness={} kurtosis={}", self.skewness(), self.kurtosis()); } /// Determine whether the sample is empty. @@ -96,7 +97,7 @@ impl Kurtosis { self.avg.skewness() } - /// Estimate the kurtosis of the population. + /// Estimate the excess kurtosis of the population. #[inline] pub fn kurtosis(&self) -> f64 { if self.sum_4 == 0. { diff --git a/src/moments/mod.rs b/src/moments/mod.rs index cec57e5..dcaf112 100644 --- a/src/moments/mod.rs +++ b/src/moments/mod.rs @@ -1,6 +1,8 @@ use core; use conv::ApproxFrom; +use num_traits::pow; +use num_integer::binomial; use super::{Estimate, Merge}; @@ -14,3 +16,210 @@ include!("kurtosis.rs"); /// Alias for `Variance`. pub type MeanWithError = Variance; + +const MAX_P: usize = 4; + +pub struct Moments { + /// Number of samples. + /// + /// Technically, this is the same as m_0, but we want this to be an integer + /// to avoid numerical issues, so we store it separately. + n: u64, + /// Average. + avg: f64, + /// Moments times `n`. + /// + /// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition. + m: [f64; MAX_P - 1], +} + +impl Moments { + #[inline] + pub fn new() -> Moments { + Moments { + n: 0, + avg: 0., + m: [0.; MAX_P - 1], + } + } + + #[inline] + pub fn len(&self) -> u64 { + self.n + } + + #[inline] + pub fn mean(&self) -> f64 { + self.avg + } + + #[inline] + pub fn central_moment(&self, p: usize) -> f64 { + let n = f64::approx_from(self.n).unwrap(); + match p { + 0 => 1., + 1 => 0., + _ => self.m[p - 2] / n + } + } + + #[inline] + pub fn standardized_moment(&self, p: usize) -> f64 { + let variance = self.central_moment(2); + assert_ne!(variance, 0.); + let n = f64::approx_from(self.n).unwrap(); + match p { + 0 => n, + 1 => 0., + 2 => 1., + _ => self.central_moment(p) / pow(variance.sqrt(), p), + } + } + + #[inline] + pub fn sample_variance(&self) -> f64 { + if self.n < 2 { + return 0.; + } + self.m[0] / f64::approx_from(self.n - 1).unwrap() + } + + #[inline] + pub fn sample_skewness(&self) -> f64 { + if self.n < 2 { + return 0.; + } + let n = f64::approx_from(self.n).unwrap(); + if self.n < 3 { + // Method of moments + return self.central_moment(3) / + (n * (self.central_moment(2) / (n - 1.)).powf(1.5)) + } + // Adjusted Fisher-Pearson standardized moment coefficient + (n * (n - 1.)).sqrt() / (n * (n - 2.)) * + self.central_moment(3) / (self.central_moment(2) / n).powf(1.5) + } + + #[inline] + pub fn sample_kurtosis(&self) -> f64 { + if self.n < 4 { + return 0.; + } + let n = f64::approx_from(self.n).unwrap(); + (n + 1.) * n * self.central_moment(4) / + ((n - 1.) * (n - 2.) * (n - 3.) * pow(self.central_moment(2), 2)) - + 3. * pow(n - 1., 2) / ((n - 2.) * (n - 3.)) + } + + #[inline] + pub fn add(&mut self, x: f64) { + let mut result = Moments::new(); + result.n = self.n + 1; + let delta = x - self.avg; + let n = f64::approx_from(result.n).unwrap(); + result.avg = self.avg + delta / n; + + let mut coeff_delta = delta; + let over_n = 1. / n; + let mut term1 = (n - 1.) * (-over_n); + let factor1 = -over_n; + let mut term2 = (n - 1.) * over_n; + let factor2 = (n - 1.) * over_n; + + let factor_coeff = -delta * over_n; + + for p in 2..=MAX_P { + term1 *= factor1; + term2 *= factor2; + coeff_delta *= delta; + result.m[p - 2] = self.m[p - 2] + (term1 + term2) * coeff_delta; + + //println!("before: p={} m={:?}", p, self.m); + let mut coeff = 1.; + for k in 1..=(p - 2) { + coeff *= factor_coeff; + result.m[p - 2] += f64::approx_from(binomial(p, k)).unwrap() * + self.m[p - 2 - k] * coeff; + } + //println!("after: p={} m={:?}", p, self.m); + } + *self = result; + /* + self.n += 1; + let delta = x - self.avg; + let n = f64::approx_from(self.n).unwrap(); + self.avg += delta / n; + + let mut coeff_delta = delta; + let over_n = 1. / n; + let mut term1 = (n - 1.) * (-over_n); + let factor1 = -over_n; + let mut term2 = (n - 1.) * over_n; + let factor2 = (n - 1.) * over_n; + + let factor_coeff = -delta * over_n; + + for p in 2..=MAX_P { + term1 *= factor1; + term2 *= factor2; + coeff_delta *= delta; + self.m[p - 2] += (term1 + term2) * coeff_delta; + + println!("before: p={} m={:?}", p, self.m); + let mut coeff = 1.; + for k in 1..=(p - 2) { + coeff *= factor_coeff; + self.m[p - 2] += f64::approx_from(binomial(p, k)).unwrap() * + self.m[p - 2 - k] * coeff; + } + println!("after: p={} m={:?}", p, self.m); + } + */ + } + + #[inline] + pub fn merge(&mut self, other: &Moments) { + let mut result = Moments::new(); + result.n = self.n + other.n; + if result.n == 0 { + return; + } + for i in 0..result.m.len() { + result.m[i] = self.m[i] + other.m[i]; + } + let n = f64::approx_from(result.n).unwrap(); + let n_a = f64::approx_from(self.n).unwrap(); + let n_b = f64::approx_from(other.n).unwrap(); + let n_a_over_n = n_a / n; + let n_b_over_n = n_b / n; + let delta = other.avg - self.avg; + result.avg = self.avg + n_b_over_n * delta; + + let factor_a = -n_b_over_n * delta; + let factor_b = n_a_over_n * delta; + let mut term_a = n_a * factor_a; + let mut term_b = n_b * factor_b; + for p in 2..=MAX_P { + term_a *= factor_a; + term_b *= factor_b; + result.m[p - 2] += term_a + term_b; + + let mut coeff_a = 1.; + let mut coeff_b = 1.; + let mut coeff_delta = 1.; + for k in 1..=(p - 2) { + coeff_a *= -n_b_over_n; + coeff_b *= n_a_over_n; + coeff_delta *= delta; + result.m[p - 2] += f64::approx_from(binomial(p, k)).unwrap() * + coeff_delta * + (self.m[p - 2 - k] * coeff_a + other.m[p - 2 - k] * coeff_b); + // TODO: use IterBinomial + } + } + + *self = result; + } +} + +impl_from_iterator!(Moments); diff --git a/tests/moments.rs b/tests/moments.rs new file mode 100644 index 0000000..f835cd1 --- /dev/null +++ b/tests/moments.rs @@ -0,0 +1,87 @@ +#![cfg_attr(feature = "cargo-clippy", allow(float_cmp, map_clone))] + +#[macro_use] extern crate average; + +extern crate core; +#[cfg(feature = "serde")] +extern crate serde_json; + +use core::iter::Iterator; + +use average::{Moments}; + +#[test] +fn trivial() { + let mut a = Moments::new(); + assert_eq!(a.len(), 0); + a.add(1.0); + assert_eq!(a.len(), 1); + assert_eq!(a.mean(), 1.0); + assert_eq!(a.central_moment(0), 1.0); + assert_eq!(a.central_moment(1), 0.0); + assert_eq!(a.central_moment(2), 0.0); + assert_eq!(a.central_moment(3), 0.0); + a.add(1.0); + assert_eq!(a.len(), 2); + assert_eq!(a.mean(), 1.0); + assert_eq!(a.central_moment(0), 1.0); + assert_eq!(a.central_moment(1), 0.0); + assert_eq!(a.central_moment(2), 0.0); + assert_eq!(a.central_moment(3), 0.0); +} + +#[test] +fn simple() { + let mut a: Moments = (1..6).map(f64::from).collect(); + assert_eq!(a.len(), 5); + assert_eq!(a.mean(), 3.0); + assert_eq!(a.central_moment(0), 1.0); + assert_eq!(a.central_moment(1), 0.0); + // variance + assert_eq!(a.central_moment(2), 2.0); + assert_eq!(a.standardized_moment(0), 5.0); + assert_eq!(a.standardized_moment(1), 0.0); + assert_eq!(a.standardized_moment(2), 1.0); + assert_almost_eq!(a.sample_skewness(), 0.0, 1e-15); + a.add(1.0); + // skewness + assert_almost_eq!(a.standardized_moment(3), 0.2795084971874741, 1e-15); + // kurtosis + assert_almost_eq!(a.standardized_moment(4), -1.365 + 3.0, 1e-14); +} + +/* +#[cfg(feature = "serde")] +#[test] +fn simple_serde() { + let a: Kurtosis = (1..6).map(f64::from).collect(); + let b = serde_json::to_string(&a).unwrap(); + assert_eq!(&b, "{\"avg\":{\"avg\":{\"avg\":{\"avg\":3.0,\"n\":5},\"sum_2\":10.0},\"sum_3\":0.0},\"sum_4\":34.0}"); + let mut c: Kurtosis = serde_json::from_str(&b).unwrap(); + assert_eq!(c.mean(), 3.0); + assert_eq!(c.len(), 5); + assert_eq!(c.sample_variance(), 2.5); + assert_almost_eq!(c.error_mean(), f64::sqrt(0.5), 1e-16); + assert_eq!(c.skewness(), 0.0); + c.add(1.0); + assert_almost_eq!(c.skewness(), 0.2795084971874741, 1e-15); + assert_almost_eq!(c.kurtosis(), -1.365, 1e-15); +} + +#[test] +fn merge() { + let sequence: &[f64] = &[1., 2., 3., -4., 5.1, 6.3, 7.3, -8., 9., 1.]; + for mid in 0..sequence.len() { + let (left, right) = sequence.split_at(mid); + let avg_total: Kurtosis = sequence.iter().map(|x| *x).collect(); + let mut avg_left: Kurtosis = left.iter().map(|x| *x).collect(); + let avg_right: Kurtosis = right.iter().map(|x| *x).collect(); + avg_left.merge(&avg_right); + assert_eq!(avg_total.len(), avg_left.len()); + assert_almost_eq!(avg_total.mean(), avg_left.mean(), 1e-14); + assert_almost_eq!(avg_total.sample_variance(), avg_left.sample_variance(), 1e-14); + assert_almost_eq!(avg_total.skewness(), avg_left.skewness(), 1e-14); + assert_almost_eq!(avg_total.kurtosis(), avg_left.kurtosis(), 1e-14); + } +} +*/