Make it possible to calculate an arbitrary number of moments

2018-07-10 17:19:57 +02:00 · 2018-07-10 17:19:57 +02:00 · 663009f358
commit 663009f358
parent 1e7a852862
3 changed files with 226 additions and 188 deletions
--- a/src/lib.rs
+++ b/src/lib.rs
@ -96,17 +96,18 @@ extern crate num_traits;
 extern crate num_integer;

 #[macro_use] mod macros;
-mod moments;
+#[macro_use] mod moments;
 mod weighted_mean;
 mod minmax;
 mod quantile;
 mod traits;
 #[macro_use] mod histogram;

-pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError, Moments};
+pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError};
 pub use weighted_mean::{WeightedMean, WeightedMeanWithError};
 pub use minmax::{Min, Max};
 pub use quantile::Quantile;
 pub use traits::{Estimate, Merge, Histogram};

 define_histogram!(Histogram10, 10);
+define_moments!(Moments4, 4);
--- a/src/moments/mod.rs
+++ b/src/moments/mod.rs
@ -1,8 +1,6 @@
 use core;

 use conv::ApproxFrom;
-use num_traits::pow;
-use num_integer::IterBinomial;

 use super::{Estimate, Merge};

@ -11,217 +9,256 @@ include!("variance.rs");
 include!("skewness.rs");
 include!("kurtosis.rs");

-
 /// Alias for `Variance`.
 pub type MeanWithError = Variance;

-/// The maximal order of the moment to be calculated.
-const MAX_P: usize = 4;
-
-/// Estimate the first N moments of a sequence of numbers a sequence of numbers
-/// ("population").
+/// Define an estimator of all moments up to a number given at compile time.
 ///
 /// This uses a [general algorithm][paper] and is less efficient than the
 /// specialized implementations (such as [`Mean`], [`Variance`], [`Skewness`]
-/// and [`Kurtosis`]).
+/// and [`Kurtosis`]), but it works for any number of moments >= 4.
 ///
 /// [paper]: https://doi.org/10.1007/s00180-015-0637-z.
 /// [`Mean`]: ./struct.Mean.html
 /// [`Variance`]: ./struct.Variance.html
 /// [`Skewness`]: ./struct.Skewness.html
 /// [`Kurtosis`]: ./struct.Kurtosis.html
-#[derive(Debug, Clone)]
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-pub struct Moments {
-    /// Number of samples.
-    ///
-    /// Technically, this is the same as m_0, but we want this to be an integer
-    /// to avoid numerical issues, so we store it separately.
-    n: u64,
-    /// Average.
-    avg: f64,
-    /// Moments times `n`.
-    ///
-    /// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition.
-    m: [f64; MAX_P - 1],
-}
+///
+///
+/// # Example
+///
+/// ```
+/// # extern crate core;
+/// # extern crate conv;
+/// # extern crate num_integer;
+/// # extern crate num_traits;
+/// # #[macro_use] extern crate average;
+/// # fn main() {
+/// define_moments!(Moments4, 4);
+///
+/// let mut a: Moments4 = (1..6).map(f64::from).collect();
+/// assert_eq!(a.len(), 5);
+/// assert_eq!(a.mean(), 3.0);
+/// assert_eq!(a.central_moment(0), 1.0);
+/// assert_eq!(a.central_moment(1), 0.0);
+/// assert_eq!(a.central_moment(2), 2.0);
+/// assert_eq!(a.standardized_moment(0), 5.0);
+/// assert_eq!(a.standardized_moment(1), 0.0);
+/// assert_eq!(a.standardized_moment(2), 1.0);
+/// a.add(1.0);
+/// // skewness
+/// assert_almost_eq!(a.standardized_moment(3), 0.2795084971874741, 1e-15);
+/// // kurtosis
+/// assert_almost_eq!(a.standardized_moment(4), -1.365 + 3.0, 1e-14);
+/// # }
+/// ```
+#[macro_export]
+//#[allow(dead_code)]
+macro_rules! define_moments {
+    ($name:ident, $MAX_MOMENT:expr) => (
+        use ::conv::ApproxFrom;
+        use ::num_traits::pow;
+        use ::num_integer::IterBinomial;

-impl Moments {
-    /// Create a new moments estimator.
-    #[inline]
-    pub fn new() -> Moments {
-        Moments {
-            n: 0,
-            avg: 0.,
-            m: [0.; MAX_P - 1],
+        /// The maximal order of the moment to be calculated.
+        const MAX_MOMENT: usize = $MAX_MOMENT;
+
+        /// Estimate the first N moments of a sequence of numbers a sequence of numbers
+        /// ("population").
+        #[derive(Debug, Clone)]
+        #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+        pub struct $name {
+            /// Number of samples.
+            ///
+            /// Technically, this is the same as m_0, but we want this to be an integer
+            /// to avoid numerical issues, so we store it separately.
+            n: u64,
+            /// Average.
+            avg: f64,
+            /// Moments times `n`.
+            ///
+            /// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition.
+            m: [f64; MAX_MOMENT - 1],
        }
-    }

-    /// Determine whether the sample is empty.
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.n == 0
-    }
+        impl $name {
+            /// Create a new moments estimator.
+            #[inline]
+            pub fn new() -> $name {
+                $name {
+                    n: 0,
+                    avg: 0.,
+                    m: [0.; MAX_MOMENT - 1],
+                }
+            }

-    /// Return the sample size.
-    #[inline]
-    pub fn len(&self) -> u64 {
-        self.n
-    }
+            /// Determine whether the sample is empty.
+            #[inline]
+            pub fn is_empty(&self) -> bool {
+                self.n == 0
+            }

-    /// Estimate the mean of the population.
-    ///
-    /// Returns 0 for an empty sample.
-    #[inline]
-    pub fn mean(&self) -> f64 {
-        self.avg
-    }
+            /// Return the sample size.
+            #[inline]
+            pub fn len(&self) -> u64 {
+                self.n
+            }

-    /// Estimate the `p`th central moment of the population.
-    #[inline]
-    pub fn central_moment(&self, p: usize) -> f64 {
-        let n = f64::approx_from(self.n).unwrap();
-        match p {
-            0 => 1.,
-            1 => 0.,
-            _ => self.m[p - 2] / n
-        }
-    }
+            /// Estimate the mean of the population.
+            ///
+            /// Returns 0 for an empty sample.
+            #[inline]
+            pub fn mean(&self) -> f64 {
+                self.avg
+            }

-    /// Estimate the `p`th standardized moment of the population.
-    #[inline]
-    pub fn standardized_moment(&self, p: usize) -> f64 {
-        let variance = self.central_moment(2);
-        assert_ne!(variance, 0.);
-        let n = f64::approx_from(self.n).unwrap();
-        match p {
-            0 => n,
-            1 => 0.,
-            2 => 1.,
-            _ => self.central_moment(p) / pow(variance.sqrt(), p),
-        }
-    }
+            /// Estimate the `p`th central moment of the population.
+            #[inline]
+            pub fn central_moment(&self, p: usize) -> f64 {
+                let n = f64::approx_from(self.n).unwrap();
+                match p {
+                    0 => 1.,
+                    1 => 0.,
+                    _ => self.m[p - 2] / n
+                }
+            }

-    /// Calculate the sample variance.
-    ///
-    /// This is an unbiased estimator of the variance of the population.
-    #[inline]
-    pub fn sample_variance(&self) -> f64 {
-        if self.n < 2 {
-            return 0.;
-        }
-        self.m[0] / f64::approx_from(self.n - 1).unwrap()
-    }
+            /// Estimate the `p`th standardized moment of the population.
+            #[inline]
+            pub fn standardized_moment(&self, p: usize) -> f64 {
+                let variance = self.central_moment(2);
+                assert_ne!(variance, 0.);
+                let n = f64::approx_from(self.n).unwrap();
+                match p {
+                    0 => n,
+                    1 => 0.,
+                    2 => 1.,
+                    _ => self.central_moment(p) / pow(variance.sqrt(), p),
+                }
+            }

-    /// Calculate the sample skewness.
-    #[inline]
-    pub fn sample_skewness(&self) -> f64 {
-        if self.n < 2 {
-            return 0.;
-        }
-        let n = f64::approx_from(self.n).unwrap();
-        if self.n < 3 {
-            // Method of moments
-            return self.central_moment(3) /
-                (n * (self.central_moment(2) / (n - 1.)).powf(1.5))
-        }
-        // Adjusted Fisher-Pearson standardized moment coefficient
-        (n * (n - 1.)).sqrt() / (n * (n - 2.)) *
-            self.central_moment(3) / (self.central_moment(2) / n).powf(1.5)
-    }
+            /// Calculate the sample variance.
+            ///
+            /// This is an unbiased estimator of the variance of the population.
+            #[inline]
+            pub fn sample_variance(&self) -> f64 {
+                if self.n < 2 {
+                    return 0.;
+                }
+                self.m[0] / f64::approx_from(self.n - 1).unwrap()
+            }

-    /// Calculate the sample excess kurtosis.
-    #[inline]
-    pub fn sample_excess_kurtosis(&self) -> f64 {
-        if self.n < 4 {
-            return 0.;
-        }
-        let n = f64::approx_from(self.n).unwrap();
-        (n + 1.) * n * self.central_moment(4) /
-            ((n - 1.) * (n - 2.) * (n - 3.) * pow(self.central_moment(2), 2)) -
-            3. * pow(n - 1., 2) / ((n - 2.) * (n - 3.))
-    }
+            /// Calculate the sample skewness.
+            #[inline]
+            pub fn sample_skewness(&self) -> f64 {
+                if self.n < 2 {
+                    return 0.;
+                }
+                let n = f64::approx_from(self.n).unwrap();
+                if self.n < 3 {
+                    // Method of moments
+                    return self.central_moment(3) /
+                        (n * (self.central_moment(2) / (n - 1.)).powf(1.5))
+                }
+                // Adjusted Fisher-Pearson standardized moment coefficient
+                (n * (n - 1.)).sqrt() / (n * (n - 2.)) *
+                    self.central_moment(3) / (self.central_moment(2) / n).powf(1.5)
+            }

-    /// Add an observation sampled from the population.
-    #[inline]
-    pub fn add(&mut self, x: f64) {
-        self.n += 1;
-        let delta = x - self.avg;
-        let n = f64::approx_from(self.n).unwrap();
-        self.avg += delta / n;
+            /// Calculate the sample excess kurtosis.
+            #[inline]
+            pub fn sample_excess_kurtosis(&self) -> f64 {
+                if self.n < 4 {
+                    return 0.;
+                }
+                let n = f64::approx_from(self.n).unwrap();
+                (n + 1.) * n * self.central_moment(4) /
+                    ((n - 1.) * (n - 2.) * (n - 3.) * pow(self.central_moment(2), 2)) -
+                    3. * pow(n - 1., 2) / ((n - 2.) * (n - 3.))
+            }

-        let mut coeff_delta = delta;
-        let over_n = 1. / n;
-        let mut term1 = (n - 1.) * (-over_n);
-        let factor1 = -over_n;
-        let mut term2 = (n - 1.) * over_n;
-        let factor2 = (n - 1.) * over_n;
+            /// Add an observation sampled from the population.
+            #[inline]
+            pub fn add(&mut self, x: f64) {
+                self.n += 1;
+                let delta = x - self.avg;
+                let n = f64::approx_from(self.n).unwrap();
+                self.avg += delta / n;

-        let factor_coeff = -delta * over_n;
+                let mut coeff_delta = delta;
+                let over_n = 1. / n;
+                let mut term1 = (n - 1.) * (-over_n);
+                let factor1 = -over_n;
+                let mut term2 = (n - 1.) * over_n;
+                let factor2 = (n - 1.) * over_n;

-        let prev_m = self.m;
-        for p in 2..(MAX_P + 1) {
-            term1 *= factor1;
-            term2 *= factor2;
-            coeff_delta *= delta;
-            self.m[p - 2] += (term1 + term2) * coeff_delta;
+                let factor_coeff = -delta * over_n;

-            let mut coeff = 1.;
-            let mut binom = IterBinomial::new(p as u64);
-            binom.next().unwrap();  // Skip k = 0.
-            for k in 1..(p - 1) {
-                coeff *= factor_coeff;
-                self.m[p - 2] += f64::approx_from(binom.next().unwrap()).unwrap() *
-                    prev_m[p - 2 - k] * coeff;
+                let prev_m = self.m;
+                for p in 2..(MAX_MOMENT + 1) {
+                    term1 *= factor1;
+                    term2 *= factor2;
+                    coeff_delta *= delta;
+                    self.m[p - 2] += (term1 + term2) * coeff_delta;
+
+                    let mut coeff = 1.;
+                    let mut binom = IterBinomial::new(p as u64);
+                    binom.next().unwrap();  // Skip k = 0.
+                    for k in 1..(p - 1) {
+                        coeff *= factor_coeff;
+                        self.m[p - 2] += f64::approx_from(binom.next().unwrap()).unwrap() *
+                            prev_m[p - 2 - k] * coeff;
+                    }
+                }
            }
        }
-    }
-}

-impl Merge for Moments {
-    #[inline]
-    fn merge(&mut self, other: &Moments) {
-        let n_a = f64::approx_from(self.n).unwrap();
-        let n_b = f64::approx_from(other.n).unwrap();
-        let delta = other.avg - self.avg;
+        impl $crate::Merge for $name {
+            #[inline]
+            fn merge(&mut self, other: &$name) {
+                let n_a = f64::approx_from(self.n).unwrap();
+                let n_b = f64::approx_from(other.n).unwrap();
+                let delta = other.avg - self.avg;

-        self.n += other.n;
-        let n = f64::approx_from(self.n).unwrap();
-        let n_a_over_n = n_a / n;
-        let n_b_over_n = n_b / n;
-        self.avg += n_b_over_n * delta;
+                self.n += other.n;
+                let n = f64::approx_from(self.n).unwrap();
+                let n_a_over_n = n_a / n;
+                let n_b_over_n = n_b / n;
+                self.avg += n_b_over_n * delta;

-        let factor_a = -n_b_over_n * delta;
-        let factor_b = n_a_over_n * delta;
-        let mut term_a = n_a * factor_a;
-        let mut term_b = n_b * factor_b;
-        let prev_m = self.m;
-        for p in 2..(MAX_P + 1) {
-            term_a *= factor_a;
-            term_b *= factor_b;
-            self.m[p - 2] += other.m[p - 2] + term_a + term_b;
+                let factor_a = -n_b_over_n * delta;
+                let factor_b = n_a_over_n * delta;
+                let mut term_a = n_a * factor_a;
+                let mut term_b = n_b * factor_b;
+                let prev_m = self.m;
+                for p in 2..(MAX_MOMENT + 1) {
+                    term_a *= factor_a;
+                    term_b *= factor_b;
+                    self.m[p - 2] += other.m[p - 2] + term_a + term_b;

-            let mut coeff_a = 1.;
-            let mut coeff_b = 1.;
-            let mut coeff_delta = 1.;
-            let mut binom = IterBinomial::new(p);
-            binom.next().unwrap();
-            for k in 1..(p - 1) {
-                coeff_a *= -n_b_over_n;
-                coeff_b *= n_a_over_n;
-                coeff_delta *= delta;
-                self.m[p - 2] += f64::approx_from(binom.next().unwrap()).unwrap() *
-                    coeff_delta *
-                    (prev_m[p - 2 - k] * coeff_a + other.m[p - 2 - k] * coeff_b);
+                    let mut coeff_a = 1.;
+                    let mut coeff_b = 1.;
+                    let mut coeff_delta = 1.;
+                    let mut binom = IterBinomial::new(p);
+                    binom.next().unwrap();
+                    for k in 1..(p - 1) {
+                        coeff_a *= -n_b_over_n;
+                        coeff_b *= n_a_over_n;
+                        coeff_delta *= delta;
+                        self.m[p - 2] += f64::approx_from(binom.next().unwrap()).unwrap() *
+                            coeff_delta *
+                            (prev_m[p - 2 - k] * coeff_a + other.m[p - 2 - k] * coeff_b);
+                    }
+                }
            }
        }
-    }
-}

-impl core::default::Default for Moments {
-    fn default() -> Moments {
-        Moments::new()
-    }
-}
+        impl core::default::Default for $name {
+            fn default() -> $name {
+                $name::new()
+            }
+        }

-impl_from_iterator!(Moments);
+        impl_from_iterator!($name);
+    );
+}
--- a/tests/moments.rs
+++ b/tests/moments.rs
@ -8,11 +8,11 @@ extern crate serde_json;

 use core::iter::Iterator;

-use average::{Moments, Merge};
+use average::{Moments4, Merge};

 #[test]
 fn trivial() {
-    let mut a = Moments::new();
+    let mut a = Moments4::new();
    assert_eq!(a.len(), 0);
    a.add(1.0);
    assert_eq!(a.len(), 1);
@ -32,7 +32,7 @@ fn trivial() {

 #[test]
 fn simple() {
-    let mut a: Moments = (1..6).map(f64::from).collect();
+    let mut a: Moments4 = (1..6).map(f64::from).collect();
    assert_eq!(a.len(), 5);
    assert_eq!(a.mean(), 3.0);
    assert_eq!(a.central_moment(0), 1.0);
@ -54,7 +54,7 @@ fn simple() {
 #[cfg(feature = "serde")]
 #[test]
 fn simple_serde() {
-    let a: Moments = (1..6).map(f64::from).collect();
+    let a: Moments4 = (1..6).map(f64::from).collect();
    let b = serde_json::to_string(&a).unwrap();
    assert_eq!(&b, "{\"n\":5,\"avg\":3.0,\"m\":[10.0,1.7763568394002506e-15,34.00000000000001]}");
    let mut c: Moments = serde_json::from_str(&b).unwrap();
@ -81,9 +81,9 @@ fn merge() {
    let sequence: &[f64] = &[1., 2., 3., -4., 5.1, 6.3, 7.3, -8., 9., 1.];
    for mid in 0..sequence.len() {
        let (left, right) = sequence.split_at(mid);
-        let avg_total: Moments = sequence.iter().collect();
-        let mut avg_left: Moments = left.iter().collect();
-        let avg_right: Moments = right.iter().collect();
+        let avg_total: Moments4 = sequence.iter().collect();
+        let mut avg_left: Moments4 = left.iter().collect();
+        let avg_right: Moments4 = right.iter().collect();
        avg_left.merge(&avg_right);
        assert_eq!(avg_total.len(), avg_left.len());
        assert_almost_eq!(avg_total.mean(), avg_left.mean(), 1e-14);