Make it possible to calculate an arbitrary number of moments
This commit is contained in:
parent
1e7a852862
commit
663009f358
@ -96,17 +96,18 @@ extern crate num_traits;
|
||||
extern crate num_integer;
|
||||
|
||||
#[macro_use] mod macros;
|
||||
mod moments;
|
||||
#[macro_use] mod moments;
|
||||
mod weighted_mean;
|
||||
mod minmax;
|
||||
mod quantile;
|
||||
mod traits;
|
||||
#[macro_use] mod histogram;
|
||||
|
||||
pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError, Moments};
|
||||
pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError};
|
||||
pub use weighted_mean::{WeightedMean, WeightedMeanWithError};
|
||||
pub use minmax::{Min, Max};
|
||||
pub use quantile::Quantile;
|
||||
pub use traits::{Estimate, Merge, Histogram};
|
||||
|
||||
define_histogram!(Histogram10, 10);
|
||||
define_moments!(Moments4, 4);
|
||||
|
@ -1,8 +1,6 @@
|
||||
use core;
|
||||
|
||||
use conv::ApproxFrom;
|
||||
use num_traits::pow;
|
||||
use num_integer::IterBinomial;
|
||||
|
||||
use super::{Estimate, Merge};
|
||||
|
||||
@ -11,217 +9,256 @@ include!("variance.rs");
|
||||
include!("skewness.rs");
|
||||
include!("kurtosis.rs");
|
||||
|
||||
|
||||
/// Alias for `Variance`.
|
||||
pub type MeanWithError = Variance;
|
||||
|
||||
/// The maximal order of the moment to be calculated.
|
||||
const MAX_P: usize = 4;
|
||||
|
||||
/// Estimate the first N moments of a sequence of numbers a sequence of numbers
|
||||
/// ("population").
|
||||
/// Define an estimator of all moments up to a number given at compile time.
|
||||
///
|
||||
/// This uses a [general algorithm][paper] and is less efficient than the
|
||||
/// specialized implementations (such as [`Mean`], [`Variance`], [`Skewness`]
|
||||
/// and [`Kurtosis`]).
|
||||
/// and [`Kurtosis`]), but it works for any number of moments >= 4.
|
||||
///
|
||||
/// [paper]: https://doi.org/10.1007/s00180-015-0637-z.
|
||||
/// [`Mean`]: ./struct.Mean.html
|
||||
/// [`Variance`]: ./struct.Variance.html
|
||||
/// [`Skewness`]: ./struct.Skewness.html
|
||||
/// [`Kurtosis`]: ./struct.Kurtosis.html
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct Moments {
|
||||
/// Number of samples.
|
||||
///
|
||||
/// Technically, this is the same as m_0, but we want this to be an integer
|
||||
/// to avoid numerical issues, so we store it separately.
|
||||
n: u64,
|
||||
/// Average.
|
||||
avg: f64,
|
||||
/// Moments times `n`.
|
||||
///
|
||||
/// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition.
|
||||
m: [f64; MAX_P - 1],
|
||||
}
|
||||
///
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// # extern crate core;
|
||||
/// # extern crate conv;
|
||||
/// # extern crate num_integer;
|
||||
/// # extern crate num_traits;
|
||||
/// # #[macro_use] extern crate average;
|
||||
/// # fn main() {
|
||||
/// define_moments!(Moments4, 4);
|
||||
///
|
||||
/// let mut a: Moments4 = (1..6).map(f64::from).collect();
|
||||
/// assert_eq!(a.len(), 5);
|
||||
/// assert_eq!(a.mean(), 3.0);
|
||||
/// assert_eq!(a.central_moment(0), 1.0);
|
||||
/// assert_eq!(a.central_moment(1), 0.0);
|
||||
/// assert_eq!(a.central_moment(2), 2.0);
|
||||
/// assert_eq!(a.standardized_moment(0), 5.0);
|
||||
/// assert_eq!(a.standardized_moment(1), 0.0);
|
||||
/// assert_eq!(a.standardized_moment(2), 1.0);
|
||||
/// a.add(1.0);
|
||||
/// // skewness
|
||||
/// assert_almost_eq!(a.standardized_moment(3), 0.2795084971874741, 1e-15);
|
||||
/// // kurtosis
|
||||
/// assert_almost_eq!(a.standardized_moment(4), -1.365 + 3.0, 1e-14);
|
||||
/// # }
|
||||
/// ```
|
||||
#[macro_export]
|
||||
//#[allow(dead_code)]
|
||||
macro_rules! define_moments {
|
||||
($name:ident, $MAX_MOMENT:expr) => (
|
||||
use ::conv::ApproxFrom;
|
||||
use ::num_traits::pow;
|
||||
use ::num_integer::IterBinomial;
|
||||
|
||||
impl Moments {
|
||||
/// Create a new moments estimator.
|
||||
#[inline]
|
||||
pub fn new() -> Moments {
|
||||
Moments {
|
||||
n: 0,
|
||||
avg: 0.,
|
||||
m: [0.; MAX_P - 1],
|
||||
/// The maximal order of the moment to be calculated.
|
||||
const MAX_MOMENT: usize = $MAX_MOMENT;
|
||||
|
||||
/// Estimate the first N moments of a sequence of numbers a sequence of numbers
|
||||
/// ("population").
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct $name {
|
||||
/// Number of samples.
|
||||
///
|
||||
/// Technically, this is the same as m_0, but we want this to be an integer
|
||||
/// to avoid numerical issues, so we store it separately.
|
||||
n: u64,
|
||||
/// Average.
|
||||
avg: f64,
|
||||
/// Moments times `n`.
|
||||
///
|
||||
/// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition.
|
||||
m: [f64; MAX_MOMENT - 1],
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine whether the sample is empty.
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.n == 0
|
||||
}
|
||||
impl $name {
|
||||
/// Create a new moments estimator.
|
||||
#[inline]
|
||||
pub fn new() -> $name {
|
||||
$name {
|
||||
n: 0,
|
||||
avg: 0.,
|
||||
m: [0.; MAX_MOMENT - 1],
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the sample size.
|
||||
#[inline]
|
||||
pub fn len(&self) -> u64 {
|
||||
self.n
|
||||
}
|
||||
/// Determine whether the sample is empty.
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.n == 0
|
||||
}
|
||||
|
||||
/// Estimate the mean of the population.
|
||||
///
|
||||
/// Returns 0 for an empty sample.
|
||||
#[inline]
|
||||
pub fn mean(&self) -> f64 {
|
||||
self.avg
|
||||
}
|
||||
/// Return the sample size.
|
||||
#[inline]
|
||||
pub fn len(&self) -> u64 {
|
||||
self.n
|
||||
}
|
||||
|
||||
/// Estimate the `p`th central moment of the population.
|
||||
#[inline]
|
||||
pub fn central_moment(&self, p: usize) -> f64 {
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
match p {
|
||||
0 => 1.,
|
||||
1 => 0.,
|
||||
_ => self.m[p - 2] / n
|
||||
}
|
||||
}
|
||||
/// Estimate the mean of the population.
|
||||
///
|
||||
/// Returns 0 for an empty sample.
|
||||
#[inline]
|
||||
pub fn mean(&self) -> f64 {
|
||||
self.avg
|
||||
}
|
||||
|
||||
/// Estimate the `p`th standardized moment of the population.
|
||||
#[inline]
|
||||
pub fn standardized_moment(&self, p: usize) -> f64 {
|
||||
let variance = self.central_moment(2);
|
||||
assert_ne!(variance, 0.);
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
match p {
|
||||
0 => n,
|
||||
1 => 0.,
|
||||
2 => 1.,
|
||||
_ => self.central_moment(p) / pow(variance.sqrt(), p),
|
||||
}
|
||||
}
|
||||
/// Estimate the `p`th central moment of the population.
|
||||
#[inline]
|
||||
pub fn central_moment(&self, p: usize) -> f64 {
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
match p {
|
||||
0 => 1.,
|
||||
1 => 0.,
|
||||
_ => self.m[p - 2] / n
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate the sample variance.
|
||||
///
|
||||
/// This is an unbiased estimator of the variance of the population.
|
||||
#[inline]
|
||||
pub fn sample_variance(&self) -> f64 {
|
||||
if self.n < 2 {
|
||||
return 0.;
|
||||
}
|
||||
self.m[0] / f64::approx_from(self.n - 1).unwrap()
|
||||
}
|
||||
/// Estimate the `p`th standardized moment of the population.
|
||||
#[inline]
|
||||
pub fn standardized_moment(&self, p: usize) -> f64 {
|
||||
let variance = self.central_moment(2);
|
||||
assert_ne!(variance, 0.);
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
match p {
|
||||
0 => n,
|
||||
1 => 0.,
|
||||
2 => 1.,
|
||||
_ => self.central_moment(p) / pow(variance.sqrt(), p),
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate the sample skewness.
|
||||
#[inline]
|
||||
pub fn sample_skewness(&self) -> f64 {
|
||||
if self.n < 2 {
|
||||
return 0.;
|
||||
}
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
if self.n < 3 {
|
||||
// Method of moments
|
||||
return self.central_moment(3) /
|
||||
(n * (self.central_moment(2) / (n - 1.)).powf(1.5))
|
||||
}
|
||||
// Adjusted Fisher-Pearson standardized moment coefficient
|
||||
(n * (n - 1.)).sqrt() / (n * (n - 2.)) *
|
||||
self.central_moment(3) / (self.central_moment(2) / n).powf(1.5)
|
||||
}
|
||||
/// Calculate the sample variance.
|
||||
///
|
||||
/// This is an unbiased estimator of the variance of the population.
|
||||
#[inline]
|
||||
pub fn sample_variance(&self) -> f64 {
|
||||
if self.n < 2 {
|
||||
return 0.;
|
||||
}
|
||||
self.m[0] / f64::approx_from(self.n - 1).unwrap()
|
||||
}
|
||||
|
||||
/// Calculate the sample excess kurtosis.
|
||||
#[inline]
|
||||
pub fn sample_excess_kurtosis(&self) -> f64 {
|
||||
if self.n < 4 {
|
||||
return 0.;
|
||||
}
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
(n + 1.) * n * self.central_moment(4) /
|
||||
((n - 1.) * (n - 2.) * (n - 3.) * pow(self.central_moment(2), 2)) -
|
||||
3. * pow(n - 1., 2) / ((n - 2.) * (n - 3.))
|
||||
}
|
||||
/// Calculate the sample skewness.
|
||||
#[inline]
|
||||
pub fn sample_skewness(&self) -> f64 {
|
||||
if self.n < 2 {
|
||||
return 0.;
|
||||
}
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
if self.n < 3 {
|
||||
// Method of moments
|
||||
return self.central_moment(3) /
|
||||
(n * (self.central_moment(2) / (n - 1.)).powf(1.5))
|
||||
}
|
||||
// Adjusted Fisher-Pearson standardized moment coefficient
|
||||
(n * (n - 1.)).sqrt() / (n * (n - 2.)) *
|
||||
self.central_moment(3) / (self.central_moment(2) / n).powf(1.5)
|
||||
}
|
||||
|
||||
/// Add an observation sampled from the population.
|
||||
#[inline]
|
||||
pub fn add(&mut self, x: f64) {
|
||||
self.n += 1;
|
||||
let delta = x - self.avg;
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
self.avg += delta / n;
|
||||
/// Calculate the sample excess kurtosis.
|
||||
#[inline]
|
||||
pub fn sample_excess_kurtosis(&self) -> f64 {
|
||||
if self.n < 4 {
|
||||
return 0.;
|
||||
}
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
(n + 1.) * n * self.central_moment(4) /
|
||||
((n - 1.) * (n - 2.) * (n - 3.) * pow(self.central_moment(2), 2)) -
|
||||
3. * pow(n - 1., 2) / ((n - 2.) * (n - 3.))
|
||||
}
|
||||
|
||||
let mut coeff_delta = delta;
|
||||
let over_n = 1. / n;
|
||||
let mut term1 = (n - 1.) * (-over_n);
|
||||
let factor1 = -over_n;
|
||||
let mut term2 = (n - 1.) * over_n;
|
||||
let factor2 = (n - 1.) * over_n;
|
||||
/// Add an observation sampled from the population.
|
||||
#[inline]
|
||||
pub fn add(&mut self, x: f64) {
|
||||
self.n += 1;
|
||||
let delta = x - self.avg;
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
self.avg += delta / n;
|
||||
|
||||
let factor_coeff = -delta * over_n;
|
||||
let mut coeff_delta = delta;
|
||||
let over_n = 1. / n;
|
||||
let mut term1 = (n - 1.) * (-over_n);
|
||||
let factor1 = -over_n;
|
||||
let mut term2 = (n - 1.) * over_n;
|
||||
let factor2 = (n - 1.) * over_n;
|
||||
|
||||
let prev_m = self.m;
|
||||
for p in 2..(MAX_P + 1) {
|
||||
term1 *= factor1;
|
||||
term2 *= factor2;
|
||||
coeff_delta *= delta;
|
||||
self.m[p - 2] += (term1 + term2) * coeff_delta;
|
||||
let factor_coeff = -delta * over_n;
|
||||
|
||||
let mut coeff = 1.;
|
||||
let mut binom = IterBinomial::new(p as u64);
|
||||
binom.next().unwrap(); // Skip k = 0.
|
||||
for k in 1..(p - 1) {
|
||||
coeff *= factor_coeff;
|
||||
self.m[p - 2] += f64::approx_from(binom.next().unwrap()).unwrap() *
|
||||
prev_m[p - 2 - k] * coeff;
|
||||
let prev_m = self.m;
|
||||
for p in 2..(MAX_MOMENT + 1) {
|
||||
term1 *= factor1;
|
||||
term2 *= factor2;
|
||||
coeff_delta *= delta;
|
||||
self.m[p - 2] += (term1 + term2) * coeff_delta;
|
||||
|
||||
let mut coeff = 1.;
|
||||
let mut binom = IterBinomial::new(p as u64);
|
||||
binom.next().unwrap(); // Skip k = 0.
|
||||
for k in 1..(p - 1) {
|
||||
coeff *= factor_coeff;
|
||||
self.m[p - 2] += f64::approx_from(binom.next().unwrap()).unwrap() *
|
||||
prev_m[p - 2 - k] * coeff;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Merge for Moments {
|
||||
#[inline]
|
||||
fn merge(&mut self, other: &Moments) {
|
||||
let n_a = f64::approx_from(self.n).unwrap();
|
||||
let n_b = f64::approx_from(other.n).unwrap();
|
||||
let delta = other.avg - self.avg;
|
||||
impl $crate::Merge for $name {
|
||||
#[inline]
|
||||
fn merge(&mut self, other: &$name) {
|
||||
let n_a = f64::approx_from(self.n).unwrap();
|
||||
let n_b = f64::approx_from(other.n).unwrap();
|
||||
let delta = other.avg - self.avg;
|
||||
|
||||
self.n += other.n;
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
let n_a_over_n = n_a / n;
|
||||
let n_b_over_n = n_b / n;
|
||||
self.avg += n_b_over_n * delta;
|
||||
self.n += other.n;
|
||||
let n = f64::approx_from(self.n).unwrap();
|
||||
let n_a_over_n = n_a / n;
|
||||
let n_b_over_n = n_b / n;
|
||||
self.avg += n_b_over_n * delta;
|
||||
|
||||
let factor_a = -n_b_over_n * delta;
|
||||
let factor_b = n_a_over_n * delta;
|
||||
let mut term_a = n_a * factor_a;
|
||||
let mut term_b = n_b * factor_b;
|
||||
let prev_m = self.m;
|
||||
for p in 2..(MAX_P + 1) {
|
||||
term_a *= factor_a;
|
||||
term_b *= factor_b;
|
||||
self.m[p - 2] += other.m[p - 2] + term_a + term_b;
|
||||
let factor_a = -n_b_over_n * delta;
|
||||
let factor_b = n_a_over_n * delta;
|
||||
let mut term_a = n_a * factor_a;
|
||||
let mut term_b = n_b * factor_b;
|
||||
let prev_m = self.m;
|
||||
for p in 2..(MAX_MOMENT + 1) {
|
||||
term_a *= factor_a;
|
||||
term_b *= factor_b;
|
||||
self.m[p - 2] += other.m[p - 2] + term_a + term_b;
|
||||
|
||||
let mut coeff_a = 1.;
|
||||
let mut coeff_b = 1.;
|
||||
let mut coeff_delta = 1.;
|
||||
let mut binom = IterBinomial::new(p);
|
||||
binom.next().unwrap();
|
||||
for k in 1..(p - 1) {
|
||||
coeff_a *= -n_b_over_n;
|
||||
coeff_b *= n_a_over_n;
|
||||
coeff_delta *= delta;
|
||||
self.m[p - 2] += f64::approx_from(binom.next().unwrap()).unwrap() *
|
||||
coeff_delta *
|
||||
(prev_m[p - 2 - k] * coeff_a + other.m[p - 2 - k] * coeff_b);
|
||||
let mut coeff_a = 1.;
|
||||
let mut coeff_b = 1.;
|
||||
let mut coeff_delta = 1.;
|
||||
let mut binom = IterBinomial::new(p);
|
||||
binom.next().unwrap();
|
||||
for k in 1..(p - 1) {
|
||||
coeff_a *= -n_b_over_n;
|
||||
coeff_b *= n_a_over_n;
|
||||
coeff_delta *= delta;
|
||||
self.m[p - 2] += f64::approx_from(binom.next().unwrap()).unwrap() *
|
||||
coeff_delta *
|
||||
(prev_m[p - 2 - k] * coeff_a + other.m[p - 2 - k] * coeff_b);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl core::default::Default for Moments {
|
||||
fn default() -> Moments {
|
||||
Moments::new()
|
||||
}
|
||||
}
|
||||
impl core::default::Default for $name {
|
||||
fn default() -> $name {
|
||||
$name::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl_from_iterator!(Moments);
|
||||
impl_from_iterator!($name);
|
||||
);
|
||||
}
|
||||
|
@ -8,11 +8,11 @@ extern crate serde_json;
|
||||
|
||||
use core::iter::Iterator;
|
||||
|
||||
use average::{Moments, Merge};
|
||||
use average::{Moments4, Merge};
|
||||
|
||||
#[test]
|
||||
fn trivial() {
|
||||
let mut a = Moments::new();
|
||||
let mut a = Moments4::new();
|
||||
assert_eq!(a.len(), 0);
|
||||
a.add(1.0);
|
||||
assert_eq!(a.len(), 1);
|
||||
@ -32,7 +32,7 @@ fn trivial() {
|
||||
|
||||
#[test]
|
||||
fn simple() {
|
||||
let mut a: Moments = (1..6).map(f64::from).collect();
|
||||
let mut a: Moments4 = (1..6).map(f64::from).collect();
|
||||
assert_eq!(a.len(), 5);
|
||||
assert_eq!(a.mean(), 3.0);
|
||||
assert_eq!(a.central_moment(0), 1.0);
|
||||
@ -54,7 +54,7 @@ fn simple() {
|
||||
#[cfg(feature = "serde")]
|
||||
#[test]
|
||||
fn simple_serde() {
|
||||
let a: Moments = (1..6).map(f64::from).collect();
|
||||
let a: Moments4 = (1..6).map(f64::from).collect();
|
||||
let b = serde_json::to_string(&a).unwrap();
|
||||
assert_eq!(&b, "{\"n\":5,\"avg\":3.0,\"m\":[10.0,1.7763568394002506e-15,34.00000000000001]}");
|
||||
let mut c: Moments = serde_json::from_str(&b).unwrap();
|
||||
@ -81,9 +81,9 @@ fn merge() {
|
||||
let sequence: &[f64] = &[1., 2., 3., -4., 5.1, 6.3, 7.3, -8., 9., 1.];
|
||||
for mid in 0..sequence.len() {
|
||||
let (left, right) = sequence.split_at(mid);
|
||||
let avg_total: Moments = sequence.iter().collect();
|
||||
let mut avg_left: Moments = left.iter().collect();
|
||||
let avg_right: Moments = right.iter().collect();
|
||||
let avg_total: Moments4 = sequence.iter().collect();
|
||||
let mut avg_left: Moments4 = left.iter().collect();
|
||||
let avg_right: Moments4 = right.iter().collect();
|
||||
avg_left.merge(&avg_right);
|
||||
assert_eq!(avg_total.len(), avg_left.len());
|
||||
assert_almost_eq!(avg_total.mean(), avg_left.mean(), 1e-14);
|
||||
|
Loading…
x
Reference in New Issue
Block a user