This commit is contained in:
Andrea Cavalli 2019-12-02 12:49:54 +01:00
parent 27a14185ee
commit be1bd4c5c6
3 changed files with 201 additions and 26 deletions

View File

@ -6,12 +6,12 @@ use conv::ApproxFrom;
use super::{Estimate, Merge};
include!("mean.rs");
include!("variance.rs");
include!("variance1.rs");
include!("skewness.rs");
include!("kurtosis.rs");
/// Alias for `Variance`.
pub type MeanWithError = Variance;
/// Alias for `Variance1`.
pub type MeanWithError = Variance1;
#[doc(hidden)]
#[macro_export]
@ -301,7 +301,7 @@ macro_rules! define_moments_inner {
/// Define an estimator of all moments up to a number given at compile time.
///
/// This uses a [general algorithm][paper] and is slightly less efficient than
/// the specialized implementations (such as [`Mean`], [`Variance`],
/// the specialized implementations (such as [`Mean`], [`Variance1`],
/// [`Skewness`] and [`Kurtosis`]), but it works for any number of moments >= 4.
///
/// (In practice, there is an upper limit due to integer overflow and possibly
@ -309,7 +309,7 @@ macro_rules! define_moments_inner {
///
/// [paper]: https://doi.org/10.1007/s00180-015-0637-z.
/// [`Mean`]: ./struct.Mean.html
/// [`Variance`]: ./struct.Variance.html
/// [`Variance1`]: ./struct.Variance1.html
/// [`Skewness`]: ./struct.Skewness.html
/// [`Kurtosis`]: ./struct.Kurtosis.html
///

View File

@ -1,3 +1,9 @@
use crate::matrix_variance_mean::Mean;
use average::Estimate;
use average::Merge;
const ddof: f64 = 0.;
/// Estimate the arithmetic mean and the variance of a sequence of numbers
/// ("population").
///
@ -7,25 +13,25 @@
/// ## Example
///
/// ```
/// use average::Variance;
/// use average::Variance0;
///
/// let a: Variance = (1..6).map(f64::from).collect();
/// let a: Variance0 = (1..6).map(f64::from).collect();
/// println!("The mean is {} ± {}.", a.mean(), a.error());
/// ```
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct Variance {
pub struct Variance0 {
/// Estimator of average.
avg: Mean,
/// Intermediate sum of squares for calculating the variance.
sum_2: f64,
}
impl Variance {
impl Variance0 {
/// Create a new variance estimator.
#[inline]
pub fn new() -> Variance {
Variance { avg: Mean::new(), sum_2: 0. }
pub fn new() -> Variance0 {
Variance0 { avg: Mean::new(), sum_2: 0. }
}
/// Increment the sample size.
@ -49,7 +55,7 @@ impl Variance {
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
let n = f64::approx_from(self.avg.len()).unwrap();
self.avg.add_inner(delta_n);
self.sum_2 += delta_n * delta_n * n * (n - 1.);
self.sum_2 += delta_n * delta_n * n * (n - ddof);
}
/// Determine whether the sample is empty.
@ -77,10 +83,10 @@ impl Variance {
/// This is an unbiased estimator of the variance of the population.
#[inline]
pub fn sample_variance(&self) -> f64 {
if self.avg.len() < 2 {
if self.avg.len() < 1 + ddof {
return 0.;
}
self.sum_2 / f64::approx_from(self.avg.len() - 1).unwrap()
self.sum_2 / f64::approx_from(self.avg.len() - ddof).unwrap()
}
/// Calculate the population variance of the sample.
@ -89,7 +95,7 @@ impl Variance {
#[inline]
pub fn population_variance(&self) -> f64 {
let n = self.avg.len();
if n < 2 {
if n < 1 + ddof {
return 0.;
}
self.sum_2 / f64::approx_from(n).unwrap()
@ -107,13 +113,13 @@ impl Variance {
}
impl core::default::Default for Variance {
fn default() -> Variance {
Variance::new()
impl core::default::Default for Variance0 {
fn default() -> Variance0 {
Variance0::new()
}
}
impl Estimate for Variance {
impl Estimate for Variance0 {
#[inline]
fn add(&mut self, sample: f64) {
self.increment();
@ -128,26 +134,26 @@ impl Estimate for Variance {
}
}
impl Merge for Variance {
impl Merge for Variance0 {
/// Merge another sample into this one.
///
///
/// ## Example
///
/// ```
/// use average::{Variance, Merge};
/// use average::{Variance0, Merge};
///
/// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
/// let (left, right) = sequence.split_at(3);
/// let avg_total: Variance = sequence.iter().collect();
/// let mut avg_left: Variance = left.iter().collect();
/// let avg_right: Variance = right.iter().collect();
/// let avg_total: Variance0 = sequence.iter().collect();
/// let mut avg_left: Variance0 = left.iter().collect();
/// let avg_right: Variance0 = right.iter().collect();
/// avg_left.merge(&avg_right);
/// assert_eq!(avg_total.mean(), avg_left.mean());
/// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
/// ```
#[inline]
fn merge(&mut self, other: &Variance) {
fn merge(&mut self, other: &Variance0) {
// This algorithm was proposed by Chan et al. in 1979.
//
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
@ -160,4 +166,4 @@ impl Merge for Variance {
}
}
impl_from_iterator!(Variance);
impl_from_iterator!(Variance0);

169
src/moments/variance1.rs Normal file
View File

@ -0,0 +1,169 @@
use crate::matrix_variance_mean::Mean;
use average::Estimate;
use average::Merge;
/// Delta degrees of freedom: the amount subtracted from the sample size in the
/// divisor used by `sample_variance`. A value of 1 corresponds to Bessel's
/// correction, i.e. the unbiased sample-variance estimator.
// NOTE(review): Rust convention names constants in SCREAMING_SNAKE_CASE
// (`DDOF`); renaming requires updating every use in this file at the same time.
const ddof: f64 = 1.;
/// Incremental estimator of the arithmetic mean and the variance of a sequence
/// of numbers (a sample drawn from a "population").
///
/// Besides the mean and variance, this can be used to estimate the standard
/// error of the mean (see `error`).
///
///
/// ## Example
///
/// ```
/// use average::Variance1;
///
/// let a: Variance1 = (1..6).map(f64::from).collect();
/// println!("The mean is {} ± {}.", a.mean(), a.error());
/// ```
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct Variance1 {
/// Estimator of the average; it also keeps track of the sample size.
avg: Mean,
/// Intermediate sum of squares accumulated by the Welford update; the
/// variance getters divide it by the appropriate sample-size term.
sum_2: f64,
}
impl Variance1 {
    /// Create a new, empty variance estimator.
    #[inline]
    pub fn new() -> Variance1 {
        Variance1 { avg: Mean::new(), sum_2: 0. }
    }

    /// Increment the sample size.
    ///
    /// This does not update anything else.
    #[inline]
    fn increment(&mut self) {
        self.avg.increment();
    }

    /// Add an observation given an already calculated difference from the mean
    /// divided by the number of samples, assuming the inner count of the sample
    /// size was already updated.
    ///
    /// This is useful for avoiding unnecessary divisions in the inner loop.
    #[inline]
    fn add_inner(&mut self, delta_n: f64) {
        // This algorithm introduced by Welford in 1962 trades numerical
        // stability for a division inside the loop.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let n = f64::approx_from(self.avg.len()).unwrap();
        self.avg.add_inner(delta_n);
        // The `n * (n - 1)` factor is part of Welford's recurrence itself
        // (delta^2 * (n - 1) / n with delta = delta_n * n). It is not a
        // degrees-of-freedom correction, so it must not depend on `ddof`.
        self.sum_2 += delta_n * delta_n * n * (n - 1.);
    }

    /// Determine whether the sample is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.avg.is_empty()
    }

    /// Estimate the mean of the population.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn mean(&self) -> f64 {
        self.avg.mean()
    }

    /// Return the sample size.
    #[inline]
    pub fn len(&self) -> u64 {
        self.avg.len()
    }

    /// Calculate the sample variance.
    ///
    /// The accumulated sum of squares is divided by `len() - ddof`. With
    /// `ddof == 1` this is an unbiased estimator of the variance of the
    /// population.
    ///
    /// Returns 0 when the sample holds fewer than `1 + ddof` observations,
    /// because the divisor would not be positive.
    #[inline]
    pub fn sample_variance(&self) -> f64 {
        // Convert the count to `f64` first: `ddof` is an `f64`, and mixing it
        // with the `u64` length directly does not type-check.
        let n = f64::approx_from(self.avg.len()).unwrap();
        if n < 1. + ddof {
            return 0.;
        }
        self.sum_2 / (n - ddof)
    }

    /// Calculate the population variance of the sample.
    ///
    /// This is a biased estimator of the variance of the population.
    ///
    /// Returns 0 when the sample holds fewer than two observations.
    #[inline]
    pub fn population_variance(&self) -> f64 {
        let n = self.avg.len();
        if n < 2 {
            return 0.;
        }
        self.sum_2 / f64::approx_from(n).unwrap()
    }

    /// Estimate the standard error of the mean of the population.
    ///
    /// Computed as the square root of `sample_variance() / len()`. Returns 0
    /// for an empty sample.
    #[inline]
    pub fn error(&self) -> f64 {
        let n = self.avg.len();
        if n == 0 {
            return 0.;
        }
        (self.sample_variance() / f64::approx_from(n).unwrap()).sqrt()
    }
}
impl core::default::Default for Variance1 {
fn default() -> Variance1 {
Variance1::new()
}
}
impl Estimate for Variance1 {
    /// Feed a single observation into the estimator.
    #[inline]
    fn add(&mut self, sample: f64) {
        // The count is bumped first because `add_inner` expects the sample
        // size to already include the observation being added.
        self.increment();
        let deviation = sample - self.avg.mean();
        let count = f64::approx_from(self.len()).unwrap();
        self.add_inner(deviation / count);
    }

    /// Return the current estimate, which for this type is the population
    /// variance.
    #[inline]
    fn estimate(&self) -> f64 {
        self.population_variance()
    }
}
impl Merge for Variance1 {
    /// Merge another sample into this one.
    ///
    /// Merging an empty sample leaves this estimator unchanged.
    ///
    ///
    /// ## Example
    ///
    /// ```
    /// use average::{Variance1, Merge};
    ///
    /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
    /// let (left, right) = sequence.split_at(3);
    /// let avg_total: Variance1 = sequence.iter().collect();
    /// let mut avg_left: Variance1 = left.iter().collect();
    /// let avg_right: Variance1 = right.iter().collect();
    /// avg_left.merge(&avg_right);
    /// assert_eq!(avg_total.mean(), avg_left.mean());
    /// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
    /// ```
    #[inline]
    fn merge(&mut self, other: &Variance1) {
        // Nothing to fold in. Returning early also keeps the update below from
        // evaluating `0 * 0 / 0` (NaN) when both samples are empty.
        if other.is_empty() {
            return;
        }
        // This algorithm was proposed by Chan et al. in 1979.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let len_self = f64::approx_from(self.len()).unwrap();
        let len_other = f64::approx_from(other.len()).unwrap();
        let len_total = len_self + len_other;
        // The difference of means must be taken before the inner mean is merged.
        let delta = other.mean() - self.mean();
        self.avg.merge(&other.avg);
        self.sum_2 += other.sum_2 + delta * delta * len_self * len_other / len_total;
    }
}
impl_from_iterator!(Variance1);