variance
This commit is contained in:
parent
27a14185ee
commit
be1bd4c5c6
@ -6,12 +6,12 @@ use conv::ApproxFrom;
|
||||
use super::{Estimate, Merge};
|
||||
|
||||
include!("mean.rs");
|
||||
include!("variance.rs");
|
||||
include!("variance1.rs");
|
||||
include!("skewness.rs");
|
||||
include!("kurtosis.rs");
|
||||
|
||||
/// Alias for `Variance`.
|
||||
pub type MeanWithError = Variance;
|
||||
/// Alias for `Variance1`.
|
||||
pub type MeanWithError = Variance1;
|
||||
|
||||
#[doc(hidden)]
|
||||
#[macro_export]
|
||||
@ -301,7 +301,7 @@ macro_rules! define_moments_inner {
|
||||
/// Define an estimator of all moments up to a number given at compile time.
|
||||
///
|
||||
/// This uses a [general algorithm][paper] and is slightly less efficient than
|
||||
/// the specialized implementations (such as [`Mean`], [`Variance`],
|
||||
/// the specialized implementations (such as [`Mean`], [`Variance1`],
|
||||
/// [`Skewness`] and [`Kurtosis`]), but it works for any number of moments >= 4.
|
||||
///
|
||||
/// (In practise, there is an upper limit due to integer overflow and possibly
|
||||
@ -309,7 +309,7 @@ macro_rules! define_moments_inner {
|
||||
///
|
||||
/// [paper]: https://doi.org/10.1007/s00180-015-0637-z.
|
||||
/// [`Mean`]: ./struct.Mean.html
|
||||
/// [`Variance`]: ./struct.Variance.html
|
||||
/// [`Variance1`]: ./struct.Variance1.html
|
||||
/// [`Skewness`]: ./struct.Skewness.html
|
||||
/// [`Kurtosis`]: ./struct.Kurtosis.html
|
||||
///
|
||||
|
@ -1,3 +1,9 @@
|
||||
use crate::matrix_variance_mean::Mean;
|
||||
use average::Estimate;
|
||||
use average::Merge;
|
||||
|
||||
const ddof: f64 = 0.;
|
||||
|
||||
/// Estimate the arithmetic mean and the variance of a sequence of numbers
|
||||
/// ("population").
|
||||
///
|
||||
@ -7,25 +13,25 @@
|
||||
/// ## Example
|
||||
///
|
||||
/// ```
|
||||
/// use average::Variance;
|
||||
/// use average::Variance0;
|
||||
///
|
||||
/// let a: Variance = (1..6).map(f64::from).collect();
|
||||
/// let a: Variance0 = (1..6).map(f64::from).collect();
|
||||
/// println!("The mean is {} ± {}.", a.mean(), a.error());
|
||||
/// ```
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
|
||||
pub struct Variance {
|
||||
pub struct Variance0 {
|
||||
/// Estimator of average.
|
||||
avg: Mean,
|
||||
/// Intermediate sum of squares for calculating the variance.
|
||||
sum_2: f64,
|
||||
}
|
||||
|
||||
impl Variance {
|
||||
impl Variance0 {
|
||||
/// Create a new variance estimator.
|
||||
#[inline]
|
||||
pub fn new() -> Variance {
|
||||
Variance { avg: Mean::new(), sum_2: 0. }
|
||||
pub fn new() -> Variance0 {
|
||||
Variance0 { avg: Mean::new(), sum_2: 0. }
|
||||
}
|
||||
|
||||
/// Increment the sample size.
|
||||
@ -49,7 +55,7 @@ impl Variance {
|
||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||
let n = f64::approx_from(self.avg.len()).unwrap();
|
||||
self.avg.add_inner(delta_n);
|
||||
self.sum_2 += delta_n * delta_n * n * (n - 1.);
|
||||
self.sum_2 += delta_n * delta_n * n * (n - ddof);
|
||||
}
|
||||
|
||||
/// Determine whether the sample is empty.
|
||||
@ -77,10 +83,10 @@ impl Variance {
|
||||
/// This is an unbiased estimator of the variance of the population.
|
||||
#[inline]
|
||||
pub fn sample_variance(&self) -> f64 {
|
||||
if self.avg.len() < 2 {
|
||||
if self.avg.len() < 1 + ddof {
|
||||
return 0.;
|
||||
}
|
||||
self.sum_2 / f64::approx_from(self.avg.len() - 1).unwrap()
|
||||
self.sum_2 / f64::approx_from(self.avg.len() - ddof).unwrap()
|
||||
}
|
||||
|
||||
/// Calculate the population variance of the sample.
|
||||
@ -89,7 +95,7 @@ impl Variance {
|
||||
#[inline]
|
||||
pub fn population_variance(&self) -> f64 {
|
||||
let n = self.avg.len();
|
||||
if n < 2 {
|
||||
if n < 1 + ddof {
|
||||
return 0.;
|
||||
}
|
||||
self.sum_2 / f64::approx_from(n).unwrap()
|
||||
@ -107,13 +113,13 @@ impl Variance {
|
||||
|
||||
}
|
||||
|
||||
impl core::default::Default for Variance {
|
||||
fn default() -> Variance {
|
||||
Variance::new()
|
||||
impl core::default::Default for Variance0 {
|
||||
fn default() -> Variance0 {
|
||||
Variance0::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Estimate for Variance {
|
||||
impl Estimate for Variance0 {
|
||||
#[inline]
|
||||
fn add(&mut self, sample: f64) {
|
||||
self.increment();
|
||||
@ -128,26 +134,26 @@ impl Estimate for Variance {
|
||||
}
|
||||
}
|
||||
|
||||
impl Merge for Variance {
|
||||
impl Merge for Variance0 {
|
||||
/// Merge another sample into this one.
|
||||
///
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// ```
|
||||
/// use average::{Variance, Merge};
|
||||
/// use average::{Variance0, Merge};
|
||||
///
|
||||
/// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
||||
/// let (left, right) = sequence.split_at(3);
|
||||
/// let avg_total: Variance = sequence.iter().collect();
|
||||
/// let mut avg_left: Variance = left.iter().collect();
|
||||
/// let avg_right: Variance = right.iter().collect();
|
||||
/// let avg_total: Variance0 = sequence.iter().collect();
|
||||
/// let mut avg_left: Variance0 = left.iter().collect();
|
||||
/// let avg_right: Variance0 = right.iter().collect();
|
||||
/// avg_left.merge(&avg_right);
|
||||
/// assert_eq!(avg_total.mean(), avg_left.mean());
|
||||
/// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
|
||||
/// ```
|
||||
#[inline]
|
||||
fn merge(&mut self, other: &Variance) {
|
||||
fn merge(&mut self, other: &Variance0) {
|
||||
// This algorithm was proposed by Chan et al. in 1979.
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||
@ -160,4 +166,4 @@ impl Merge for Variance {
|
||||
}
|
||||
}
|
||||
|
||||
impl_from_iterator!(Variance);
|
||||
impl_from_iterator!(Variance0);
|
169
src/moments/variance1.rs
Normal file
169
src/moments/variance1.rs
Normal file
@ -0,0 +1,169 @@
|
||||
use crate::matrix_variance_mean::Mean;
|
||||
use average::Estimate;
|
||||
use average::Merge;
|
||||
|
||||
const ddof: f64 = 1.;
|
||||
|
||||
/// Estimate the arithmetic mean and the variance of a sequence of numbers
|
||||
/// ("population").
|
||||
///
|
||||
/// This can be used to estimate the standard error of the mean.
|
||||
///
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// ```
|
||||
/// use average::Variance1;
|
||||
///
|
||||
/// let a: Variance1 = (1..6).map(f64::from).collect();
|
||||
/// println!("The mean is {} ± {}.", a.mean(), a.error());
|
||||
/// ```
|
||||
#[derive(Debug, Clone)]
|
||||
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
|
||||
pub struct Variance1 {
|
||||
/// Estimator of average.
|
||||
avg: Mean,
|
||||
/// Intermediate sum of squares for calculating the variance.
|
||||
sum_2: f64,
|
||||
}
|
||||
|
||||
impl Variance1 {
|
||||
/// Create a new variance estimator.
|
||||
#[inline]
|
||||
pub fn new() -> Variance1 {
|
||||
Variance1 { avg: Mean::new(), sum_2: 0. }
|
||||
}
|
||||
|
||||
/// Increment the sample size.
|
||||
///
|
||||
/// This does not update anything else.
|
||||
#[inline]
|
||||
fn increment(&mut self) {
|
||||
self.avg.increment();
|
||||
}
|
||||
|
||||
/// Add an observation given an already calculated difference from the mean
|
||||
/// divided by the number of samples, assuming the inner count of the sample
|
||||
/// size was already updated.
|
||||
///
|
||||
/// This is useful for avoiding unnecessary divisions in the inner loop.
|
||||
#[inline]
|
||||
fn add_inner(&mut self, delta_n: f64) {
|
||||
// This algorithm introduced by Welford in 1962 trades numerical
|
||||
// stability for a division inside the loop.
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||
let n = f64::approx_from(self.avg.len()).unwrap();
|
||||
self.avg.add_inner(delta_n);
|
||||
self.sum_2 += delta_n * delta_n * n * (n - ddof);
|
||||
}
|
||||
|
||||
/// Determine whether the sample is empty.
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.avg.is_empty()
|
||||
}
|
||||
|
||||
/// Estimate the mean of the population.
|
||||
///
|
||||
/// Returns 0 for an empty sample.
|
||||
#[inline]
|
||||
pub fn mean(&self) -> f64 {
|
||||
self.avg.mean()
|
||||
}
|
||||
|
||||
/// Return the sample size.
|
||||
#[inline]
|
||||
pub fn len(&self) -> u64 {
|
||||
self.avg.len()
|
||||
}
|
||||
|
||||
/// Calculate the sample variance.
|
||||
///
|
||||
/// This is an unbiased estimator of the variance of the population.
|
||||
#[inline]
|
||||
pub fn sample_variance(&self) -> f64 {
|
||||
if self.avg.len() < 1 + ddof {
|
||||
return 0.;
|
||||
}
|
||||
self.sum_2 / f64::approx_from(self.avg.len() - ddof).unwrap()
|
||||
}
|
||||
|
||||
/// Calculate the population variance of the sample.
|
||||
///
|
||||
/// This is a biased estimator of the variance of the population.
|
||||
#[inline]
|
||||
pub fn population_variance(&self) -> f64 {
|
||||
let n = self.avg.len();
|
||||
if n < 1 + ddof {
|
||||
return 0.;
|
||||
}
|
||||
self.sum_2 / f64::approx_from(n).unwrap()
|
||||
}
|
||||
|
||||
/// Estimate the standard error of the mean of the population.
|
||||
#[inline]
|
||||
pub fn error(&self) -> f64 {
|
||||
let n = self.avg.len();
|
||||
if n == 0 {
|
||||
return 0.;
|
||||
}
|
||||
(self.sample_variance() / f64::approx_from(n).unwrap()).sqrt()
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl core::default::Default for Variance1 {
|
||||
fn default() -> Variance1 {
|
||||
Variance1::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Estimate for Variance1 {
|
||||
#[inline]
|
||||
fn add(&mut self, sample: f64) {
|
||||
self.increment();
|
||||
let delta_n = (sample - self.avg.mean())
|
||||
/ f64::approx_from(self.len()).unwrap();
|
||||
self.add_inner(delta_n);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn estimate(&self) -> f64 {
|
||||
self.population_variance()
|
||||
}
|
||||
}
|
||||
|
||||
impl Merge for Variance1 {
|
||||
/// Merge another sample into this one.
|
||||
///
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// ```
|
||||
/// use average::{Variance1, Merge};
|
||||
///
|
||||
/// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
||||
/// let (left, right) = sequence.split_at(3);
|
||||
/// let avg_total: Variance1 = sequence.iter().collect();
|
||||
/// let mut avg_left: Variance1 = left.iter().collect();
|
||||
/// let avg_right: Variance1 = right.iter().collect();
|
||||
/// avg_left.merge(&avg_right);
|
||||
/// assert_eq!(avg_total.mean(), avg_left.mean());
|
||||
/// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
|
||||
/// ```
|
||||
#[inline]
|
||||
fn merge(&mut self, other: &Variance1) {
|
||||
// This algorithm was proposed by Chan et al. in 1979.
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||
let len_self = f64::approx_from(self.len()).unwrap();
|
||||
let len_other = f64::approx_from(other.len()).unwrap();
|
||||
let len_total = len_self + len_other;
|
||||
let delta = other.mean() - self.mean();
|
||||
self.avg.merge(&other.avg);
|
||||
self.sum_2 += other.sum_2 + delta*delta * len_self * len_other / len_total;
|
||||
}
|
||||
}
|
||||
|
||||
impl_from_iterator!(Variance1);
|
Loading…
Reference in New Issue
Block a user