variance
This commit is contained in:
parent
27a14185ee
commit
be1bd4c5c6
@ -6,12 +6,12 @@ use conv::ApproxFrom;
|
|||||||
use super::{Estimate, Merge};
|
use super::{Estimate, Merge};
|
||||||
|
|
||||||
include!("mean.rs");
|
include!("mean.rs");
|
||||||
include!("variance.rs");
|
include!("variance1.rs");
|
||||||
include!("skewness.rs");
|
include!("skewness.rs");
|
||||||
include!("kurtosis.rs");
|
include!("kurtosis.rs");
|
||||||
|
|
||||||
/// Alias for `Variance`.
|
/// Alias for `Variance1`.
|
||||||
pub type MeanWithError = Variance;
|
pub type MeanWithError = Variance1;
|
||||||
|
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
@ -301,7 +301,7 @@ macro_rules! define_moments_inner {
|
|||||||
/// Define an estimator of all moments up to a number given at compile time.
|
/// Define an estimator of all moments up to a number given at compile time.
|
||||||
///
|
///
|
||||||
/// This uses a [general algorithm][paper] and is slightly less efficient than
|
/// This uses a [general algorithm][paper] and is slightly less efficient than
|
||||||
/// the specialized implementations (such as [`Mean`], [`Variance`],
|
/// the specialized implementations (such as [`Mean`], [`Variance1`],
|
||||||
/// [`Skewness`] and [`Kurtosis`]), but it works for any number of moments >= 4.
|
/// [`Skewness`] and [`Kurtosis`]), but it works for any number of moments >= 4.
|
||||||
///
|
///
|
||||||
/// (In practise, there is an upper limit due to integer overflow and possibly
|
/// (In practise, there is an upper limit due to integer overflow and possibly
|
||||||
@ -309,7 +309,7 @@ macro_rules! define_moments_inner {
|
|||||||
///
|
///
|
||||||
/// [paper]: https://doi.org/10.1007/s00180-015-0637-z.
|
/// [paper]: https://doi.org/10.1007/s00180-015-0637-z.
|
||||||
/// [`Mean`]: ./struct.Mean.html
|
/// [`Mean`]: ./struct.Mean.html
|
||||||
/// [`Variance`]: ./struct.Variance.html
|
/// [`Variance1`]: ./struct.Variance1.html
|
||||||
/// [`Skewness`]: ./struct.Skewness.html
|
/// [`Skewness`]: ./struct.Skewness.html
|
||||||
/// [`Kurtosis`]: ./struct.Kurtosis.html
|
/// [`Kurtosis`]: ./struct.Kurtosis.html
|
||||||
///
|
///
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
use crate::matrix_variance_mean::Mean;
|
||||||
|
use average::Estimate;
|
||||||
|
use average::Merge;
|
||||||
|
|
||||||
|
const ddof: f64 = 0.;
|
||||||
|
|
||||||
/// Estimate the arithmetic mean and the variance of a sequence of numbers
|
/// Estimate the arithmetic mean and the variance of a sequence of numbers
|
||||||
/// ("population").
|
/// ("population").
|
||||||
///
|
///
|
||||||
@ -7,25 +13,25 @@
|
|||||||
/// ## Example
|
/// ## Example
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// use average::Variance;
|
/// use average::Variance0;
|
||||||
///
|
///
|
||||||
/// let a: Variance = (1..6).map(f64::from).collect();
|
/// let a: Variance0 = (1..6).map(f64::from).collect();
|
||||||
/// println!("The mean is {} ± {}.", a.mean(), a.error());
|
/// println!("The mean is {} ± {}.", a.mean(), a.error());
|
||||||
/// ```
|
/// ```
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
|
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
|
||||||
pub struct Variance {
|
pub struct Variance0 {
|
||||||
/// Estimator of average.
|
/// Estimator of average.
|
||||||
avg: Mean,
|
avg: Mean,
|
||||||
/// Intermediate sum of squares for calculating the variance.
|
/// Intermediate sum of squares for calculating the variance.
|
||||||
sum_2: f64,
|
sum_2: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Variance {
|
impl Variance0 {
|
||||||
/// Create a new variance estimator.
|
/// Create a new variance estimator.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new() -> Variance {
|
pub fn new() -> Variance0 {
|
||||||
Variance { avg: Mean::new(), sum_2: 0. }
|
Variance0 { avg: Mean::new(), sum_2: 0. }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Increment the sample size.
|
/// Increment the sample size.
|
||||||
@ -49,7 +55,7 @@ impl Variance {
|
|||||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||||
let n = f64::approx_from(self.avg.len()).unwrap();
|
let n = f64::approx_from(self.avg.len()).unwrap();
|
||||||
self.avg.add_inner(delta_n);
|
self.avg.add_inner(delta_n);
|
||||||
self.sum_2 += delta_n * delta_n * n * (n - 1.);
|
self.sum_2 += delta_n * delta_n * n * (n - ddof);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine whether the sample is empty.
|
/// Determine whether the sample is empty.
|
||||||
@ -77,10 +83,10 @@ impl Variance {
|
|||||||
/// This is an unbiased estimator of the variance of the population.
|
/// This is an unbiased estimator of the variance of the population.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn sample_variance(&self) -> f64 {
|
pub fn sample_variance(&self) -> f64 {
|
||||||
if self.avg.len() < 2 {
|
if self.avg.len() < 1 + ddof {
|
||||||
return 0.;
|
return 0.;
|
||||||
}
|
}
|
||||||
self.sum_2 / f64::approx_from(self.avg.len() - 1).unwrap()
|
self.sum_2 / f64::approx_from(self.avg.len() - ddof).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate the population variance of the sample.
|
/// Calculate the population variance of the sample.
|
||||||
@ -89,7 +95,7 @@ impl Variance {
|
|||||||
#[inline]
|
#[inline]
|
||||||
pub fn population_variance(&self) -> f64 {
|
pub fn population_variance(&self) -> f64 {
|
||||||
let n = self.avg.len();
|
let n = self.avg.len();
|
||||||
if n < 2 {
|
if n < 1 + ddof {
|
||||||
return 0.;
|
return 0.;
|
||||||
}
|
}
|
||||||
self.sum_2 / f64::approx_from(n).unwrap()
|
self.sum_2 / f64::approx_from(n).unwrap()
|
||||||
@ -107,13 +113,13 @@ impl Variance {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl core::default::Default for Variance {
|
impl core::default::Default for Variance0 {
|
||||||
fn default() -> Variance {
|
fn default() -> Variance0 {
|
||||||
Variance::new()
|
Variance0::new()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Estimate for Variance {
|
impl Estimate for Variance0 {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn add(&mut self, sample: f64) {
|
fn add(&mut self, sample: f64) {
|
||||||
self.increment();
|
self.increment();
|
||||||
@ -128,26 +134,26 @@ impl Estimate for Variance {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Merge for Variance {
|
impl Merge for Variance0 {
|
||||||
/// Merge another sample into this one.
|
/// Merge another sample into this one.
|
||||||
///
|
///
|
||||||
///
|
///
|
||||||
/// ## Example
|
/// ## Example
|
||||||
///
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// use average::{Variance, Merge};
|
/// use average::{Variance0, Merge};
|
||||||
///
|
///
|
||||||
/// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
/// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
||||||
/// let (left, right) = sequence.split_at(3);
|
/// let (left, right) = sequence.split_at(3);
|
||||||
/// let avg_total: Variance = sequence.iter().collect();
|
/// let avg_total: Variance0 = sequence.iter().collect();
|
||||||
/// let mut avg_left: Variance = left.iter().collect();
|
/// let mut avg_left: Variance0 = left.iter().collect();
|
||||||
/// let avg_right: Variance = right.iter().collect();
|
/// let avg_right: Variance0 = right.iter().collect();
|
||||||
/// avg_left.merge(&avg_right);
|
/// avg_left.merge(&avg_right);
|
||||||
/// assert_eq!(avg_total.mean(), avg_left.mean());
|
/// assert_eq!(avg_total.mean(), avg_left.mean());
|
||||||
/// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
|
/// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
|
||||||
/// ```
|
/// ```
|
||||||
#[inline]
|
#[inline]
|
||||||
fn merge(&mut self, other: &Variance) {
|
fn merge(&mut self, other: &Variance0) {
|
||||||
// This algorithm was proposed by Chan et al. in 1979.
|
// This algorithm was proposed by Chan et al. in 1979.
|
||||||
//
|
//
|
||||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||||
@ -160,4 +166,4 @@ impl Merge for Variance {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl_from_iterator!(Variance);
|
impl_from_iterator!(Variance0);
|
169
src/moments/variance1.rs
Normal file
169
src/moments/variance1.rs
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
use crate::matrix_variance_mean::Mean;
|
||||||
|
use average::Estimate;
|
||||||
|
use average::Merge;
|
||||||
|
|
||||||
|
const ddof: f64 = 1.;
|
||||||
|
|
||||||
|
/// Estimate the arithmetic mean and the variance of a sequence of numbers
|
||||||
|
/// ("population").
|
||||||
|
///
|
||||||
|
/// This can be used to estimate the standard error of the mean.
|
||||||
|
///
|
||||||
|
///
|
||||||
|
/// ## Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use average::Variance1;
|
||||||
|
///
|
||||||
|
/// let a: Variance1 = (1..6).map(f64::from).collect();
|
||||||
|
/// println!("The mean is {} ± {}.", a.mean(), a.error());
|
||||||
|
/// ```
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
|
||||||
|
pub struct Variance1 {
|
||||||
|
/// Estimator of average.
|
||||||
|
avg: Mean,
|
||||||
|
/// Intermediate sum of squares for calculating the variance.
|
||||||
|
sum_2: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Variance1 {
|
||||||
|
/// Create a new variance estimator.
|
||||||
|
#[inline]
|
||||||
|
pub fn new() -> Variance1 {
|
||||||
|
Variance1 { avg: Mean::new(), sum_2: 0. }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Increment the sample size.
|
||||||
|
///
|
||||||
|
/// This does not update anything else.
|
||||||
|
#[inline]
|
||||||
|
fn increment(&mut self) {
|
||||||
|
self.avg.increment();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add an observation given an already calculated difference from the mean
|
||||||
|
/// divided by the number of samples, assuming the inner count of the sample
|
||||||
|
/// size was already updated.
|
||||||
|
///
|
||||||
|
/// This is useful for avoiding unnecessary divisions in the inner loop.
|
||||||
|
#[inline]
|
||||||
|
fn add_inner(&mut self, delta_n: f64) {
|
||||||
|
// This algorithm introduced by Welford in 1962 trades numerical
|
||||||
|
// stability for a division inside the loop.
|
||||||
|
//
|
||||||
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||||
|
let n = f64::approx_from(self.avg.len()).unwrap();
|
||||||
|
self.avg.add_inner(delta_n);
|
||||||
|
self.sum_2 += delta_n * delta_n * n * (n - ddof);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Determine whether the sample is empty.
|
||||||
|
#[inline]
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.avg.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Estimate the mean of the population.
|
||||||
|
///
|
||||||
|
/// Returns 0 for an empty sample.
|
||||||
|
#[inline]
|
||||||
|
pub fn mean(&self) -> f64 {
|
||||||
|
self.avg.mean()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the sample size.
|
||||||
|
#[inline]
|
||||||
|
pub fn len(&self) -> u64 {
|
||||||
|
self.avg.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate the sample variance.
|
||||||
|
///
|
||||||
|
/// This is an unbiased estimator of the variance of the population.
|
||||||
|
#[inline]
|
||||||
|
pub fn sample_variance(&self) -> f64 {
|
||||||
|
if self.avg.len() < 1 + ddof {
|
||||||
|
return 0.;
|
||||||
|
}
|
||||||
|
self.sum_2 / f64::approx_from(self.avg.len() - ddof).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Calculate the population variance of the sample.
|
||||||
|
///
|
||||||
|
/// This is a biased estimator of the variance of the population.
|
||||||
|
#[inline]
|
||||||
|
pub fn population_variance(&self) -> f64 {
|
||||||
|
let n = self.avg.len();
|
||||||
|
if n < 1 + ddof {
|
||||||
|
return 0.;
|
||||||
|
}
|
||||||
|
self.sum_2 / f64::approx_from(n).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Estimate the standard error of the mean of the population.
|
||||||
|
#[inline]
|
||||||
|
pub fn error(&self) -> f64 {
|
||||||
|
let n = self.avg.len();
|
||||||
|
if n == 0 {
|
||||||
|
return 0.;
|
||||||
|
}
|
||||||
|
(self.sample_variance() / f64::approx_from(n).unwrap()).sqrt()
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::default::Default for Variance1 {
|
||||||
|
fn default() -> Variance1 {
|
||||||
|
Variance1::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Estimate for Variance1 {
|
||||||
|
#[inline]
|
||||||
|
fn add(&mut self, sample: f64) {
|
||||||
|
self.increment();
|
||||||
|
let delta_n = (sample - self.avg.mean())
|
||||||
|
/ f64::approx_from(self.len()).unwrap();
|
||||||
|
self.add_inner(delta_n);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn estimate(&self) -> f64 {
|
||||||
|
self.population_variance()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Merge for Variance1 {
|
||||||
|
/// Merge another sample into this one.
|
||||||
|
///
|
||||||
|
///
|
||||||
|
/// ## Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use average::{Variance1, Merge};
|
||||||
|
///
|
||||||
|
/// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
||||||
|
/// let (left, right) = sequence.split_at(3);
|
||||||
|
/// let avg_total: Variance1 = sequence.iter().collect();
|
||||||
|
/// let mut avg_left: Variance1 = left.iter().collect();
|
||||||
|
/// let avg_right: Variance1 = right.iter().collect();
|
||||||
|
/// avg_left.merge(&avg_right);
|
||||||
|
/// assert_eq!(avg_total.mean(), avg_left.mean());
|
||||||
|
/// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
fn merge(&mut self, other: &Variance1) {
|
||||||
|
// This algorithm was proposed by Chan et al. in 1979.
|
||||||
|
//
|
||||||
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||||
|
let len_self = f64::approx_from(self.len()).unwrap();
|
||||||
|
let len_other = f64::approx_from(other.len()).unwrap();
|
||||||
|
let len_total = len_self + len_other;
|
||||||
|
let delta = other.mean() - self.mean();
|
||||||
|
self.avg.merge(&other.avg);
|
||||||
|
self.sum_2 += other.sum_2 + delta*delta * len_self * len_other / len_total;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl_from_iterator!(Variance1);
|
Loading…
Reference in New Issue
Block a user