diff --git a/src/moments/mod.rs b/src/moments/mod.rs index 5883b9d..bddfaf2 100644 --- a/src/moments/mod.rs +++ b/src/moments/mod.rs @@ -6,12 +6,12 @@ use conv::ApproxFrom; use super::{Estimate, Merge}; include!("mean.rs"); -include!("variance.rs"); +include!("variance1.rs"); include!("skewness.rs"); include!("kurtosis.rs"); -/// Alias for `Variance`. -pub type MeanWithError = Variance; +/// Alias for `Variance1`. +pub type MeanWithError = Variance1; #[doc(hidden)] #[macro_export] @@ -301,7 +301,7 @@ macro_rules! define_moments_inner { /// Define an estimator of all moments up to a number given at compile time. /// /// This uses a [general algorithm][paper] and is slightly less efficient than -/// the specialized implementations (such as [`Mean`], [`Variance`], +/// the specialized implementations (such as [`Mean`], [`Variance1`], /// [`Skewness`] and [`Kurtosis`]), but it works for any number of moments >= 4. /// /// (In practise, there is an upper limit due to integer overflow and possibly @@ -309,7 +309,7 @@ macro_rules! define_moments_inner { /// /// [paper]: https://doi.org/10.1007/s00180-015-0637-z. /// [`Mean`]: ./struct.Mean.html -/// [`Variance`]: ./struct.Variance.html +/// [`Variance1`]: ./struct.Variance1.html /// [`Skewness`]: ./struct.Skewness.html /// [`Kurtosis`]: ./struct.Kurtosis.html /// diff --git a/src/moments/variance.rs b/src/moments/variance0.rs similarity index 79% rename from src/moments/variance.rs rename to src/moments/variance0.rs index cb79013..3fe4ed2 100644 --- a/src/moments/variance.rs +++ b/src/moments/variance0.rs @@ -1,3 +1,9 @@ +use crate::matrix_variance_mean::Mean; +use average::Estimate; +use average::Merge; + +const ddof: f64 = 0.; + /// Estimate the arithmetic mean and the variance of a sequence of numbers /// ("population"). /// @@ -7,25 +13,25 @@ /// ## Example /// /// ``` -/// use average::Variance; +/// use average::Variance0; /// -/// let a: Variance = (1..6).map(f64::from).collect(); +/// let a: Variance0 = (1..6).map(f64::from).collect(); /// println!("The mean is {} ± {}.", a.mean(), a.error()); /// ``` #[derive(Debug, Clone)] #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] -pub struct Variance { +pub struct Variance0 { /// Estimator of average. avg: Mean, /// Intermediate sum of squares for calculating the variance. sum_2: f64, } -impl Variance { +impl Variance0 { /// Create a new variance estimator. #[inline] - pub fn new() -> Variance { - Variance { avg: Mean::new(), sum_2: 0. } + pub fn new() -> Variance0 { + Variance0 { avg: Mean::new(), sum_2: 0. } } /// Increment the sample size. @@ -49,7 +55,7 @@ impl Variance { // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance. let n = f64::approx_from(self.avg.len()).unwrap(); self.avg.add_inner(delta_n); - self.sum_2 += delta_n * delta_n * n * (n - 1.); + self.sum_2 += delta_n * delta_n * n * (n - ddof); } /// Determine whether the sample is empty. @@ -77,10 +83,10 @@ impl Variance { /// This is an unbiased estimator of the variance of the population. #[inline] pub fn sample_variance(&self) -> f64 { - if self.avg.len() < 2 { + if self.avg.len() < 1 + ddof { return 0.; } - self.sum_2 / f64::approx_from(self.avg.len() - 1).unwrap() + self.sum_2 / f64::approx_from(self.avg.len() - ddof).unwrap() } /// Calculate the population variance of the sample. @@ -89,7 +95,7 @@ impl Variance { #[inline] pub fn population_variance(&self) -> f64 { let n = self.avg.len(); - if n < 2 { + if n < 1 + ddof { return 0.; } self.sum_2 / f64::approx_from(n).unwrap() @@ -107,13 +113,13 @@ impl Variance { } -impl core::default::Default for Variance { - fn default() -> Variance { - Variance::new() +impl core::default::Default for Variance0 { + fn default() -> Variance0 { + Variance0::new() } } -impl Estimate for Variance { +impl Estimate for Variance0 { #[inline] fn add(&mut self, sample: f64) { self.increment(); @@ -128,26 +134,26 @@ impl Estimate for Variance { } } -impl Merge for Variance { +impl Merge for Variance0 { /// Merge another sample into this one. /// /// /// ## Example /// /// ``` - /// use average::{Variance, Merge}; + /// use average::{Variance0, Merge}; /// /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.]; /// let (left, right) = sequence.split_at(3); - /// let avg_total: Variance = sequence.iter().collect(); - /// let mut avg_left: Variance = left.iter().collect(); - /// let avg_right: Variance = right.iter().collect(); + /// let avg_total: Variance0 = sequence.iter().collect(); + /// let mut avg_left: Variance0 = left.iter().collect(); + /// let avg_right: Variance0 = right.iter().collect(); /// avg_left.merge(&avg_right); /// assert_eq!(avg_total.mean(), avg_left.mean()); /// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance()); /// ``` #[inline] - fn merge(&mut self, other: &Variance) { + fn merge(&mut self, other: &Variance0) { // This algorithm was proposed by Chan et al. in 1979. // // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance. @@ -160,4 +166,4 @@ impl Merge for Variance { } } -impl_from_iterator!(Variance); +impl_from_iterator!(Variance0); \ No newline at end of file diff --git a/src/moments/variance1.rs b/src/moments/variance1.rs new file mode 100644 index 0000000..ab40863 --- /dev/null +++ b/src/moments/variance1.rs @@ -0,0 +1,169 @@ +use crate::matrix_variance_mean::Mean; +use average::Estimate; +use average::Merge; + +const ddof: f64 = 1.; + +/// Estimate the arithmetic mean and the variance of a sequence of numbers +/// ("population"). +/// +/// This can be used to estimate the standard error of the mean. +/// +/// +/// ## Example +/// +/// ``` +/// use average::Variance1; +/// +/// let a: Variance1 = (1..6).map(f64::from).collect(); +/// println!("The mean is {} ± {}.", a.mean(), a.error()); +/// ``` +#[derive(Debug, Clone)] +#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] +pub struct Variance1 { + /// Estimator of average. + avg: Mean, + /// Intermediate sum of squares for calculating the variance. + sum_2: f64, +} + +impl Variance1 { + /// Create a new variance estimator. + #[inline] + pub fn new() -> Variance1 { + Variance1 { avg: Mean::new(), sum_2: 0. } + } + + /// Increment the sample size. + /// + /// This does not update anything else. + #[inline] + fn increment(&mut self) { + self.avg.increment(); + } + + /// Add an observation given an already calculated difference from the mean + /// divided by the number of samples, assuming the inner count of the sample + /// size was already updated. + /// + /// This is useful for avoiding unnecessary divisions in the inner loop. + #[inline] + fn add_inner(&mut self, delta_n: f64) { + // This algorithm introduced by Welford in 1962 trades numerical + // stability for a division inside the loop. + // + // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance. + let n = f64::approx_from(self.avg.len()).unwrap(); + self.avg.add_inner(delta_n); + self.sum_2 += delta_n * delta_n * n * (n - ddof); + } + + /// Determine whether the sample is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.avg.is_empty() + } + + /// Estimate the mean of the population. + /// + /// Returns 0 for an empty sample. + #[inline] + pub fn mean(&self) -> f64 { + self.avg.mean() + } + + /// Return the sample size. + #[inline] + pub fn len(&self) -> u64 { + self.avg.len() + } + + /// Calculate the sample variance. + /// + /// This is an unbiased estimator of the variance of the population. + #[inline] + pub fn sample_variance(&self) -> f64 { + if self.avg.len() < 1 + ddof { + return 0.; + } + self.sum_2 / f64::approx_from(self.avg.len() - ddof).unwrap() + } + + /// Calculate the population variance of the sample. + /// + /// This is a biased estimator of the variance of the population. + #[inline] + pub fn population_variance(&self) -> f64 { + let n = self.avg.len(); + if n < 1 + ddof { + return 0.; + } + self.sum_2 / f64::approx_from(n).unwrap() + } + + /// Estimate the standard error of the mean of the population. + #[inline] + pub fn error(&self) -> f64 { + let n = self.avg.len(); + if n == 0 { + return 0.; + } + (self.sample_variance() / f64::approx_from(n).unwrap()).sqrt() + } + +} + +impl core::default::Default for Variance1 { + fn default() -> Variance1 { + Variance1::new() + } +} + +impl Estimate for Variance1 { + #[inline] + fn add(&mut self, sample: f64) { + self.increment(); + let delta_n = (sample - self.avg.mean()) + / f64::approx_from(self.len()).unwrap(); + self.add_inner(delta_n); + } + + #[inline] + fn estimate(&self) -> f64 { + self.population_variance() + } +} + +impl Merge for Variance1 { + /// Merge another sample into this one. + /// + /// + /// ## Example + /// + /// ``` + /// use average::{Variance1, Merge}; + /// + /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.]; + /// let (left, right) = sequence.split_at(3); + /// let avg_total: Variance1 = sequence.iter().collect(); + /// let mut avg_left: Variance1 = left.iter().collect(); + /// let avg_right: Variance1 = right.iter().collect(); + /// avg_left.merge(&avg_right); + /// assert_eq!(avg_total.mean(), avg_left.mean()); + /// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance()); + /// ``` + #[inline] + fn merge(&mut self, other: &Variance1) { + // This algorithm was proposed by Chan et al. in 1979. + // + // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance. + let len_self = f64::approx_from(self.len()).unwrap(); + let len_other = f64::approx_from(other.len()).unwrap(); + let len_total = len_self + len_other; + let delta = other.mean() - self.mean(); + self.avg.merge(&other.avg); + self.sum_2 += other.sum_2 + delta*delta * len_self * len_other / len_total; + } +} + +impl_from_iterator!(Variance1); \ No newline at end of file