Factor out calculation of average
Now it is possible to calculate the average without calculating the error.
This commit is contained in:
parent
962adb91d7
commit
a95ab05c10
188
src/average.rs
188
src/average.rs
@ -2,6 +2,118 @@ use core;
|
|||||||
|
|
||||||
use conv::ApproxFrom;
|
use conv::ApproxFrom;
|
||||||
|
|
||||||
|
|
||||||
|
/// Estimate the arithmetic mean of a sequence of numbers ("population").
|
||||||
|
///
|
||||||
|
/// Everything is calculated iteratively using constant memory, so the sequence
|
||||||
|
/// of numbers can be an iterator. The used algorithms try to avoid numerical
|
||||||
|
/// instabilities.
|
||||||
|
///
|
||||||
|
///
|
||||||
|
/// ## Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use average::Average;
|
||||||
|
///
|
||||||
|
/// let a: Average = (1..6).map(Into::into).collect();
|
||||||
|
/// println!("The average is {}.", a.mean());
|
||||||
|
/// ```
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Average {
|
||||||
|
/// Average value.
|
||||||
|
avg: f64,
|
||||||
|
/// Sample size.
|
||||||
|
n: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Average {
|
||||||
|
/// Create a new average estimator.
|
||||||
|
pub fn new() -> Average {
|
||||||
|
Average { avg: 0., n: 0 }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add an element sampled from the population.
|
||||||
|
#[inline]
|
||||||
|
pub fn add(&mut self, sample: f64) {
|
||||||
|
// This algorithm introduced by Welford in 1962 trades numerical
|
||||||
|
// stability for a division inside the loop.
|
||||||
|
//
|
||||||
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||||
|
self.n += 1;
|
||||||
|
let delta = sample - self.avg;
|
||||||
|
self.avg += delta / f64::approx_from(self.n).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Determine whether the samples are empty.
|
||||||
|
#[inline]
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.n == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Estimate the mean of the population.
|
||||||
|
#[inline]
|
||||||
|
pub fn mean(&self) -> f64 {
|
||||||
|
self.avg
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the number of samples.
|
||||||
|
#[inline]
|
||||||
|
pub fn len(&self) -> u64 {
|
||||||
|
self.n
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Merge another sample into this one.
|
||||||
|
///
|
||||||
|
///
|
||||||
|
/// ## Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use average::Average;
|
||||||
|
///
|
||||||
|
/// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
||||||
|
/// let (left, right) = sequence.split_at(3);
|
||||||
|
/// let avg_total: Average = sequence.iter().map(|x| *x).collect();
|
||||||
|
/// let mut avg_left: Average = left.iter().map(|x| *x).collect();
|
||||||
|
/// let avg_right: Average = right.iter().map(|x| *x).collect();
|
||||||
|
/// avg_left.merge(&avg_right);
|
||||||
|
/// assert_eq!(avg_total.mean(), avg_left.mean());
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
pub fn merge(&mut self, other: &Average) {
|
||||||
|
// This algorithm was proposed by Chan et al. in 1979.
|
||||||
|
//
|
||||||
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||||
|
let len_self = f64::approx_from(self.n).unwrap();
|
||||||
|
let len_other = f64::approx_from(other.n).unwrap();
|
||||||
|
let len_total = len_self + len_other;
|
||||||
|
self.n += other.n;
|
||||||
|
self.avg = (len_self * self.avg + len_other * other.avg) / len_total;
|
||||||
|
// Chan et al. use
|
||||||
|
//
|
||||||
|
// self.avg += delta * len_other / len_total;
|
||||||
|
//
|
||||||
|
// instead but this results in cancelation if the number of samples are similar.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::default::Default for Average {
|
||||||
|
fn default() -> Average {
|
||||||
|
Average::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::iter::FromIterator<f64> for Average {
|
||||||
|
fn from_iter<T>(iter: T) -> Average
|
||||||
|
where T: IntoIterator<Item=f64>
|
||||||
|
{
|
||||||
|
let mut a = Average::new();
|
||||||
|
for i in iter {
|
||||||
|
a.add(i);
|
||||||
|
}
|
||||||
|
a
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Estimate the arithmetic mean and the variance of a sequence of numbers
|
/// Estimate the arithmetic mean and the variance of a sequence of numbers
|
||||||
/// ("population").
|
/// ("population").
|
||||||
///
|
///
|
||||||
@ -22,10 +134,8 @@ use conv::ApproxFrom;
|
|||||||
/// ```
|
/// ```
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct AverageWithError {
|
pub struct AverageWithError {
|
||||||
/// Average value.
|
/// Estimator of average.
|
||||||
avg: f64,
|
avg: Average,
|
||||||
/// Number of samples.
|
|
||||||
n: u64,
|
|
||||||
/// Intermediate sum of squares for calculating the variance.
|
/// Intermediate sum of squares for calculating the variance.
|
||||||
v: f64,
|
v: f64,
|
||||||
}
|
}
|
||||||
@ -33,7 +143,7 @@ pub struct AverageWithError {
|
|||||||
impl AverageWithError {
|
impl AverageWithError {
|
||||||
/// Create a new average estimator.
|
/// Create a new average estimator.
|
||||||
pub fn new() -> AverageWithError {
|
pub fn new() -> AverageWithError {
|
||||||
AverageWithError { avg: 0., n: 0, v: 0. }
|
AverageWithError { avg: Average::new(), v: 0. }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add an element sampled from the population.
|
/// Add an element sampled from the population.
|
||||||
@ -43,53 +153,60 @@ impl AverageWithError {
|
|||||||
// stability for a division inside the loop.
|
// stability for a division inside the loop.
|
||||||
//
|
//
|
||||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||||
self.n += 1;
|
let delta = sample - self.avg.mean();
|
||||||
let delta = sample - self.avg;
|
self.avg.add(sample);
|
||||||
self.avg += delta / f64::approx_from(self.n).unwrap();
|
self.v += delta * (sample - self.avg.mean());
|
||||||
self.v += delta * (sample - self.avg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine whether the samples are empty.
|
/// Determine whether the samples are empty.
|
||||||
|
#[inline]
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self) -> bool {
|
||||||
self.n == 0
|
self.avg.is_empty()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the mean of the population.
|
/// Estimate the mean of the population.
|
||||||
|
#[inline]
|
||||||
pub fn mean(&self) -> f64 {
|
pub fn mean(&self) -> f64 {
|
||||||
self.avg
|
self.avg.mean()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the number of samples.
|
/// Return the number of samples.
|
||||||
|
#[inline]
|
||||||
pub fn len(&self) -> u64 {
|
pub fn len(&self) -> u64 {
|
||||||
self.n
|
self.avg.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate the sample variance.
|
/// Calculate the sample variance.
|
||||||
///
|
///
|
||||||
/// This is an unbiased estimator of the variance of the population.
|
/// This is an unbiased estimator of the variance of the population.
|
||||||
|
#[inline]
|
||||||
pub fn sample_variance(&self) -> f64 {
|
pub fn sample_variance(&self) -> f64 {
|
||||||
if self.n < 2 {
|
if self.avg.len() < 2 {
|
||||||
return 0.;
|
return 0.;
|
||||||
}
|
}
|
||||||
self.v / f64::approx_from(self.n - 1).unwrap()
|
self.v / f64::approx_from(self.avg.len() - 1).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate the population variance of the sample.
|
/// Calculate the population variance of the sample.
|
||||||
///
|
///
|
||||||
/// This is a biased estimator of the variance of the population.
|
/// This is a biased estimator of the variance of the population.
|
||||||
|
#[inline]
|
||||||
pub fn population_variance(&self) -> f64 {
|
pub fn population_variance(&self) -> f64 {
|
||||||
if self.n < 2 {
|
let n = self.avg.len();
|
||||||
|
if n < 2 {
|
||||||
return 0.;
|
return 0.;
|
||||||
}
|
}
|
||||||
self.v / f64::approx_from(self.n).unwrap()
|
self.v / f64::approx_from(n).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the standard error of the mean of the population.
|
/// Estimate the standard error of the mean of the population.
|
||||||
|
#[inline]
|
||||||
pub fn error(&self) -> f64 {
|
pub fn error(&self) -> f64 {
|
||||||
if self.n == 0 {
|
let n = self.avg.len();
|
||||||
|
if n == 0 {
|
||||||
return 0.;
|
return 0.;
|
||||||
}
|
}
|
||||||
(self.sample_variance() / f64::approx_from(self.n).unwrap()).sqrt()
|
(self.sample_variance() / f64::approx_from(n).unwrap()).sqrt()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Merge another sample into this one.
|
/// Merge another sample into this one.
|
||||||
@ -109,21 +226,16 @@ impl AverageWithError {
|
|||||||
/// assert_eq!(avg_total.mean(), avg_left.mean());
|
/// assert_eq!(avg_total.mean(), avg_left.mean());
|
||||||
/// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
|
/// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
|
||||||
/// ```
|
/// ```
|
||||||
|
#[inline]
|
||||||
pub fn merge(&mut self, other: &AverageWithError) {
|
pub fn merge(&mut self, other: &AverageWithError) {
|
||||||
// This algorithm was proposed by Chan et al. in 1979.
|
// This algorithm was proposed by Chan et al. in 1979.
|
||||||
//
|
//
|
||||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
||||||
let delta = other.avg - self.avg;
|
let len_self = f64::approx_from(self.len()).unwrap();
|
||||||
let len_self = f64::approx_from(self.n).unwrap();
|
let len_other = f64::approx_from(other.len()).unwrap();
|
||||||
let len_other = f64::approx_from(other.n).unwrap();
|
|
||||||
let len_total = len_self + len_other;
|
let len_total = len_self + len_other;
|
||||||
self.n += other.n;
|
let delta = other.mean() - self.mean();
|
||||||
self.avg = (len_self * self.avg + len_other * other.avg) / len_total;
|
self.avg.merge(&other.avg);
|
||||||
// Chan et al. use
|
|
||||||
//
|
|
||||||
// self.avg += delta * len_other / len_total;
|
|
||||||
//
|
|
||||||
// instead but this results in cancelation if the number of samples are similar.
|
|
||||||
self.v += other.v + delta*delta * len_self * len_other / len_total;
|
self.v += other.v + delta*delta * len_self * len_other / len_total;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -145,23 +257,3 @@ impl core::iter::FromIterator<f64> for AverageWithError {
|
|||||||
a
|
a
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn merge() {
|
|
||||||
let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
|
||||||
for mid in 0..sequence.len() {
|
|
||||||
let (left, right) = sequence.split_at(mid);
|
|
||||||
let avg_total: AverageWithError = sequence.iter().map(|x| *x).collect();
|
|
||||||
let mut avg_left: AverageWithError = left.iter().map(|x| *x).collect();
|
|
||||||
let avg_right: AverageWithError = right.iter().map(|x| *x).collect();
|
|
||||||
avg_left.merge(&avg_right);
|
|
||||||
assert_eq!(avg_total.n, avg_left.n);
|
|
||||||
assert_eq!(avg_total.avg, avg_left.avg);
|
|
||||||
assert_eq!(avg_total.v, avg_left.v);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
17
src/lib.rs
17
src/lib.rs
@ -2,8 +2,8 @@
|
|||||||
//! sequence of numbers, and for their standard errors. The typical workflow
|
//! sequence of numbers, and for their standard errors. The typical workflow
|
||||||
//! looks like this:
|
//! looks like this:
|
||||||
//!
|
//!
|
||||||
//! 1. Initialize your estimator of choice ([`AverageWithError`] or
|
//! 1. Initialize your estimator of choice ([`Average`], [`AverageWithError`],
|
||||||
//! [`WeightedAverageWithError`]) with `new()`.
|
//! [`WeightedAverage`] or [`WeightedAverageWithError`]) with `new()`.
|
||||||
//! 2. Add some subset (called "samples") of the sequence of numbers (called
|
//! 2. Add some subset (called "samples") of the sequence of numbers (called
|
||||||
//! "population") for which you want to estimate the average, using `add()`
|
//! "population") for which you want to estimate the average, using `add()`
|
||||||
//! or `collect()`.
|
//! or `collect()`.
|
||||||
@ -13,8 +13,11 @@
|
|||||||
//! You can run several estimators in parallel and merge them into one with
|
//! You can run several estimators in parallel and merge them into one with
|
||||||
//! `merge()`.
|
//! `merge()`.
|
||||||
//!
|
//!
|
||||||
//! [`AverageWithError`]: ./average/struct.Average.html
|
//! [`Average`]: ./average/struct.Average.html
|
||||||
//! [`WeightedAverageWithError`]: ./weighted_average/struct.WeightedAverage.html
|
//! [`AverageWithError`]: ./average/struct.AverageWithError.html
|
||||||
|
//! [`WeightedAverage`]: ./weighted_average/struct.WeightedAverage.html
|
||||||
|
//! [`WeightedAverageWithError`]: ./weighted_average/struct.WeightedAverageWithError.html
|
||||||
|
//!
|
||||||
//!
|
//!
|
||||||
//! ## Example
|
//! ## Example
|
||||||
//!
|
//!
|
||||||
@ -29,12 +32,10 @@
|
|||||||
#![no_std]
|
#![no_std]
|
||||||
|
|
||||||
extern crate conv;
|
extern crate conv;
|
||||||
#[cfg(test)] extern crate rand;
|
|
||||||
#[cfg(test)] #[macro_use] extern crate std;
|
|
||||||
|
|
||||||
#[macro_use] mod macros;
|
#[macro_use] mod macros;
|
||||||
mod average;
|
mod average;
|
||||||
mod weighted_average;
|
mod weighted_average;
|
||||||
|
|
||||||
pub use average::AverageWithError;
|
pub use average::{Average, AverageWithError};
|
||||||
pub use weighted_average::WeightedAverageWithError;
|
pub use weighted_average::{WeightedAverage, WeightedAverageWithError};
|
||||||
|
@ -2,6 +2,117 @@ use core;
|
|||||||
|
|
||||||
use super::AverageWithError;
|
use super::AverageWithError;
|
||||||
|
|
||||||
|
|
||||||
|
/// Estimate the weighted and unweighted arithmetic mean of a sequence of
|
||||||
|
/// numbers ("population").
|
||||||
|
///
|
||||||
|
///
|
||||||
|
/// ## Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use average::WeightedAverage;
|
||||||
|
///
|
||||||
|
/// let a: WeightedAverage = (1..6).zip(1..6)
|
||||||
|
/// .map(|(x, w)| (f64::from(x), f64::from(w))).collect();
|
||||||
|
/// println!("The weighted average is {}.", a.mean());
|
||||||
|
/// ```
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct WeightedAverage {
|
||||||
|
/// Sum of the weights.
|
||||||
|
weight_sum: f64,
|
||||||
|
/// Weighted average value.
|
||||||
|
weighted_avg: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WeightedAverage {
|
||||||
|
/// Create a new weighted and unweighted average estimator.
|
||||||
|
pub fn new() -> WeightedAverage {
|
||||||
|
WeightedAverage {
|
||||||
|
weight_sum: 0., weighted_avg: 0.,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a weighted element sampled from the population.
|
||||||
|
#[inline]
|
||||||
|
pub fn add(&mut self, sample: f64, weight: f64) {
|
||||||
|
// The algorithm for the unweighted average was suggested by Welford in 1962.
|
||||||
|
//
|
||||||
|
// See
|
||||||
|
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
|
||||||
|
// and
|
||||||
|
// http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
|
||||||
|
self.weight_sum += weight;
|
||||||
|
|
||||||
|
let prev_avg = self.weighted_avg;
|
||||||
|
self.weighted_avg = prev_avg + (weight / self.weight_sum) * (sample - prev_avg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Determine whether the sample is empty.
|
||||||
|
///
|
||||||
|
/// Might be a false positive if the sum of weights is zero.
|
||||||
|
#[inline]
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.weight_sum == 0.
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the sum of the weights.
|
||||||
|
#[inline]
|
||||||
|
pub fn sum_weights(&self) -> f64 {
|
||||||
|
self.weight_sum
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Estimate the weighted mean of the sequence.
|
||||||
|
#[inline]
|
||||||
|
pub fn mean(&self) -> f64 {
|
||||||
|
self.weighted_avg
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Merge another sample into this one.
|
||||||
|
///
|
||||||
|
///
|
||||||
|
/// ## Example
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// use average::WeightedAverage;
|
||||||
|
///
|
||||||
|
/// let weighted_sequence: &[(f64, f64)] = &[
|
||||||
|
/// (1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5),
|
||||||
|
/// (6., 0.6), (7., 0.7), (8., 0.8), (9., 0.9)];
|
||||||
|
/// let (left, right) = weighted_sequence.split_at(3);
|
||||||
|
/// let avg_total: WeightedAverage = weighted_sequence.iter().map(|&x| x).collect();
|
||||||
|
/// let mut avg_left: WeightedAverage = left.iter().map(|&x| x).collect();
|
||||||
|
/// let avg_right: WeightedAverage = right.iter().map(|&x| x).collect();
|
||||||
|
/// avg_left.merge(&avg_right);
|
||||||
|
/// assert!((avg_total.mean() - avg_left.mean()).abs() < 1e-15);
|
||||||
|
/// ```
|
||||||
|
#[inline]
|
||||||
|
pub fn merge(&mut self, other: &WeightedAverage) {
|
||||||
|
let total_weight_sum = self.weight_sum + other.weight_sum;
|
||||||
|
self.weighted_avg = (self.weight_sum * self.weighted_avg
|
||||||
|
+ other.weight_sum * other.weighted_avg)
|
||||||
|
/ total_weight_sum;
|
||||||
|
self.weight_sum = total_weight_sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::default::Default for WeightedAverage {
|
||||||
|
fn default() -> WeightedAverage {
|
||||||
|
WeightedAverage::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl core::iter::FromIterator<(f64, f64)> for WeightedAverage {
|
||||||
|
fn from_iter<T>(iter: T) -> WeightedAverage
|
||||||
|
where T: IntoIterator<Item=(f64, f64)>
|
||||||
|
{
|
||||||
|
let mut a = WeightedAverage::new();
|
||||||
|
for (i, w) in iter {
|
||||||
|
a.add(i, w);
|
||||||
|
}
|
||||||
|
a
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Estimate the weighted and unweighted arithmetic mean and the unweighted
|
/// Estimate the weighted and unweighted arithmetic mean and the unweighted
|
||||||
/// variance of a sequence of numbers ("population").
|
/// variance of a sequence of numbers ("population").
|
||||||
///
|
///
|
||||||
@ -19,13 +130,10 @@ use super::AverageWithError;
|
|||||||
/// ```
|
/// ```
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct WeightedAverageWithError {
|
pub struct WeightedAverageWithError {
|
||||||
/// Sum of the weights.
|
|
||||||
weight_sum: f64,
|
|
||||||
/// Sum of the squares of the weights.
|
/// Sum of the squares of the weights.
|
||||||
weight_sum_sq: f64,
|
weight_sum_sq: f64,
|
||||||
/// Weighted average value.
|
/// Estimator of the weighted average.
|
||||||
weighted_avg: f64,
|
weighted_avg: WeightedAverage,
|
||||||
|
|
||||||
/// Estimator of unweighted average and its variance.
|
/// Estimator of unweighted average and its variance.
|
||||||
unweighted_avg: AverageWithError,
|
unweighted_avg: AverageWithError,
|
||||||
}
|
}
|
||||||
@ -34,7 +142,8 @@ impl WeightedAverageWithError {
|
|||||||
/// Create a new weighted and unweighted average estimator.
|
/// Create a new weighted and unweighted average estimator.
|
||||||
pub fn new() -> WeightedAverageWithError {
|
pub fn new() -> WeightedAverageWithError {
|
||||||
WeightedAverageWithError {
|
WeightedAverageWithError {
|
||||||
weight_sum: 0., weight_sum_sq: 0., weighted_avg: 0.,
|
weight_sum_sq: 0.,
|
||||||
|
weighted_avg: WeightedAverage::new(),
|
||||||
unweighted_avg: AverageWithError::new(),
|
unweighted_avg: AverageWithError::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -49,51 +158,55 @@ impl WeightedAverageWithError {
|
|||||||
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
|
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
|
||||||
// and
|
// and
|
||||||
// http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
|
// http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
|
||||||
self.weight_sum += weight;
|
|
||||||
self.weight_sum_sq += weight*weight;
|
self.weight_sum_sq += weight*weight;
|
||||||
|
self.weighted_avg.add(sample, weight);
|
||||||
let prev_avg = self.weighted_avg;
|
|
||||||
self.weighted_avg = prev_avg + (weight / self.weight_sum) * (sample - prev_avg);
|
|
||||||
|
|
||||||
self.unweighted_avg.add(sample);
|
self.unweighted_avg.add(sample);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine whether the sample is empty.
|
/// Determine whether the sample is empty.
|
||||||
|
#[inline]
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self) -> bool {
|
||||||
self.unweighted_avg.is_empty()
|
self.unweighted_avg.is_empty()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the sum of the weights.
|
/// Return the sum of the weights.
|
||||||
|
#[inline]
|
||||||
pub fn sum_weights(&self) -> f64 {
|
pub fn sum_weights(&self) -> f64 {
|
||||||
self.weight_sum
|
self.weighted_avg.sum_weights()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the sum of the squared weights.
|
/// Return the sum of the squared weights.
|
||||||
|
#[inline]
|
||||||
pub fn sum_weights_sq(&self) -> f64 {
|
pub fn sum_weights_sq(&self) -> f64 {
|
||||||
self.weight_sum_sq
|
self.weight_sum_sq
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the weighted mean of the sequence.
|
/// Estimate the weighted mean of the sequence.
|
||||||
|
#[inline]
|
||||||
pub fn weighted_mean(&self) -> f64 {
|
pub fn weighted_mean(&self) -> f64 {
|
||||||
self.weighted_avg
|
self.weighted_avg.mean()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the unweighted mean of the sequence.
|
/// Estimate the unweighted mean of the sequence.
|
||||||
|
#[inline]
|
||||||
pub fn unweighted_mean(&self) -> f64 {
|
pub fn unweighted_mean(&self) -> f64 {
|
||||||
self.unweighted_avg.mean()
|
self.unweighted_avg.mean()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return sample size.
|
/// Return sample size.
|
||||||
|
#[inline]
|
||||||
pub fn len(&self) -> u64 {
|
pub fn len(&self) -> u64 {
|
||||||
self.unweighted_avg.len()
|
self.unweighted_avg.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate the effective sample size.
|
/// Calculate the effective sample size.
|
||||||
|
#[inline]
|
||||||
pub fn effective_len(&self) -> f64 {
|
pub fn effective_len(&self) -> f64 {
|
||||||
if self.is_empty() {
|
if self.is_empty() {
|
||||||
return 0.
|
return 0.
|
||||||
}
|
}
|
||||||
self.weight_sum * self.weight_sum / self.weight_sum_sq
|
let weight_sum = self.weighted_avg.sum_weights();
|
||||||
|
weight_sum * weight_sum / self.weight_sum_sq
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate the *unweighted* population variance of the sample.
|
/// Calculate the *unweighted* population variance of the sample.
|
||||||
@ -121,10 +234,11 @@ impl WeightedAverageWithError {
|
|||||||
// results than the ones used by SPSS or Mentor.
|
// results than the ones used by SPSS or Mentor.
|
||||||
//
|
//
|
||||||
// See http://www.analyticalgroup.com/download/WEIGHTED_VARIANCE.pdf.
|
// See http://www.analyticalgroup.com/download/WEIGHTED_VARIANCE.pdf.
|
||||||
if self.weight_sum == 0. {
|
let weight_sum = self.weighted_avg.sum_weights();
|
||||||
|
if weight_sum == 0. {
|
||||||
return 0.;
|
return 0.;
|
||||||
}
|
}
|
||||||
let inv_effective_len = self.weight_sum_sq / (self.weight_sum * self.weight_sum);
|
let inv_effective_len = self.weight_sum_sq / (weight_sum * weight_sum);
|
||||||
(self.sample_variance() * inv_effective_len).sqrt()
|
(self.sample_variance() * inv_effective_len).sqrt()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -148,13 +262,8 @@ impl WeightedAverageWithError {
|
|||||||
/// assert!((avg_total.error() - avg_left.error()).abs() < 1e-15);
|
/// assert!((avg_total.error() - avg_left.error()).abs() < 1e-15);
|
||||||
/// ```
|
/// ```
|
||||||
pub fn merge(&mut self, other: &WeightedAverageWithError) {
|
pub fn merge(&mut self, other: &WeightedAverageWithError) {
|
||||||
let total_weight_sum = self.weight_sum + other.weight_sum;
|
|
||||||
self.weighted_avg = (self.weight_sum * self.weighted_avg
|
|
||||||
+ other.weight_sum * other.weighted_avg)
|
|
||||||
/ total_weight_sum;
|
|
||||||
self.weight_sum = total_weight_sum;
|
|
||||||
self.weight_sum_sq += other.weight_sum_sq;
|
self.weight_sum_sq += other.weight_sum_sq;
|
||||||
|
self.weighted_avg.merge(&other.weighted_avg);
|
||||||
self.unweighted_avg.merge(&other.unweighted_avg);
|
self.unweighted_avg.merge(&other.unweighted_avg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -176,51 +285,3 @@ impl core::iter::FromIterator<(f64, f64)> for WeightedAverageWithError {
|
|||||||
a
|
a
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn merge_unweighted() {
|
|
||||||
let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
|
||||||
for mid in 0..sequence.len() {
|
|
||||||
let (left, right) = sequence.split_at(mid);
|
|
||||||
let avg_total: WeightedAverageWithError = sequence.iter().map(|x| (*x, 1.)).collect();
|
|
||||||
let mut avg_left: WeightedAverageWithError = left.iter().map(|x| (*x, 1.)).collect();
|
|
||||||
let avg_right: WeightedAverageWithError = right.iter().map(|x| (*x, 1.)).collect();
|
|
||||||
avg_left.merge(&avg_right);
|
|
||||||
|
|
||||||
assert_eq!(avg_total.weight_sum, avg_left.weight_sum);
|
|
||||||
assert_eq!(avg_total.weight_sum_sq, avg_left.weight_sum_sq);
|
|
||||||
assert_eq!(avg_total.weighted_avg, avg_left.weighted_avg);
|
|
||||||
|
|
||||||
assert_eq!(avg_total.unweighted_avg.len(), avg_left.unweighted_avg.len());
|
|
||||||
assert_eq!(avg_total.unweighted_avg.mean(), avg_left.unweighted_avg.mean());
|
|
||||||
assert_eq!(avg_total.unweighted_avg.sample_variance(),
|
|
||||||
avg_left.unweighted_avg.sample_variance());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn merge_weighted() {
|
|
||||||
let sequence: &[(f64, f64)] = &[
|
|
||||||
(1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5),
|
|
||||||
(6., 0.6), (7., 0.7), (8., 0.8), (9., 0.)];
|
|
||||||
for mid in 0..sequence.len() {
|
|
||||||
let (left, right) = sequence.split_at(mid);
|
|
||||||
let avg_total: WeightedAverageWithError = sequence.iter().map(|&(x, w)| (x, w)).collect();
|
|
||||||
let mut avg_left: WeightedAverageWithError = left.iter().map(|&(x, w)| (x, w)).collect();
|
|
||||||
let avg_right: WeightedAverageWithError = right.iter().map(|&(x, w)| (x, w)).collect();
|
|
||||||
avg_left.merge(&avg_right);
|
|
||||||
assert_eq!(avg_total.unweighted_avg.len(), avg_left.unweighted_avg.len());
|
|
||||||
assert_almost_eq!(avg_total.weight_sum, avg_left.weight_sum, 1e-15);
|
|
||||||
assert_eq!(avg_total.weight_sum_sq, avg_left.weight_sum_sq);
|
|
||||||
assert_almost_eq!(avg_total.weighted_avg, avg_left.weighted_avg, 1e-15);
|
|
||||||
assert_almost_eq!(avg_total.unweighted_avg.mean(),
|
|
||||||
avg_left.unweighted_avg.mean(), 1e-15);
|
|
||||||
assert_almost_eq!(avg_total.unweighted_avg.sample_variance(),
|
|
||||||
avg_left.unweighted_avg.sample_variance(), 1e-14);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -44,6 +44,21 @@ fn numerically_unstable() {
|
|||||||
assert_eq!(a.sample_variance(), 30.);
|
assert_eq!(a.sample_variance(), 30.);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn merge() {
|
||||||
|
let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
||||||
|
for mid in 0..sequence.len() {
|
||||||
|
let (left, right) = sequence.split_at(mid);
|
||||||
|
let avg_total: AverageWithError = sequence.iter().map(|x| *x).collect();
|
||||||
|
let mut avg_left: AverageWithError = left.iter().map(|x| *x).collect();
|
||||||
|
let avg_right: AverageWithError = right.iter().map(|x| *x).collect();
|
||||||
|
avg_left.merge(&avg_right);
|
||||||
|
assert_eq!(avg_total.len(), avg_left.len());
|
||||||
|
assert_eq!(avg_total.mean(), avg_left.mean());
|
||||||
|
assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn normal_distribution() {
|
fn normal_distribution() {
|
||||||
use rand::distributions::{Normal, IndependentSample};
|
use rand::distributions::{Normal, IndependentSample};
|
||||||
|
@ -64,3 +64,43 @@ fn error_corner_case() {
|
|||||||
.map(|(x, w)| (*x, *w)).collect();
|
.map(|(x, w)| (*x, *w)).collect();
|
||||||
assert_eq!(a.error(), 0.5);
|
assert_eq!(a.error(), 0.5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn merge_unweighted() {
|
||||||
|
let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
|
||||||
|
for mid in 0..sequence.len() {
|
||||||
|
let (left, right) = sequence.split_at(mid);
|
||||||
|
let avg_total: WeightedAverageWithError = sequence.iter().map(|x| (*x, 1.)).collect();
|
||||||
|
let mut avg_left: WeightedAverageWithError = left.iter().map(|x| (*x, 1.)).collect();
|
||||||
|
let avg_right: WeightedAverageWithError = right.iter().map(|x| (*x, 1.)).collect();
|
||||||
|
avg_left.merge(&avg_right);
|
||||||
|
|
||||||
|
assert_eq!(avg_total.sum_weights(), avg_left.sum_weights());
|
||||||
|
assert_eq!(avg_total.sum_weights_sq(), avg_left.sum_weights_sq());
|
||||||
|
|
||||||
|
assert_eq!(avg_total.len(), avg_left.len());
|
||||||
|
assert_eq!(avg_total.unweighted_mean(), avg_left.unweighted_mean());
|
||||||
|
assert_eq!(avg_total.weighted_mean(), avg_left.weighted_mean());
|
||||||
|
assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn merge_weighted() {
|
||||||
|
let sequence: &[(f64, f64)] = &[
|
||||||
|
(1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5),
|
||||||
|
(6., 0.6), (7., 0.7), (8., 0.8), (9., 0.)];
|
||||||
|
for mid in 0..sequence.len() {
|
||||||
|
let (left, right) = sequence.split_at(mid);
|
||||||
|
let avg_total: WeightedAverageWithError = sequence.iter().map(|&(x, w)| (x, w)).collect();
|
||||||
|
let mut avg_left: WeightedAverageWithError = left.iter().map(|&(x, w)| (x, w)).collect();
|
||||||
|
let avg_right: WeightedAverageWithError = right.iter().map(|&(x, w)| (x, w)).collect();
|
||||||
|
avg_left.merge(&avg_right);
|
||||||
|
assert_eq!(avg_total.len(), avg_left.len());
|
||||||
|
assert_almost_eq!(avg_total.sum_weights(), avg_left.sum_weights(), 1e-15);
|
||||||
|
assert_eq!(avg_total.sum_weights_sq(), avg_left.sum_weights_sq());
|
||||||
|
assert_almost_eq!(avg_total.weighted_mean(), avg_left.weighted_mean(), 1e-15);
|
||||||
|
assert_almost_eq!(avg_total.unweighted_mean(), avg_left.unweighted_mean(), 1e-15);
|
||||||
|
assert_almost_eq!(avg_total.sample_variance(), avg_left.sample_variance(), 1e-14);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user