Calculate standard error of weighted mean

Also improve the documentation a bit.
This commit is contained in:
Vinzent Steinberg 2017-05-05 20:28:49 +02:00
parent 0488d6f105
commit e54d4aba8b
2 changed files with 73 additions and 7 deletions

View File

@ -48,7 +48,7 @@ impl Average {
self.n == 0
}
/// Return the mean of the sequence.
/// Estimate the mean of the sequence.
pub fn mean(&self) -> f64 {
self.avg
}
@ -78,7 +78,7 @@ impl Average {
self.v / f64::approx_from(self.n).unwrap()
}
/// Calculate the standard error of the mean of the sequence.
/// Estimate the standard error of the mean of the sequence.
pub fn error(&self) -> f64 {
if self.n == 0 {
return 0.;
@ -153,6 +153,12 @@ mod tests {
assert_eq!(a.sample_variance(), 0.0);
assert_eq!(a.population_variance(), 0.0);
assert_eq!(a.error(), 0.0);
a.add(1.0);
assert_eq!(a.mean(), 1.0);
assert_eq!(a.len(), 2);
assert_eq!(a.sample_variance(), 0.0);
assert_eq!(a.population_variance(), 0.0);
assert_eq!(a.error(), 0.0);
}
#[test]

View File

@ -25,7 +25,9 @@ impl WeightedAverage {
// This algorithm was suggested by West in 1979.
//
// See
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
// and
// http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
self.weight_sum += weight;
self.weight_sum_sq += weight*weight;
let prev_avg = self.avg;
@ -48,11 +50,19 @@ impl WeightedAverage {
self.weight_sum_sq
}
/// Return the weighted mean of the sequence.
/// Estimate the weighted mean of the sequence.
pub fn mean(&self) -> f64 {
self.avg
}
/// Calculate the effective sample size.
pub fn effective_len(&self) -> f64 {
if self.is_empty() {
return 0.
}
self.weight_sum * self.weight_sum / self.weight_sum_sq
}
/// Calculate the population variance of the weighted sequence.
///
/// This assumes that the sequence consists of the entire population and the
@ -69,8 +79,10 @@ impl WeightedAverage {
///
/// This assumes that the sequence consists of samples of a larger
/// population and the weights represent *frequency*.
///
/// Note that this is undefined if the sum of the weights is 1.
pub fn sample_variance(&self) -> f64 {
if self.is_empty() {
if self.effective_len() <= 1. {
0.
} else {
self.v / (self.weight_sum - 1.0)
@ -87,6 +99,25 @@ impl WeightedAverage {
self.v / (self.weight_sum - self.weight_sum_sq / self.weight_sum)
}
}
/// Estimate the standard error of the weighted mean of the sequence.
pub fn error(&self) -> f64 {
// This uses the same estimate as SPSS.
//
// See http://www.analyticalgroup.com/download/WEIGHTED_MEAN.pdf.
if self.is_empty() {
return 0.;
}
let variance = if self.weight_sum != 1. {
// We generally want to use the weighted sample variance...
self.sample_variance()
} else {
// ...but in this case it is undefined, so we use the weighted
// population variance instead.
self.population_variance()
};
(variance / self.weight_sum).sqrt()
}
}
impl core::default::Default for WeightedAverage {
@ -123,7 +154,13 @@ mod tests {
assert_eq!(a.sum_weights(), 1.0);
assert_eq!(a.sum_weights_sq(), 1.0);
assert_eq!(a.population_variance(), 0.0);
//assert_eq!(a.error(), 0.0);
assert_eq!(a.error(), 0.0);
a.add(1.0, 1.0);
assert_eq!(a.mean(), 1.0);
assert_eq!(a.sum_weights(), 2.0);
assert_eq!(a.sum_weights_sq(), 2.0);
assert_eq!(a.population_variance(), 0.0);
assert_eq!(a.error(), 0.0);
}
#[test]
@ -132,6 +169,29 @@ mod tests {
assert_eq!(a.mean(), 3.0);
assert_eq!(a.sum_weights(), 5.0);
assert_eq!(a.sample_variance(), 2.5);
//assert_almost_eq!(a.error(), f64::sqrt(0.5), 1e-16);
assert_almost_eq!(a.error(), f64::sqrt(0.5), 1e-16);
}
#[test]
fn reference() {
// Example from http://www.analyticalgroup.com/download/WEIGHTED_MEAN.pdf.
let values = &[5., 5., 4., 4., 3., 4., 3., 2., 2., 1.];
let weights = &[1.23, 2.12, 1.23, 0.32, 1.53, 0.59, 0.94, 0.94, 0.84, 0.73];
let a: WeightedAverage = values.iter().zip(weights.iter())
.map(|(x, w)| (*x, *w)).collect();
assert_almost_eq!(a.mean(), 3.53486, 1e-5);
assert_almost_eq!(a.sample_variance(), 1.8210, 1e-4);
assert_eq!(a.sum_weights(), 10.47);
assert_almost_eq!(a.effective_len(), 8.2315, 1e-4);
assert_almost_eq!(a.error(), f64::sqrt(0.1739), 1e-4);
}
#[test]
fn error_corner_case() {
let values = &[1., 2.];
let weights = &[0.5, 0.5];
let a: WeightedAverage = values.iter().zip(weights.iter())
.map(|(x, w)| (*x, *w)).collect();
assert_eq!(a.error(), 0.5);
}
}