Calculate standard error of weighted mean
Also improve the documentation a bit.
This commit is contained in:
parent
0488d6f105
commit
e54d4aba8b
@ -48,7 +48,7 @@ impl Average {
|
|||||||
self.n == 0
|
self.n == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the mean of the sequence.
|
/// Estimate the mean of the sequence.
|
||||||
pub fn mean(&self) -> f64 {
|
pub fn mean(&self) -> f64 {
|
||||||
self.avg
|
self.avg
|
||||||
}
|
}
|
||||||
@ -78,7 +78,7 @@ impl Average {
|
|||||||
self.v / f64::approx_from(self.n).unwrap()
|
self.v / f64::approx_from(self.n).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate the standard error of the mean of the sequence.
|
/// Estimate the standard error of the mean of the sequence.
|
||||||
pub fn error(&self) -> f64 {
|
pub fn error(&self) -> f64 {
|
||||||
if self.n == 0 {
|
if self.n == 0 {
|
||||||
return 0.;
|
return 0.;
|
||||||
@ -153,6 +153,12 @@ mod tests {
|
|||||||
assert_eq!(a.sample_variance(), 0.0);
|
assert_eq!(a.sample_variance(), 0.0);
|
||||||
assert_eq!(a.population_variance(), 0.0);
|
assert_eq!(a.population_variance(), 0.0);
|
||||||
assert_eq!(a.error(), 0.0);
|
assert_eq!(a.error(), 0.0);
|
||||||
|
a.add(1.0);
|
||||||
|
assert_eq!(a.mean(), 1.0);
|
||||||
|
assert_eq!(a.len(), 2);
|
||||||
|
assert_eq!(a.sample_variance(), 0.0);
|
||||||
|
assert_eq!(a.population_variance(), 0.0);
|
||||||
|
assert_eq!(a.error(), 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -25,7 +25,9 @@ impl WeightedAverage {
|
|||||||
// This algorithm was suggested by West in 1979.
|
// This algorithm was suggested by West in 1979.
|
||||||
//
|
//
|
||||||
// See
|
// See
|
||||||
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
|
||||||
|
// and
|
||||||
|
// http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
|
||||||
self.weight_sum += weight;
|
self.weight_sum += weight;
|
||||||
self.weight_sum_sq += weight*weight;
|
self.weight_sum_sq += weight*weight;
|
||||||
let prev_avg = self.avg;
|
let prev_avg = self.avg;
|
||||||
@ -48,11 +50,19 @@ impl WeightedAverage {
|
|||||||
self.weight_sum_sq
|
self.weight_sum_sq
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the weighted mean of the sequence.
|
/// Estimate the weighted mean of the sequence.
|
||||||
pub fn mean(&self) -> f64 {
|
pub fn mean(&self) -> f64 {
|
||||||
self.avg
|
self.avg
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Calculate the effective sample size.
|
||||||
|
pub fn effective_len(&self) -> f64 {
|
||||||
|
if self.is_empty() {
|
||||||
|
return 0.
|
||||||
|
}
|
||||||
|
self.weight_sum * self.weight_sum / self.weight_sum_sq
|
||||||
|
}
|
||||||
|
|
||||||
/// Calculate the population variance of the weighted sequence.
|
/// Calculate the population variance of the weighted sequence.
|
||||||
///
|
///
|
||||||
/// This assumes that the sequence consists of the entire population and the
|
/// This assumes that the sequence consists of the entire population and the
|
||||||
@ -69,8 +79,10 @@ impl WeightedAverage {
|
|||||||
///
|
///
|
||||||
/// This assumes that the sequence consists of samples of a larger
|
/// This assumes that the sequence consists of samples of a larger
|
||||||
/// population and the weights represent *frequency*.
|
/// population and the weights represent *frequency*.
|
||||||
|
///
|
||||||
|
/// Note that this is undefined if the sum of the weights is 1.
|
||||||
pub fn sample_variance(&self) -> f64 {
|
pub fn sample_variance(&self) -> f64 {
|
||||||
if self.is_empty() {
|
if self.effective_len() <= 1. {
|
||||||
0.
|
0.
|
||||||
} else {
|
} else {
|
||||||
self.v / (self.weight_sum - 1.0)
|
self.v / (self.weight_sum - 1.0)
|
||||||
@ -87,6 +99,25 @@ impl WeightedAverage {
|
|||||||
self.v / (self.weight_sum - self.weight_sum_sq / self.weight_sum)
|
self.v / (self.weight_sum - self.weight_sum_sq / self.weight_sum)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Estimate the standard error of the weighted mean of the sequence.
|
||||||
|
pub fn error(&self) -> f64 {
|
||||||
|
// This uses the same estimate as SPSS.
|
||||||
|
//
|
||||||
|
// See http://www.analyticalgroup.com/download/WEIGHTED_MEAN.pdf.
|
||||||
|
if self.is_empty() {
|
||||||
|
return 0.;
|
||||||
|
}
|
||||||
|
let variance = if self.weight_sum != 1. {
|
||||||
|
// We generally want to use the weighted sample variance...
|
||||||
|
self.sample_variance()
|
||||||
|
} else {
|
||||||
|
// ...but in this case it is undefined, so we use the weighted
|
||||||
|
// population variance instead.
|
||||||
|
self.population_variance()
|
||||||
|
};
|
||||||
|
(variance / self.weight_sum).sqrt()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl core::default::Default for WeightedAverage {
|
impl core::default::Default for WeightedAverage {
|
||||||
@ -123,7 +154,13 @@ mod tests {
|
|||||||
assert_eq!(a.sum_weights(), 1.0);
|
assert_eq!(a.sum_weights(), 1.0);
|
||||||
assert_eq!(a.sum_weights_sq(), 1.0);
|
assert_eq!(a.sum_weights_sq(), 1.0);
|
||||||
assert_eq!(a.population_variance(), 0.0);
|
assert_eq!(a.population_variance(), 0.0);
|
||||||
//assert_eq!(a.error(), 0.0);
|
assert_eq!(a.error(), 0.0);
|
||||||
|
a.add(1.0, 1.0);
|
||||||
|
assert_eq!(a.mean(), 1.0);
|
||||||
|
assert_eq!(a.sum_weights(), 2.0);
|
||||||
|
assert_eq!(a.sum_weights_sq(), 2.0);
|
||||||
|
assert_eq!(a.population_variance(), 0.0);
|
||||||
|
assert_eq!(a.error(), 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -132,6 +169,29 @@ mod tests {
|
|||||||
assert_eq!(a.mean(), 3.0);
|
assert_eq!(a.mean(), 3.0);
|
||||||
assert_eq!(a.sum_weights(), 5.0);
|
assert_eq!(a.sum_weights(), 5.0);
|
||||||
assert_eq!(a.sample_variance(), 2.5);
|
assert_eq!(a.sample_variance(), 2.5);
|
||||||
//assert_almost_eq!(a.error(), f64::sqrt(0.5), 1e-16);
|
assert_almost_eq!(a.error(), f64::sqrt(0.5), 1e-16);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn reference() {
|
||||||
|
// Example from http://www.analyticalgroup.com/download/WEIGHTED_MEAN.pdf.
|
||||||
|
let values = &[5., 5., 4., 4., 3., 4., 3., 2., 2., 1.];
|
||||||
|
let weights = &[1.23, 2.12, 1.23, 0.32, 1.53, 0.59, 0.94, 0.94, 0.84, 0.73];
|
||||||
|
let a: WeightedAverage = values.iter().zip(weights.iter())
|
||||||
|
.map(|(x, w)| (*x, *w)).collect();
|
||||||
|
assert_almost_eq!(a.mean(), 3.53486, 1e-5);
|
||||||
|
assert_almost_eq!(a.sample_variance(), 1.8210, 1e-4);
|
||||||
|
assert_eq!(a.sum_weights(), 10.47);
|
||||||
|
assert_almost_eq!(a.effective_len(), 8.2315, 1e-4);
|
||||||
|
assert_almost_eq!(a.error(), f64::sqrt(0.1739), 1e-4);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn error_corner_case() {
|
||||||
|
let values = &[1., 2.];
|
||||||
|
let weights = &[0.5, 0.5];
|
||||||
|
let a: WeightedAverage = values.iter().zip(weights.iter())
|
||||||
|
.map(|(x, w)| (*x, *w)).collect();
|
||||||
|
assert_eq!(a.error(), 0.5);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user