Reduce code duplication
This commit is contained in:
parent
77fa8b4ed2
commit
9bb501116d
@ -1,6 +1,6 @@
|
|||||||
use core;
|
use core;
|
||||||
|
|
||||||
use conv::ApproxFrom;
|
use super::Average;
|
||||||
|
|
||||||
/// Estimate the weighted and unweighted arithmetic mean and the unweighted
|
/// Estimate the weighted and unweighted arithmetic mean and the unweighted
|
||||||
/// variance of a sequence of numbers ("population").
|
/// variance of a sequence of numbers ("population").
|
||||||
@ -26,12 +26,8 @@ pub struct WeightedAverage {
|
|||||||
/// Weighted average value.
|
/// Weighted average value.
|
||||||
weighted_avg: f64,
|
weighted_avg: f64,
|
||||||
|
|
||||||
/// Number of samples.
|
/// Estimator of unweighted average and its variance.
|
||||||
n: u64,
|
unweighted_avg: Average,
|
||||||
/// Unweighted average value.
|
|
||||||
unweighted_avg: f64,
|
|
||||||
/// Intermediate sum of squares for calculating the *unweighted* variance.
|
|
||||||
v: f64,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl WeightedAverage {
|
impl WeightedAverage {
|
||||||
@ -39,7 +35,7 @@ impl WeightedAverage {
|
|||||||
pub fn new() -> WeightedAverage {
|
pub fn new() -> WeightedAverage {
|
||||||
WeightedAverage {
|
WeightedAverage {
|
||||||
weight_sum: 0., weight_sum_sq: 0., weighted_avg: 0.,
|
weight_sum: 0., weight_sum_sq: 0., weighted_avg: 0.,
|
||||||
n: 0, unweighted_avg: 0., v: 0.,
|
unweighted_avg: Average::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -58,15 +54,12 @@ impl WeightedAverage {
|
|||||||
let prev_avg = self.weighted_avg;
|
let prev_avg = self.weighted_avg;
|
||||||
self.weighted_avg = prev_avg + (weight / self.weight_sum) * (sample - prev_avg);
|
self.weighted_avg = prev_avg + (weight / self.weight_sum) * (sample - prev_avg);
|
||||||
|
|
||||||
self.n += 1;
|
self.unweighted_avg.add(sample);
|
||||||
let delta = sample - self.unweighted_avg;
|
|
||||||
self.unweighted_avg += delta / f64::approx_from(self.n).unwrap();
|
|
||||||
self.v += delta * (sample - self.unweighted_avg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Determine whether the sample is empty.
|
/// Determine whether the sample is empty.
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self) -> bool {
|
||||||
self.n == 0
|
self.unweighted_avg.is_empty()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the sum of the weights.
|
/// Return the sum of the weights.
|
||||||
@ -86,12 +79,12 @@ impl WeightedAverage {
|
|||||||
|
|
||||||
/// Estimate the unweighted mean of the sequence.
|
/// Estimate the unweighted mean of the sequence.
|
||||||
pub fn unweighted_mean(&self) -> f64 {
|
pub fn unweighted_mean(&self) -> f64 {
|
||||||
self.unweighted_avg
|
self.unweighted_avg.mean()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return sample size.
|
/// Return sample size.
|
||||||
pub fn len(&self) -> u64 {
|
pub fn len(&self) -> u64 {
|
||||||
self.n
|
self.unweighted_avg.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate the effective sample size.
|
/// Calculate the effective sample size.
|
||||||
@ -106,20 +99,14 @@ impl WeightedAverage {
|
|||||||
///
|
///
|
||||||
/// This is a biased estimator of the variance of the population.
|
/// This is a biased estimator of the variance of the population.
|
||||||
pub fn population_variance(&self) -> f64 {
|
pub fn population_variance(&self) -> f64 {
|
||||||
if self.n < 2 {
|
self.unweighted_avg.population_variance()
|
||||||
return 0.;
|
|
||||||
}
|
|
||||||
self.v / f64::approx_from(self.n).unwrap()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Calculate the *unweighted* sample variance.
|
/// Calculate the *unweighted* sample variance.
|
||||||
///
|
///
|
||||||
/// This is an unbiased estimator of the variance of the population.
|
/// This is an unbiased estimator of the variance of the population.
|
||||||
pub fn sample_variance(&self) -> f64 {
|
pub fn sample_variance(&self) -> f64 {
|
||||||
if self.n < 2 {
|
self.unweighted_avg.sample_variance()
|
||||||
return 0.;
|
|
||||||
}
|
|
||||||
self.v / f64::approx_from(self.n - 1).unwrap()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the standard error of the *weighted* mean of the sequence.
|
/// Estimate the standard error of the *weighted* mean of the sequence.
|
||||||
@ -160,28 +147,14 @@ impl WeightedAverage {
|
|||||||
/// assert!((avg_total.error() - avg_left.error()).abs() < 1e-15);
|
/// assert!((avg_total.error() - avg_left.error()).abs() < 1e-15);
|
||||||
/// ```
|
/// ```
|
||||||
pub fn merge(&mut self, other: &WeightedAverage) {
|
pub fn merge(&mut self, other: &WeightedAverage) {
|
||||||
// This is similar to the algorithm proposed by Chan et al. in 1979.
|
let total_weight_sum = self.weight_sum + other.weight_sum;
|
||||||
//
|
self.weighted_avg = (self.weight_sum * self.weighted_avg
|
||||||
// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
|
+ other.weight_sum * other.weighted_avg)
|
||||||
{
|
/ total_weight_sum;
|
||||||
let total_weight_sum = self.weight_sum + other.weight_sum;
|
self.weight_sum = total_weight_sum;
|
||||||
self.weighted_avg = (self.weight_sum * self.weighted_avg
|
self.weight_sum_sq += other.weight_sum_sq;
|
||||||
+ other.weight_sum * other.weighted_avg)
|
|
||||||
/ (self.weight_sum + other.weight_sum);
|
self.unweighted_avg.merge(&other.unweighted_avg);
|
||||||
self.weight_sum = total_weight_sum;
|
|
||||||
self.weight_sum_sq += other.weight_sum_sq;
|
|
||||||
}
|
|
||||||
{
|
|
||||||
let delta = other.unweighted_avg - self.unweighted_avg;
|
|
||||||
let len_self = f64::approx_from(self.n).unwrap();
|
|
||||||
let len_other = f64::approx_from(other.n).unwrap();
|
|
||||||
let len_total = len_self + len_other;
|
|
||||||
self.n += other.n;
|
|
||||||
self.unweighted_avg = (len_self * self.unweighted_avg
|
|
||||||
+ len_other * other.unweighted_avg)
|
|
||||||
/ len_total;
|
|
||||||
self.v += other.v + delta*delta * len_self * len_other / len_total;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -216,12 +189,15 @@ mod tests {
|
|||||||
let mut avg_left: WeightedAverage = left.iter().map(|x| (*x, 1.)).collect();
|
let mut avg_left: WeightedAverage = left.iter().map(|x| (*x, 1.)).collect();
|
||||||
let avg_right: WeightedAverage = right.iter().map(|x| (*x, 1.)).collect();
|
let avg_right: WeightedAverage = right.iter().map(|x| (*x, 1.)).collect();
|
||||||
avg_left.merge(&avg_right);
|
avg_left.merge(&avg_right);
|
||||||
assert_eq!(avg_total.n, avg_left.n);
|
|
||||||
assert_eq!(avg_total.weight_sum, avg_left.weight_sum);
|
assert_eq!(avg_total.weight_sum, avg_left.weight_sum);
|
||||||
assert_eq!(avg_total.weight_sum_sq, avg_left.weight_sum_sq);
|
assert_eq!(avg_total.weight_sum_sq, avg_left.weight_sum_sq);
|
||||||
assert_eq!(avg_total.weighted_avg, avg_left.weighted_avg);
|
assert_eq!(avg_total.weighted_avg, avg_left.weighted_avg);
|
||||||
assert_eq!(avg_total.unweighted_avg, avg_left.unweighted_avg);
|
|
||||||
assert_eq!(avg_total.v, avg_left.v);
|
assert_eq!(avg_total.unweighted_avg.len(), avg_left.unweighted_avg.len());
|
||||||
|
assert_eq!(avg_total.unweighted_avg.mean(), avg_left.unweighted_avg.mean());
|
||||||
|
assert_eq!(avg_total.unweighted_avg.sample_variance(),
|
||||||
|
avg_left.unweighted_avg.sample_variance());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -236,12 +212,14 @@ mod tests {
|
|||||||
let mut avg_left: WeightedAverage = left.iter().map(|&(x, w)| (x, w)).collect();
|
let mut avg_left: WeightedAverage = left.iter().map(|&(x, w)| (x, w)).collect();
|
||||||
let avg_right: WeightedAverage = right.iter().map(|&(x, w)| (x, w)).collect();
|
let avg_right: WeightedAverage = right.iter().map(|&(x, w)| (x, w)).collect();
|
||||||
avg_left.merge(&avg_right);
|
avg_left.merge(&avg_right);
|
||||||
assert_eq!(avg_total.n, avg_left.n);
|
assert_eq!(avg_total.unweighted_avg.len(), avg_left.unweighted_avg.len());
|
||||||
assert_almost_eq!(avg_total.weight_sum, avg_left.weight_sum, 1e-15);
|
assert_almost_eq!(avg_total.weight_sum, avg_left.weight_sum, 1e-15);
|
||||||
assert_eq!(avg_total.weight_sum_sq, avg_left.weight_sum_sq);
|
assert_eq!(avg_total.weight_sum_sq, avg_left.weight_sum_sq);
|
||||||
assert_almost_eq!(avg_total.weighted_avg, avg_left.weighted_avg, 1e-15);
|
assert_almost_eq!(avg_total.weighted_avg, avg_left.weighted_avg, 1e-15);
|
||||||
assert_almost_eq!(avg_total.unweighted_avg, avg_left.unweighted_avg, 1e-15);
|
assert_almost_eq!(avg_total.unweighted_avg.mean(),
|
||||||
assert_almost_eq!(avg_total.v, avg_left.v, 1e-14);
|
avg_left.unweighted_avg.mean(), 1e-15);
|
||||||
|
assert_almost_eq!(avg_total.unweighted_avg.sample_variance(),
|
||||||
|
avg_left.unweighted_avg.sample_variance(), 1e-14);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user