Make sure quantile works for small samples

Before it was returning wrong results for samples with less than 5
elements.

Also mention that average and quantile will be 0 for empty samples.
This commit is contained in:
Vinzent Steinberg 2017-05-28 14:03:53 +02:00
parent 46c7220c9a
commit 9bf56690e6
3 changed files with 83 additions and 7 deletions

View File

@ -48,6 +48,8 @@ impl Average {
} }
/// Estimate the mean of the population. /// Estimate the mean of the population.
///
/// Returns 0 for an empty sample.
#[inline] #[inline]
pub fn mean(&self) -> f64 { pub fn mean(&self) -> f64 {
self.avg self.avg
@ -158,6 +160,8 @@ impl AverageWithError {
} }
/// Estimate the mean of the population. /// Estimate the mean of the population.
///
/// Returns 0 for an empty sample.
#[inline] #[inline]
pub fn mean(&self) -> f64 { pub fn mean(&self) -> f64 {
self.avg.mean() self.avg.mean()

View File

@ -1,3 +1,5 @@
use core::cmp::min;
use conv::{ApproxFrom, ConvAsUtil, ValueFrom}; use conv::{ApproxFrom, ConvAsUtil, ValueFrom};
use quickersort::sort_floats; use quickersort::sort_floats;
@ -29,6 +31,12 @@ impl Quantile {
} }
} }
/// Return the value of `p` for this p-quantile.
#[inline]
pub fn p(&self) -> f64 {
self.dm[2]
}
/// Add an observation sampled from the population. /// Add an observation sampled from the population.
#[inline] #[inline]
pub fn add(&mut self, x: f64) { pub fn add(&mut self, x: f64) {
@ -109,26 +117,59 @@ impl Quantile {
} }
/// Estimate the p-quantile of the population. /// Estimate the p-quantile of the population.
///
/// Returns 0 for an empty sample.
#[inline] #[inline]
pub fn quantile(&self) -> f64 { pub fn quantile(&self) -> f64 {
self.q[2] if self.len() >= 5 {
return self.q[2];
}
// Estimate quantile by sorting the sample.
if self.is_empty() {
return 0.;
}
let mut heights: [f64; 4] = [
self.q[0], self.q[1], self.q[2], self.q[3]
];
let len = usize::value_from(self.len()).unwrap(); // < 5
sort_floats(&mut heights[..len]);
let desired_index = f64::approx_from(len).unwrap() * self.p() - 1.;
let mut index = desired_index.ceil();
if desired_index == index && index >= 0. {
let index: usize = index.approx().unwrap(); // < 5
if index < len - 1 {
// `q[index]` and `q[index + 1]` are equally valid estimates,
// by convention we take their average.
return 0.5*self.q[index] + 0.5*self.q[index + 1];
}
}
index = index.max(0.);
let mut index: usize = index.approx().unwrap(); // < 5
index = min(index, len - 1);
self.q[index]
} }
/// Return the sample size. /// Return the sample size.
#[inline] #[inline]
pub fn len(&self) -> u64 { pub fn len(&self) -> u64 {
u64::value_from(self.n[4]).unwrap() u64::value_from(self.n[4]).unwrap() // n[4] >= 0
//^ Shouldn't fail on any known platform. }
/// Determine whether the sample is empty.
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
} }
} }
#[test] #[test]
fn reference() { fn reference() {
let observations = [ let observations = [
0.02, 0.5, 0.74, 3.39, 0.83, 0.02, 0.5, 0.74, 3.39, 0.83,
22.37, 10.15, 15.43, 38.62, 15.92, 22.37, 10.15, 15.43, 38.62, 15.92,
34.60, 10.28, 1.47, 0.40, 0.05, 34.60, 10.28, 1.47, 0.40, 0.05,
11.39, 0.27, 0.42, 0.09, 11.37, 11.39, 0.27, 0.42, 0.09, 11.37,
]; ];
let mut q = Quantile::new(0.5); let mut q = Quantile::new(0.5);
for &o in observations.iter() { for &o in observations.iter() {
@ -139,3 +180,22 @@ fn reference() {
assert_eq!(q.len(), 20); assert_eq!(q.len(), 20);
assert_eq!(q.quantile(), 4.2462394088036435); assert_eq!(q.quantile(), 4.2462394088036435);
} }
#[test]
fn few_observations() {
let mut q = Quantile::new(0.5);
assert_eq!(q.len(), 0);
assert_eq!(q.quantile(), 0.);
q.add(1.);
assert_eq!(q.len(), 1);
assert_eq!(q.quantile(), 1.);
q.add(2.);
assert_eq!(q.len(), 2);
assert_eq!(q.quantile(), 1.5);
q.add(3.);
assert_eq!(q.len(), 3);
assert_eq!(q.quantile(), 2.);
q.add(4.);
assert_eq!(q.len(), 4);
assert_eq!(q.quantile(), 2.5);
}

View File

@ -56,12 +56,16 @@ impl WeightedAverage {
} }
/// Return the sum of the weights. /// Return the sum of the weights.
///
/// Returns 0 for an empty sample.
#[inline] #[inline]
pub fn sum_weights(&self) -> f64 { pub fn sum_weights(&self) -> f64 {
self.weight_sum self.weight_sum
} }
/// Estimate the weighted mean of the population. /// Estimate the weighted mean of the population.
///
/// Returns 0 for an empty sample.
#[inline] #[inline]
pub fn mean(&self) -> f64 { pub fn mean(&self) -> f64 {
self.weighted_avg self.weighted_avg
@ -171,24 +175,32 @@ impl WeightedAverageWithError {
} }
/// Return the sum of the weights. /// Return the sum of the weights.
///
/// Returns 0 for an empty sample.
#[inline] #[inline]
pub fn sum_weights(&self) -> f64 { pub fn sum_weights(&self) -> f64 {
self.weighted_avg.sum_weights() self.weighted_avg.sum_weights()
} }
/// Return the sum of the squared weights. /// Return the sum of the squared weights.
///
/// Returns 0 for an empty sample.
#[inline] #[inline]
pub fn sum_weights_sq(&self) -> f64 { pub fn sum_weights_sq(&self) -> f64 {
self.weight_sum_sq self.weight_sum_sq
} }
/// Estimate the weighted mean of the population. /// Estimate the weighted mean of the population.
///
/// Returns 0 for an empty sample.
#[inline] #[inline]
pub fn weighted_mean(&self) -> f64 { pub fn weighted_mean(&self) -> f64 {
self.weighted_avg.mean() self.weighted_avg.mean()
} }
/// Estimate the unweighted mean of the population. /// Estimate the unweighted mean of the population.
///
/// Returns 0 for an empty sample.
#[inline] #[inline]
pub fn unweighted_mean(&self) -> f64 { pub fn unweighted_mean(&self) -> f64 {
self.unweighted_avg.mean() self.unweighted_avg.mean()