From 9bf56690e6e4eee43de01d12ec8c7452c5d51c49 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Sun, 28 May 2017 14:03:53 +0200 Subject: [PATCH] Make sure quantile works for small samples Before it was returning wrong results for samples with less than 5 elements. Also mention that average and quantile will be 0 for empty samples. --- src/average.rs | 4 +++ src/quantile.rs | 74 +++++++++++++++++++++++++++++++++++++---- src/weighted_average.rs | 12 +++++++ 3 files changed, 83 insertions(+), 7 deletions(-) diff --git a/src/average.rs b/src/average.rs index 63f918a..02996f4 100644 --- a/src/average.rs +++ b/src/average.rs @@ -48,6 +48,8 @@ impl Average { } /// Estimate the mean of the population. + /// + /// Returns 0 for an empty sample. #[inline] pub fn mean(&self) -> f64 { self.avg @@ -158,6 +160,8 @@ impl AverageWithError { } /// Estimate the mean of the population. + /// + /// Returns 0 for an empty sample. #[inline] pub fn mean(&self) -> f64 { self.avg.mean() diff --git a/src/quantile.rs b/src/quantile.rs index c1eeebd..cb99440 100644 --- a/src/quantile.rs +++ b/src/quantile.rs @@ -1,3 +1,5 @@ +use core::cmp::min; + use conv::{ApproxFrom, ConvAsUtil, ValueFrom}; use quickersort::sort_floats; @@ -29,6 +31,12 @@ impl Quantile { } } + /// Return the value of `p` for this p-quantile. + #[inline] + pub fn p(&self) -> f64 { + self.dm[2] + } + /// Add an observation sampled from the population. #[inline] pub fn add(&mut self, x: f64) { @@ -109,26 +117,59 @@ impl Quantile { } /// Estimate the p-quantile of the population. + /// + /// Returns 0 for an empty sample. #[inline] pub fn quantile(&self) -> f64 { - self.q[2] + if self.len() >= 5 { + return self.q[2]; + } + + // Estimate quantile by sorting the sample. + if self.is_empty() { + return 0.; + } + let mut heights: [f64; 4] = [ + self.q[0], self.q[1], self.q[2], self.q[3] + ]; + let len = usize::value_from(self.len()).unwrap(); // < 5 + sort_floats(&mut heights[..len]); + let desired_index = f64::approx_from(len).unwrap() * self.p() - 1.; + let mut index = desired_index.ceil(); + if desired_index == index && index >= 0. { + let index: usize = index.approx().unwrap(); // < 5 + if index < len - 1 { + // `q[index]` and `q[index + 1]` are equally valid estimates, + // by convention we take their average. + return 0.5*self.q[index] + 0.5*self.q[index + 1]; + } + } + index = index.max(0.); + let mut index: usize = index.approx().unwrap(); // < 5 + index = min(index, len - 1); + self.q[index] } /// Return the sample size. #[inline] pub fn len(&self) -> u64 { - u64::value_from(self.n[4]).unwrap() - //^ Shouldn't fail on any known platform. + u64::value_from(self.n[4]).unwrap() // n[4] >= 0 + } + + /// Determine whether the sample is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 } } #[test] fn reference() { let observations = [ - 0.02, 0.5, 0.74, 3.39, 0.83, - 22.37, 10.15, 15.43, 38.62, 15.92, - 34.60, 10.28, 1.47, 0.40, 0.05, - 11.39, 0.27, 0.42, 0.09, 11.37, + 0.02, 0.5, 0.74, 3.39, 0.83, + 22.37, 10.15, 15.43, 38.62, 15.92, + 34.60, 10.28, 1.47, 0.40, 0.05, + 11.39, 0.27, 0.42, 0.09, 11.37, ]; let mut q = Quantile::new(0.5); for &o in observations.iter() { @@ -139,3 +180,22 @@ fn reference() { assert_eq!(q.len(), 20); assert_eq!(q.quantile(), 4.2462394088036435); } + +#[test] +fn few_observations() { + let mut q = Quantile::new(0.5); + assert_eq!(q.len(), 0); + assert_eq!(q.quantile(), 0.); + q.add(1.); + assert_eq!(q.len(), 1); + assert_eq!(q.quantile(), 1.); + q.add(2.); + assert_eq!(q.len(), 2); + assert_eq!(q.quantile(), 1.5); + q.add(3.); + assert_eq!(q.len(), 3); + assert_eq!(q.quantile(), 2.); + q.add(4.); + assert_eq!(q.len(), 4); + assert_eq!(q.quantile(), 2.5); +} diff --git a/src/weighted_average.rs b/src/weighted_average.rs index 62fd945..0cb20f2 100644 --- a/src/weighted_average.rs +++ b/src/weighted_average.rs @@ -56,12 +56,16 @@ impl WeightedAverage { } /// Return the sum of the weights. + /// + /// Returns 0 for an empty sample. #[inline] pub fn sum_weights(&self) -> f64 { self.weight_sum } /// Estimate the weighted mean of the population. + /// + /// Returns 0 for an empty sample. #[inline] pub fn mean(&self) -> f64 { self.weighted_avg @@ -171,24 +175,32 @@ impl WeightedAverageWithError { } /// Return the sum of the weights. + /// + /// Returns 0 for an empty sample. #[inline] pub fn sum_weights(&self) -> f64 { self.weighted_avg.sum_weights() } /// Return the sum of the squared weights. + /// + /// Returns 0 for an empty sample. #[inline] pub fn sum_weights_sq(&self) -> f64 { self.weight_sum_sq } /// Estimate the weighted mean of the population. + /// + /// Returns 0 for an empty sample. #[inline] pub fn weighted_mean(&self) -> f64 { self.weighted_avg.mean() } /// Estimate the unweighted mean of the population. + /// + /// Returns 0 for an empty sample. #[inline] pub fn unweighted_mean(&self) -> f64 { self.unweighted_avg.mean()