Make sure quantile works for small samples
Before it was returning wrong results for samples with less than 5 elements. Also mention that average and quantile will be 0 for empty samples.
This commit is contained in:
parent
46c7220c9a
commit
9bf56690e6
@ -48,6 +48,8 @@ impl Average {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the mean of the population.
|
/// Estimate the mean of the population.
|
||||||
|
///
|
||||||
|
/// Returns 0 for an empty sample.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn mean(&self) -> f64 {
|
pub fn mean(&self) -> f64 {
|
||||||
self.avg
|
self.avg
|
||||||
@ -158,6 +160,8 @@ impl AverageWithError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the mean of the population.
|
/// Estimate the mean of the population.
|
||||||
|
///
|
||||||
|
/// Returns 0 for an empty sample.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn mean(&self) -> f64 {
|
pub fn mean(&self) -> f64 {
|
||||||
self.avg.mean()
|
self.avg.mean()
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
use core::cmp::min;
|
||||||
|
|
||||||
use conv::{ApproxFrom, ConvAsUtil, ValueFrom};
|
use conv::{ApproxFrom, ConvAsUtil, ValueFrom};
|
||||||
use quickersort::sort_floats;
|
use quickersort::sort_floats;
|
||||||
|
|
||||||
@ -29,6 +31,12 @@ impl Quantile {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the value of `p` for this p-quantile.
|
||||||
|
#[inline]
|
||||||
|
pub fn p(&self) -> f64 {
|
||||||
|
self.dm[2]
|
||||||
|
}
|
||||||
|
|
||||||
/// Add an observation sampled from the population.
|
/// Add an observation sampled from the population.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn add(&mut self, x: f64) {
|
pub fn add(&mut self, x: f64) {
|
||||||
@ -109,26 +117,59 @@ impl Quantile {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the p-quantile of the population.
|
/// Estimate the p-quantile of the population.
|
||||||
|
///
|
||||||
|
/// Returns 0 for an empty sample.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn quantile(&self) -> f64 {
|
pub fn quantile(&self) -> f64 {
|
||||||
self.q[2]
|
if self.len() >= 5 {
|
||||||
|
return self.q[2];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Estimate quantile by sorting the sample.
|
||||||
|
if self.is_empty() {
|
||||||
|
return 0.;
|
||||||
|
}
|
||||||
|
let mut heights: [f64; 4] = [
|
||||||
|
self.q[0], self.q[1], self.q[2], self.q[3]
|
||||||
|
];
|
||||||
|
let len = usize::value_from(self.len()).unwrap(); // < 5
|
||||||
|
sort_floats(&mut heights[..len]);
|
||||||
|
let desired_index = f64::approx_from(len).unwrap() * self.p() - 1.;
|
||||||
|
let mut index = desired_index.ceil();
|
||||||
|
if desired_index == index && index >= 0. {
|
||||||
|
let index: usize = index.approx().unwrap(); // < 5
|
||||||
|
if index < len - 1 {
|
||||||
|
// `q[index]` and `q[index + 1]` are equally valid estimates,
|
||||||
|
// by convention we take their average.
|
||||||
|
return 0.5*self.q[index] + 0.5*self.q[index + 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
index = index.max(0.);
|
||||||
|
let mut index: usize = index.approx().unwrap(); // < 5
|
||||||
|
index = min(index, len - 1);
|
||||||
|
self.q[index]
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the sample size.
|
/// Return the sample size.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn len(&self) -> u64 {
|
pub fn len(&self) -> u64 {
|
||||||
u64::value_from(self.n[4]).unwrap()
|
u64::value_from(self.n[4]).unwrap() // n[4] >= 0
|
||||||
//^ Shouldn't fail on any known platform.
|
}
|
||||||
|
|
||||||
|
/// Determine whether the sample is empty.
|
||||||
|
#[inline]
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.len() == 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn reference() {
|
fn reference() {
|
||||||
let observations = [
|
let observations = [
|
||||||
0.02, 0.5, 0.74, 3.39, 0.83,
|
0.02, 0.5, 0.74, 3.39, 0.83,
|
||||||
22.37, 10.15, 15.43, 38.62, 15.92,
|
22.37, 10.15, 15.43, 38.62, 15.92,
|
||||||
34.60, 10.28, 1.47, 0.40, 0.05,
|
34.60, 10.28, 1.47, 0.40, 0.05,
|
||||||
11.39, 0.27, 0.42, 0.09, 11.37,
|
11.39, 0.27, 0.42, 0.09, 11.37,
|
||||||
];
|
];
|
||||||
let mut q = Quantile::new(0.5);
|
let mut q = Quantile::new(0.5);
|
||||||
for &o in observations.iter() {
|
for &o in observations.iter() {
|
||||||
@ -139,3 +180,22 @@ fn reference() {
|
|||||||
assert_eq!(q.len(), 20);
|
assert_eq!(q.len(), 20);
|
||||||
assert_eq!(q.quantile(), 4.2462394088036435);
|
assert_eq!(q.quantile(), 4.2462394088036435);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn few_observations() {
|
||||||
|
let mut q = Quantile::new(0.5);
|
||||||
|
assert_eq!(q.len(), 0);
|
||||||
|
assert_eq!(q.quantile(), 0.);
|
||||||
|
q.add(1.);
|
||||||
|
assert_eq!(q.len(), 1);
|
||||||
|
assert_eq!(q.quantile(), 1.);
|
||||||
|
q.add(2.);
|
||||||
|
assert_eq!(q.len(), 2);
|
||||||
|
assert_eq!(q.quantile(), 1.5);
|
||||||
|
q.add(3.);
|
||||||
|
assert_eq!(q.len(), 3);
|
||||||
|
assert_eq!(q.quantile(), 2.);
|
||||||
|
q.add(4.);
|
||||||
|
assert_eq!(q.len(), 4);
|
||||||
|
assert_eq!(q.quantile(), 2.5);
|
||||||
|
}
|
||||||
|
@ -56,12 +56,16 @@ impl WeightedAverage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Return the sum of the weights.
|
/// Return the sum of the weights.
|
||||||
|
///
|
||||||
|
/// Returns 0 for an empty sample.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn sum_weights(&self) -> f64 {
|
pub fn sum_weights(&self) -> f64 {
|
||||||
self.weight_sum
|
self.weight_sum
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the weighted mean of the population.
|
/// Estimate the weighted mean of the population.
|
||||||
|
///
|
||||||
|
/// Returns 0 for an empty sample.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn mean(&self) -> f64 {
|
pub fn mean(&self) -> f64 {
|
||||||
self.weighted_avg
|
self.weighted_avg
|
||||||
@ -171,24 +175,32 @@ impl WeightedAverageWithError {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Return the sum of the weights.
|
/// Return the sum of the weights.
|
||||||
|
///
|
||||||
|
/// Returns 0 for an empty sample.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn sum_weights(&self) -> f64 {
|
pub fn sum_weights(&self) -> f64 {
|
||||||
self.weighted_avg.sum_weights()
|
self.weighted_avg.sum_weights()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the sum of the squared weights.
|
/// Return the sum of the squared weights.
|
||||||
|
///
|
||||||
|
/// Returns 0 for an empty sample.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn sum_weights_sq(&self) -> f64 {
|
pub fn sum_weights_sq(&self) -> f64 {
|
||||||
self.weight_sum_sq
|
self.weight_sum_sq
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the weighted mean of the population.
|
/// Estimate the weighted mean of the population.
|
||||||
|
///
|
||||||
|
/// Returns 0 for an empty sample.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn weighted_mean(&self) -> f64 {
|
pub fn weighted_mean(&self) -> f64 {
|
||||||
self.weighted_avg.mean()
|
self.weighted_avg.mean()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Estimate the unweighted mean of the population.
|
/// Estimate the unweighted mean of the population.
|
||||||
|
///
|
||||||
|
/// Returns 0 for an empty sample.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn unweighted_mean(&self) -> f64 {
|
pub fn unweighted_mean(&self) -> f64 {
|
||||||
self.unweighted_avg.mean()
|
self.unweighted_avg.mean()
|
||||||
|
Loading…
Reference in New Issue
Block a user