Implement histograms

In order for them to have constant size, a macro is provided to create
the histogram type. This should be replaced by const generics once Rust
has them.
This commit is contained in:
Vinzent Steinberg 2018-03-06 15:54:30 +01:00
parent 68a4fa64cb
commit 3a0dcafd21
3 changed files with 188 additions and 0 deletions

114
src/histogram.rs Normal file
View File

@ -0,0 +1,114 @@
/// Define a histogram with a number of bins known at compile time.
///
/// `define_histogram!(Name, N)` expands to a private constant `LEN` (equal to
/// `N`) and a public struct `Name` that stores `N` counters and `N + 1` bin
/// edges. Because the expansion declares `LEN`, the macro can be invoked at
/// most once per module or block scope.
///
/// ```
/// # extern crate core;
/// # #[macro_use] extern crate average;
/// # fn main() {
/// define_histogram!(Histogram, 10);
/// let mut h = Histogram::with_const_width(0., 100.);
/// for i in 0..100 {
///     h.add(i as f64).unwrap();
/// }
/// assert_eq!(h.bins(), &[10, 10, 10, 10, 10, 10, 10, 10, 10, 10]);
/// # }
/// ```
#[macro_export]
macro_rules! define_histogram {
    ($name:ident, $LEN:expr) => (
        /// The number of bins of the histogram.
        const LEN: usize = $LEN;

        /// A histogram with a number of bins known at compile time.
        #[derive(Debug, Clone)]
        pub struct $name {
            /// Bin edges; bin `i` covers all `x` with `range[i] <= x < range[i + 1]`.
            range: [f64; LEN + 1],
            /// Number of samples counted in each bin.
            bin: [u64; LEN],
        }

        impl $name {
            /// Construct a histogram with constant bin width.
            ///
            /// The interval from `start` to `end` is divided into `LEN` bins
            /// of equal width. No validation is performed: if `start` or `end`
            /// is `nan`, or `start > end`, the histogram is still constructed
            /// but its bins are not meaningful.
            #[inline]
            pub fn with_const_width(start: f64, end: f64) -> Self {
                let step = (end - start) / (LEN as f64);
                let mut range = [0.0_f64; LEN + 1];
                for (i, edge) in range.iter_mut().enumerate() {
                    // Offset every edge by `start`; without it the bins would
                    // always begin at zero regardless of the requested range.
                    *edge = start + step * (i as f64);
                }
                Self {
                    range,
                    bin: [0; LEN],
                }
            }

            /// Construct a histogram from given ranges.
            ///
            /// The ranges are given by an iterator of floats where neighboring
            /// pairs `(a, b)` define a bin for all `x` where `a <= x < b`.
            /// Only the first `n + 1` items are used, where `n` is the number
            /// of bins; any further items are ignored.
            ///
            /// # Errors
            ///
            /// Fails if the iterator is too short (less than `n + 1` items),
            /// is not sorted or contains `nan`. `inf` and empty ranges are
            /// allowed.
            #[inline]
            pub fn from_ranges<T>(ranges: T) -> Result<Self, ()>
                where T: IntoIterator<Item = f64>
            {
                let mut range = [0.0_f64; LEN + 1];
                // Number of edges accepted so far. Counting (instead of
                // remembering the last index) also rejects an empty iterator
                // when the histogram has zero bins.
                let mut count = 0;
                for (i, r) in ranges.into_iter().take(LEN + 1).enumerate() {
                    if r.is_nan() {
                        return Err(());
                    }
                    if i > 0 && range[i - 1] > r {
                        return Err(());
                    }
                    range[i] = r;
                    count = i + 1;
                }
                if count != LEN + 1 {
                    return Err(());
                }
                Ok(Self {
                    range,
                    bin: [0; LEN],
                })
            }

            /// Add a sample to the histogram.
            ///
            /// The sample is counted in the bin `i` for which
            /// `ranges()[i] <= x < ranges()[i + 1]`. Empty bins (two equal
            /// neighboring edges) never receive a sample.
            ///
            /// # Errors
            ///
            /// Fails if the sample is out of range of the histogram or is
            /// `nan`. This method does not panic.
            #[inline]
            pub fn add(&mut self, x: f64) -> Result<(), ()> {
                // Binary search for the number of edges that are `<= x`.
                // Written by hand so that duplicated edges resolve
                // deterministically and so that `nan` (for which every
                // comparison is false) falls through to the error path
                // instead of panicking.
                let mut lo = 0;
                let mut hi = LEN + 1;
                while lo < hi {
                    let mid = lo + (hi - lo) / 2;
                    if self.range[mid] <= x {
                        lo = mid + 1;
                    } else {
                        hi = mid;
                    }
                }
                // `lo == 0`: x lies below the first edge (or is `nan`).
                // `lo == LEN + 1`: x is at or above the last edge.
                if lo == 0 || lo > LEN {
                    return Err(());
                }
                self.bin[lo - 1] += 1;
                Ok(())
            }

            /// Return the bins of the histogram.
            ///
            /// Element `i` is the number of samples counted in bin `i`.
            #[inline]
            pub fn bins(&self) -> &[u64] {
                &self.bin
            }

            /// Return the ranges of the histogram.
            ///
            /// The slice holds one more element than `bins()`: the edges
            /// delimiting the bins.
            #[inline]
            pub fn ranges(&self) -> &[f64] {
                &self.range
            }
        }
    );
}

View File

@ -89,6 +89,7 @@ mod weighted_mean;
mod minmax;
mod quantile;
mod traits;
#[macro_use] mod histogram;
pub use moments::{Mean, Variance, Skewness, Kurtosis, MeanWithError, Moments};
pub use weighted_mean::{WeightedMean, WeightedMeanWithError};

73
tests/histogram.rs Normal file
View File

@ -0,0 +1,73 @@
#[macro_use] extern crate average;
extern crate core;
use core::iter::Iterator;
define_histogram!(Histogram, 10);
/// One hundred evenly spaced samples over ten equal-width bins must put
/// exactly ten samples into every bin.
#[test]
fn with_const_width() {
    let mut hist = Histogram::with_const_width(0., 100.);
    (0..100).for_each(|sample| hist.add(sample as f64).unwrap());
    assert_eq!(hist.bins(), &[10; 10]);
}
/// Samples are counted in the bin whose half-open interval contains them
/// when the edges are supplied explicitly.
#[test]
fn from_ranges() {
    let edges = [0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9, 1.0, 2.0];
    let samples = [0.05, 0.7, 1.0, 1.5];
    let mut hist = Histogram::from_ranges(edges.iter().cloned()).unwrap();
    for sample in samples.iter() {
        hist.add(*sample).unwrap();
    }
    assert_eq!(hist.bins(), &[1, 0, 0, 0, 0, 0, 1, 0, 0, 2]);
}
/// Infinite outer edges are accepted, so arbitrarily small or large finite
/// samples are counted in the first or last bin.
#[test]
fn from_ranges_infinity() {
    let infinity = std::f64::INFINITY;
    let edges = [-infinity, -0.4, -0.3, -0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4, infinity];
    let samples = [-100., -0.45, 0., 0.25, 0.4, 100.];
    let mut hist = Histogram::from_ranges(edges.iter().cloned()).unwrap();
    for sample in samples.iter() {
        hist.add(*sample).unwrap();
    }
    assert_eq!(hist.bins(), &[2, 0, 0, 0, 0, 1, 0, 1, 0, 2]);
}
/// `from_ranges` must reject edge lists that are too short, contain `nan`
/// or are not sorted, and must accept lists with empty (zero-width) bins.
#[test]
fn from_ranges_invalid() {
    // No edges at all.
    assert!(Histogram::from_ranges([].iter().cloned()).is_err());

    let valid = vec![0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9, 1.0, 2.0];
    assert!(Histogram::from_ranges(valid.iter().cloned()).is_ok());

    // One edge short of the eleven required for ten bins.
    assert!(Histogram::from_ranges(valid.iter().cloned().take(10)).is_err());

    let mut invalid_nan = valid.clone();
    invalid_nan[3] = std::f64::NAN;
    assert!(Histogram::from_ranges(invalid_nan.iter().cloned()).is_err());

    let mut invalid_order = valid.clone();
    invalid_order[10] = 0.9;
    assert!(Histogram::from_ranges(invalid_order.iter().cloned()).is_err());

    // Equal neighboring edges describe empty bins, which are allowed.
    // This case was previously constructed but never checked.
    let mut valid_empty_ranges = valid.clone();
    valid_empty_ranges[1] = 0.;
    valid_empty_ranges[10] = 1.;
    assert!(Histogram::from_ranges(valid_empty_ranges.iter().cloned()).is_ok());
}
/// Zero-width bins (equal neighboring edges) stay empty while the
/// remaining bins count samples as usual.
#[test]
fn from_ranges_empty() {
    let edges = [0., 0., 0.2, 0.3, 0.4, 0.5, 0.5, 0.8, 0.9, 2.0, 2.0];
    let samples = [0.05, 0.7, 1.0, 1.5];
    let mut hist = Histogram::from_ranges(edges.iter().cloned()).unwrap();
    for sample in samples.iter() {
        hist.add(*sample).unwrap();
    }
    assert_eq!(hist.bins(), &[0, 1, 0, 0, 0, 0, 1, 0, 2, 0]);
}
/// Samples below the first edge, or at or above the last edge, are
/// rejected; samples inside the range are accepted.
#[test]
fn out_of_range() {
    let mut hist = Histogram::with_const_width(0., 100.);
    let cases = [
        (-0.1, Err(())),
        (0.0, Ok(())),
        (1.0, Ok(())),
        (100.0, Err(())),
        (100.1, Err(())),
    ];
    for &(sample, expected) in cases.iter() {
        assert_eq!(hist.add(sample), expected);
    }
}