diff --git a/src/histogram.rs b/src/histogram.rs index 17cfbb0..3d943d2 100644 --- a/src/histogram.rs +++ b/src/histogram.rs @@ -1,3 +1,264 @@ +#[doc(hidden)] +#[macro_export] +macro_rules! define_histogram_common { + ($LEN:expr) => ( + use $crate::Histogram as Trait; + + /// The number of bins of the histogram. + const LEN: usize = $LEN; + + impl ::core::fmt::Debug for Histogram { + fn fmt(&self, formatter: &mut ::core::fmt::Formatter<'_>) + -> ::core::fmt::Result { + formatter.write_str("Histogram {{ range: ")?; + self.range[..].fmt(formatter)?; + formatter.write_str(", bins: ")?; + self.bin[..].fmt(formatter)?; + formatter.write_str(" }}") + } + } + + impl Histogram { + /// Construct a histogram with constant bin width. + #[inline] + pub fn with_const_width(start: f64, end: f64) -> Self { + let step = (end - start) / (LEN as f64); + let mut range = [0.; LEN + 1]; + for (i, r) in range.iter_mut().enumerate() { + *r = start + step * (i as f64); + } + + Self { + range, + bin: [0; LEN], + } + } + + /// Construct a histogram from given ranges. + /// + /// The ranges are given by an iterator of floats where neighboring + /// pairs `(a, b)` define a bin for all `x` where `a <= x < b`. + /// + /// Fails if the iterator is too short (less than `n + 1` where `n` + /// is the number of bins), is not sorted or contains `nan`. `inf` + /// and empty ranges are allowed. + #[inline] + pub fn from_ranges(ranges: T) -> Result + where T: IntoIterator + { + let mut range = [0.; LEN + 1]; + let mut last_i = 0; + for (i, r) in ranges.into_iter().enumerate() { + if i > LEN { + break; + } + if r.is_nan() { + return Err(()); + } + if i > 0 && range[i - 1] > r { + return Err(()); + } + range[i] = r; + last_i = i; + } + if last_i != LEN { + return Err(()); + } + Ok(Self { + range, + bin: [0; LEN], + }) + } + + /// Find the index of the bin corresponding to the given sample. + /// + /// Fails if the sample is out of range of the histogram. + #[inline] + pub fn find(&self, x: f64) -> Result { + // We made sure our ranges are valid at construction, so we can + // safely unwrap. + match self.range.binary_search_by(|p| p.partial_cmp(&x).unwrap()) { + Ok(i) if i < LEN => { + Ok(i) + }, + Err(i) if i > 0 && i < LEN + 1 => { + Ok(i - 1) + }, + _ => { + Err(()) + }, + } + } + + /// Add a sample to the histogram. + /// + /// Fails if the sample is out of range of the histogram. + #[inline] + pub fn add(&mut self, x: f64) -> Result<(), ()> { + if let Ok(i) = self.find(x) { + self.bin[i] += 1; + Ok(()) + } else { + Err(()) + } + } + + /// Return the ranges of the histogram. + #[inline] + pub fn ranges(&self) -> &[f64] { + &self.range[..] + } + + /// Return an iterator over the bins and corresponding ranges: + /// `((lower, upper), count)` + #[inline] + pub fn iter(&self) -> IterHistogram<'_> { + self.into_iter() + } + + /// Reset all bins to zero. + #[inline] + pub fn reset(&mut self) { + self.bin = [0; LEN]; + } + + /// Return the lower range limit. + /// + /// (The corresponding bin might be empty.) + #[inline] + pub fn range_min(&self) -> f64 { + self.range[0] + } + + /// Return the upper range limit. + /// + /// (The corresponding bin might be empty.) + #[inline] + pub fn range_max(&self) -> f64 { + self.range[LEN] + } + } + + /// Iterate over all `(range, count)` pairs in the histogram. + pub struct IterHistogram<'a> { + remaining_bin: &'a [u64], + remaining_range: &'a [f64], + } + + impl<'a> ::core::iter::Iterator for IterHistogram<'a> { + type Item = ((f64, f64), u64); + fn next(&mut self) -> Option<((f64, f64), u64)> { + if let Some((&bin, rest)) = self.remaining_bin.split_first() { + let left = self.remaining_range[0]; + let right = self.remaining_range[1]; + self.remaining_bin = rest; + self.remaining_range = &self.remaining_range[1..]; + return Some(((left, right), bin)); + } + None + } + } + + impl<'a> ::core::iter::IntoIterator for &'a Histogram { + type Item = ((f64, f64), u64); + type IntoIter = IterHistogram<'a>; + fn into_iter(self) -> IterHistogram<'a> { + IterHistogram { + remaining_bin: self.bins(), + remaining_range: self.ranges(), + } + } + } + + impl $crate::Histogram for Histogram { + #[inline] + fn bins(&self) -> &[u64] { + &self.bin[..] + } + } + + impl<'a> ::core::ops::AddAssign<&'a Self> for Histogram { + #[inline] + fn add_assign(&mut self, other: &Self) { + for (a, b) in self.range.iter().zip(other.range.iter()) { + assert_eq!(a, b, "Both histograms must have the same ranges"); + } + for (x, y) in self.bin.iter_mut().zip(other.bin.iter()) { + *x += y; + } + } + } + + impl ::core::ops::MulAssign for Histogram { + #[inline] + fn mul_assign(&mut self, other: u64) { + for x in &mut self.bin[..] { + *x *= other; + } + } + } + + impl $crate::Merge for Histogram { + fn merge(&mut self, other: &Self) { + assert_eq!(self.bin.len(), other.bin.len()); + for (a, b) in self.range.iter().zip(other.range.iter()) { + assert_eq!(a, b, "Both histograms must have the same ranges"); + } + for (a, b) in self.bin.iter_mut().zip(other.bin.iter()) { + *a += *b; + } + } + } + ); +} + +#[cfg(feature = "serde1")] +#[doc(hidden)] +#[macro_export] +macro_rules! define_histogram_inner { + ($name:ident, $LEN:expr) => ( + mod $name { + $crate::define_histogram_common!($LEN); + + use ::serde::{Serialize, Deserialize}; + serde_big_array::big_array! { + BigArray; LEN, (LEN + 1), + } + + /// A histogram with a number of bins known at compile time. + #[derive(Clone, Serialize, Deserialize)] + pub struct Histogram { + /// The ranges defining the bins of the histogram. + #[serde(with = "BigArray")] + range: [f64; LEN + 1], + /// The bins of the histogram. + #[serde(with = "BigArray")] + bin: [u64; LEN], + } + } + ); +} + +#[cfg(not(feature = "serde1"))] +#[doc(hidden)] +#[macro_export] +macro_rules! define_histogram_inner { + ($name:ident, $LEN:expr) => ( + mod $name { + $crate::define_histogram_common!($LEN); + + /// A histogram with a number of bins known at compile time. + #[derive(Clone)] + pub struct Histogram { + /// The ranges defining the bins of the histogram. + range: [f64; LEN + 1], + /// The bins of the histogram. + bin: [u64; LEN], + } + } + ); +} + /// Define a histogram with a number of bins known at compile time. /// /// Because macros are not hygenic for items, everything is defined in a private @@ -21,231 +282,5 @@ /// ``` #[macro_export] macro_rules! define_histogram { - ($name:ident, $LEN:expr) => ( - mod $name { - use $crate::Histogram as Trait; - #[cfg(feature = "serde1")] use ::serde::{Serialize, Deserialize}; - #[cfg(feature = "serde1")] serde_big_array::big_array! { - BigArray; LEN, (LEN + 1), - } - - /// The number of bins of the histogram. - const LEN: usize = $LEN; - - /// A histogram with a number of bins known at compile time. - #[derive(Clone)] - #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] - pub struct Histogram { - /// The ranges defining the bins of the histogram. - #[cfg_attr(feature = "serde1", serde(with = "BigArray"))] - range: [f64; LEN + 1], - /// The bins of the histogram. - #[cfg_attr(feature = "serde1", serde(with = "BigArray"))] - bin: [u64; LEN], - } - - impl ::core::fmt::Debug for Histogram { - fn fmt(&self, formatter: &mut ::core::fmt::Formatter<'_>) - -> ::core::fmt::Result { - formatter.write_str("Histogram {{ range: ")?; - self.range[..].fmt(formatter)?; - formatter.write_str(", bins: ")?; - self.bin[..].fmt(formatter)?; - formatter.write_str(" }}") - } - } - - impl Histogram { - /// Construct a histogram with constant bin width. - #[inline] - pub fn with_const_width(start: f64, end: f64) -> Self { - let step = (end - start) / (LEN as f64); - let mut range = [0.; LEN + 1]; - for (i, r) in range.iter_mut().enumerate() { - *r = start + step * (i as f64); - } - - Self { - range, - bin: [0; LEN], - } - } - - /// Construct a histogram from given ranges. - /// - /// The ranges are given by an iterator of floats where neighboring - /// pairs `(a, b)` define a bin for all `x` where `a <= x < b`. - /// - /// Fails if the iterator is too short (less than `n + 1` where `n` - /// is the number of bins), is not sorted or contains `nan`. `inf` - /// and empty ranges are allowed. - #[inline] - pub fn from_ranges(ranges: T) -> Result - where T: IntoIterator - { - let mut range = [0.; LEN + 1]; - let mut last_i = 0; - for (i, r) in ranges.into_iter().enumerate() { - if i > LEN { - break; - } - if r.is_nan() { - return Err(()); - } - if i > 0 && range[i - 1] > r { - return Err(()); - } - range[i] = r; - last_i = i; - } - if last_i != LEN { - return Err(()); - } - Ok(Self { - range, - bin: [0; LEN], - }) - } - - /// Find the index of the bin corresponding to the given sample. - /// - /// Fails if the sample is out of range of the histogram. - #[inline] - pub fn find(&self, x: f64) -> Result { - // We made sure our ranges are valid at construction, so we can - // safely unwrap. - match self.range.binary_search_by(|p| p.partial_cmp(&x).unwrap()) { - Ok(i) if i < LEN => { - Ok(i) - }, - Err(i) if i > 0 && i < LEN + 1 => { - Ok(i - 1) - }, - _ => { - Err(()) - }, - } - } - - /// Add a sample to the histogram. - /// - /// Fails if the sample is out of range of the histogram. - #[inline] - pub fn add(&mut self, x: f64) -> Result<(), ()> { - if let Ok(i) = self.find(x) { - self.bin[i] += 1; - Ok(()) - } else { - Err(()) - } - } - - /// Return the ranges of the histogram. - #[inline] - pub fn ranges(&self) -> &[f64] { - &self.range[..] - } - - /// Return an iterator over the bins and corresponding ranges: - /// `((lower, upper), count)` - #[inline] - pub fn iter(&self) -> IterHistogram<'_> { - self.into_iter() - } - - /// Reset all bins to zero. - #[inline] - pub fn reset(&mut self) { - self.bin = [0; LEN]; - } - - /// Return the lower range limit. - /// - /// (The corresponding bin might be empty.) - #[inline] - pub fn range_min(&self) -> f64 { - self.range[0] - } - - /// Return the upper range limit. - /// - /// (The corresponding bin might be empty.) - #[inline] - pub fn range_max(&self) -> f64 { - self.range[LEN] - } - } - - /// Iterate over all `(range, count)` pairs in the histogram. - pub struct IterHistogram<'a> { - remaining_bin: &'a [u64], - remaining_range: &'a [f64], - } - - impl<'a> ::core::iter::Iterator for IterHistogram<'a> { - type Item = ((f64, f64), u64); - fn next(&mut self) -> Option<((f64, f64), u64)> { - if let Some((&bin, rest)) = self.remaining_bin.split_first() { - let left = self.remaining_range[0]; - let right = self.remaining_range[1]; - self.remaining_bin = rest; - self.remaining_range = &self.remaining_range[1..]; - return Some(((left, right), bin)); - } - None - } - } - - impl<'a> ::core::iter::IntoIterator for &'a Histogram { - type Item = ((f64, f64), u64); - type IntoIter = IterHistogram<'a>; - fn into_iter(self) -> IterHistogram<'a> { - IterHistogram { - remaining_bin: self.bins(), - remaining_range: self.ranges(), - } - } - } - - impl $crate::Histogram for Histogram { - #[inline] - fn bins(&self) -> &[u64] { - &self.bin[..] - } - } - - impl<'a> ::core::ops::AddAssign<&'a Self> for Histogram { - #[inline] - fn add_assign(&mut self, other: &Self) { - for (a, b) in self.range.iter().zip(other.range.iter()) { - assert_eq!(a, b, "Both histograms must have the same ranges"); - } - for (x, y) in self.bin.iter_mut().zip(other.bin.iter()) { - *x += y; - } - } - } - - impl ::core::ops::MulAssign for Histogram { - #[inline] - fn mul_assign(&mut self, other: u64) { - for x in &mut self.bin[..] { - *x *= other; - } - } - } - - impl $crate::Merge for Histogram { - fn merge(&mut self, other: &Self) { - assert_eq!(self.bin.len(), other.bin.len()); - for (a, b) in self.range.iter().zip(other.range.iter()) { - assert_eq!(a, b, "Both histograms must have the same ranges"); - } - for (a, b) in self.bin.iter_mut().zip(other.bin.iter()) { - *a += *b; - } - } - } - } - ); + ($name:ident, $LEN:expr) => ($crate::define_histogram_inner!($name, $LEN);); } diff --git a/src/moments/mod.rs b/src/moments/mod.rs index 778f3ce..5883b9d 100644 --- a/src/moments/mod.rs +++ b/src/moments/mod.rs @@ -13,50 +13,12 @@ include!("kurtosis.rs"); /// Alias for `Variance`. pub type MeanWithError = Variance; -/// Define an estimator of all moments up to a number given at compile time. -/// -/// This uses a [general algorithm][paper] and is slightly less efficient than -/// the specialized implementations (such as [`Mean`], [`Variance`], -/// [`Skewness`] and [`Kurtosis`]), but it works for any number of moments >= 4. -/// -/// (In practise, there is an upper limit due to integer overflow and possibly -/// numerical issues.) -/// -/// [paper]: https://doi.org/10.1007/s00180-015-0637-z. -/// [`Mean`]: ./struct.Mean.html -/// [`Variance`]: ./struct.Variance.html -/// [`Skewness`]: ./struct.Skewness.html -/// [`Kurtosis`]: ./struct.Kurtosis.html -/// -/// -/// # Example -/// -/// ``` -/// use average::{define_moments, assert_almost_eq}; -/// -/// define_moments!(Moments4, 4); -/// -/// let mut a: Moments4 = (1..6).map(f64::from).collect(); -/// assert_eq!(a.len(), 5); -/// assert_eq!(a.mean(), 3.0); -/// assert_eq!(a.central_moment(0), 1.0); -/// assert_eq!(a.central_moment(1), 0.0); -/// assert_eq!(a.central_moment(2), 2.0); -/// assert_eq!(a.standardized_moment(0), 5.0); -/// assert_eq!(a.standardized_moment(1), 0.0); -/// assert_eq!(a.standardized_moment(2), 1.0); -/// a.add(1.0); -/// // skewness -/// assert_almost_eq!(a.standardized_moment(3), 0.2795084971874741, 1e-15); -/// // kurtosis -/// assert_almost_eq!(a.standardized_moment(4), -1.365 + 3.0, 1e-14); -/// ``` +#[doc(hidden)] #[macro_export] -macro_rules! define_moments { +macro_rules! define_moments_common { ($name:ident, $MAX_MOMENT:expr) => ( use ::conv::ApproxFrom; use ::num_traits::pow; - #[cfg(feature = "serde1")] use ::serde::{Serialize, Deserialize}; /// An iterator over binomial coefficients. struct IterBinomial { @@ -98,23 +60,6 @@ macro_rules! define_moments { /// The maximal order of the moment to be calculated. const MAX_MOMENT: usize = $MAX_MOMENT; - /// Estimate the first N moments of a sequence of numbers ("population"). - #[derive(Debug, Clone)] - #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] - pub struct $name { - /// Number of samples. - /// - /// Technically, this is the same as m_0, but we want this to be an integer - /// to avoid numerical issues, so we store it separately. - n: u64, - /// Average. - avg: f64, - /// Moments times `n`. - /// - /// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition. - m: [f64; MAX_MOMENT - 1], - } - impl $name { /// Create a new moments estimator. #[inline] @@ -298,3 +243,100 @@ macro_rules! define_moments { $crate::impl_from_iterator!($name); ); } + +#[cfg(feature = "serde1")] +#[doc(hidden)] +#[macro_export] +macro_rules! define_moments_inner { + ($name:ident, $MAX_MOMENT:expr) => ( + $crate::define_moments_common!($name, $MAX_MOMENT); + + use ::serde::{Serialize, Deserialize}; + + /// Estimate the first N moments of a sequence of numbers ("population"). + #[derive(Debug, Clone, Serialize, Deserialize)] + pub struct $name { + /// Number of samples. + /// + /// Technically, this is the same as m_0, but we want this to be an integer + /// to avoid numerical issues, so we store it separately. + n: u64, + /// Average. + avg: f64, + /// Moments times `n`. + /// + /// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition. + m: [f64; MAX_MOMENT - 1], + } + + ); +} + +#[cfg(not(feature = "serde1"))] +#[doc(hidden)] +#[macro_export] +macro_rules! define_moments_inner { + ($name:ident, $MAX_MOMENT:expr) => ( + $crate::define_moments_common!($name, $MAX_MOMENT); + + /// Estimate the first N moments of a sequence of numbers ("population"). + #[derive(Debug, Clone)] + pub struct $name { + /// Number of samples. + /// + /// Technically, this is the same as m_0, but we want this to be an integer + /// to avoid numerical issues, so we store it separately. + n: u64, + /// Average. + avg: f64, + /// Moments times `n`. + /// + /// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition. + m: [f64; MAX_MOMENT - 1], + } + + ); +} + +/// Define an estimator of all moments up to a number given at compile time. +/// +/// This uses a [general algorithm][paper] and is slightly less efficient than +/// the specialized implementations (such as [`Mean`], [`Variance`], +/// [`Skewness`] and [`Kurtosis`]), but it works for any number of moments >= 4. +/// +/// (In practise, there is an upper limit due to integer overflow and possibly +/// numerical issues.) +/// +/// [paper]: https://doi.org/10.1007/s00180-015-0637-z. +/// [`Mean`]: ./struct.Mean.html +/// [`Variance`]: ./struct.Variance.html +/// [`Skewness`]: ./struct.Skewness.html +/// [`Kurtosis`]: ./struct.Kurtosis.html +/// +/// +/// # Example +/// +/// ``` +/// use average::{define_moments, assert_almost_eq}; +/// +/// define_moments!(Moments4, 4); +/// +/// let mut a: Moments4 = (1..6).map(f64::from).collect(); +/// assert_eq!(a.len(), 5); +/// assert_eq!(a.mean(), 3.0); +/// assert_eq!(a.central_moment(0), 1.0); +/// assert_eq!(a.central_moment(1), 0.0); +/// assert_eq!(a.central_moment(2), 2.0); +/// assert_eq!(a.standardized_moment(0), 5.0); +/// assert_eq!(a.standardized_moment(1), 0.0); +/// assert_eq!(a.standardized_moment(2), 1.0); +/// a.add(1.0); +/// // skewness +/// assert_almost_eq!(a.standardized_moment(3), 0.2795084971874741, 1e-15); +/// // kurtosis +/// assert_almost_eq!(a.standardized_moment(4), -1.365 + 3.0, 1e-14); +/// ``` +#[macro_export] +macro_rules! define_moments { + ($name:ident, $MAX_MOMENT:expr) => ($crate::define_moments_inner!($name, $MAX_MOMENT);); +}