From 867fe92e5e65ce501069aa22c538757acfaade34 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Mon, 21 Aug 2017 17:09:20 -0700 Subject: [PATCH] Scale histogram bucket size by constant factor Summary: The goal is to reduce the number of histogram buckets, particularly now that we print these histograms for each column family. I chose 1.5 as the factor. We can adjust it later to either make buckets more granular or make fewer buckets. Closes https://github.com/facebook/rocksdb/pull/2139 Differential Revision: D4872076 Pulled By: ajkr fbshipit-source-id: 87790d782a605506c3d24190a028cecbd7aa564a --- HISTORY.md | 2 ++ monitoring/histogram.cc | 53 +++++++++++++----------------------- monitoring/histogram.h | 10 +++---- monitoring/histogram_test.cc | 26 ++++++++---------- 4 files changed, 38 insertions(+), 53 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index d4478c7d3..a40a3b892 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,7 @@ # Rocksdb Change Log ## Unreleased +### Public API Change +* Users of `Statistics::getHistogramString()` will see fewer histogram buckets and different bucket endpoints. ### New Features * Add Iterator::Refresh(), which allows users to update the iterator state so that they can avoid some initialization costs of recreating iterators. diff --git a/monitoring/histogram.cc b/monitoring/histogram.cc index 083ef75fd..b3c01a78e 100644 --- a/monitoring/histogram.cc +++ b/monitoring/histogram.cc @@ -23,41 +23,26 @@ namespace rocksdb { -HistogramBucketMapper::HistogramBucketMapper() - : - // Add newer bucket index here. - // Should be always added in sorted order. 
- // If you change this, you also need to change - // size of array buckets_ in HistogramImpl - bucketValues_( - {1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 12, 14, - 16, 18, 20, 25, 30, 35, - 40, 45, 50, 60, 70, 80, - 90, 100, 120, 140, 160, 180, - 200, 250, 300, 350, 400, 450, - 500, 600, 700, 800, 900, 1000, - 1200, 1400, 1600, 1800, 2000, 2500, - 3000, 3500, 4000, 4500, 5000, 6000, - 7000, 8000, 9000, 10000, 12000, 14000, - 16000, 18000, 20000, 25000, 30000, 35000, - 40000, 45000, 50000, 60000, 70000, 80000, - 90000, 100000, 120000, 140000, 160000, 180000, - 200000, 250000, 300000, 350000, 400000, 450000, - 500000, 600000, 700000, 800000, 900000, 1000000, - 1200000, 1400000, 1600000, 1800000, 2000000, 2500000, - 3000000, 3500000, 4000000, 4500000, 5000000, 6000000, - 7000000, 8000000, 9000000, 10000000, 12000000, 14000000, - 16000000, 18000000, 20000000, 25000000, 30000000, 35000000, - 40000000, 45000000, 50000000, 60000000, 70000000, 80000000, - 90000000, 100000000, 120000000, 140000000, 160000000, 180000000, - 200000000, 250000000, 300000000, 350000000, 400000000, 450000000, - 500000000, 600000000, 700000000, 800000000, 900000000, 1000000000}), - maxBucketValue_(bucketValues_.back()), - minBucketValue_(bucketValues_.front()) { - for (size_t i =0; i < bucketValues_.size(); ++i) { - valueIndexMap_[bucketValues_[i]] = i; +HistogramBucketMapper::HistogramBucketMapper() { + // If you change this, you also need to change + // size of array buckets_ in HistogramImpl + bucketValues_ = {1, 2}; + valueIndexMap_ = {{1, 0}, {2, 1}}; + double bucket_val = static_cast<double>(bucketValues_.back()); + while ((bucket_val = 1.5 * bucket_val) <= static_cast<double>(port::kMaxUint64)) { + bucketValues_.push_back(static_cast<uint64_t>(bucket_val)); + // Extracts two most significant digits to make histogram buckets more + // human-readable. E.g., 172 becomes 170. 
+ uint64_t pow_of_ten = 1; + while (bucketValues_.back() / 10 > 10) { + bucketValues_.back() /= 10; + pow_of_ten *= 10; + } + bucketValues_.back() *= pow_of_ten; + valueIndexMap_[bucketValues_.back()] = bucketValues_.size() - 1; } + maxBucketValue_ = bucketValues_.back(); + minBucketValue_ = bucketValues_.front(); } size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const { diff --git a/monitoring/histogram.h b/monitoring/histogram.h index 6a1ebbf04..6bf2e9e93 100644 --- a/monitoring/histogram.h +++ b/monitoring/histogram.h @@ -45,9 +45,9 @@ class HistogramBucketMapper { } private: - const std::vector<uint64_t> bucketValues_; - const uint64_t maxBucketValue_; - const uint64_t minBucketValue_; + std::vector<uint64_t> bucketValues_; + uint64_t maxBucketValue_; + uint64_t minBucketValue_; std::map<uint64_t, uint64_t> valueIndexMap_; }; @@ -89,7 +89,7 @@ struct HistogramStat { std::atomic_uint_fast64_t num_; std::atomic_uint_fast64_t sum_; std::atomic_uint_fast64_t sum_squares_; - std::atomic_uint_fast64_t buckets_[138]; // 138==BucketMapper::BucketCount() + std::atomic_uint_fast64_t buckets_[109]; // 109==BucketMapper::BucketCount() const uint64_t num_buckets_; }; @@ -146,4 +146,4 @@ class HistogramImpl : public Histogram { std::mutex mutex_; }; -} // namespace rocksdb \ No newline at end of file +} // namespace rocksdb diff --git a/monitoring/histogram_test.cc b/monitoring/histogram_test.cc index 70147af72..b4e3c981c 100644 --- a/monitoring/histogram_test.cc +++ b/monitoring/histogram_test.cc @@ -29,33 +29,31 @@ void PopulateHistogram(Histogram& histogram, } void BasicOperation(Histogram& histogram) { - PopulateHistogram(histogram, 1, 100, 10); + PopulateHistogram(histogram, 1, 110, 10); // fill up to bucket [70, 110) HistogramData data; histogram.Data(&data); - ASSERT_LE(fabs(histogram.Percentile(100.0) - 100.0), kIota); - ASSERT_LE(fabs(data.percentile99 - 99.0), kIota); - ASSERT_LE(fabs(data.percentile95 - 95.0), kIota); - ASSERT_LE(fabs(data.median - 50.0), kIota); - 
ASSERT_EQ(data.average, 50.5); // avg is acurately calculated. - ASSERT_LT(fabs(data.standard_deviation- 28.86), kIota); //sd is ~= 28.86 + ASSERT_LE(fabs(histogram.Percentile(100.0) - 110.0), kIota); + ASSERT_LE(fabs(data.percentile99 - 108.9), kIota); // 99 * 110 / 100 + ASSERT_LE(fabs(data.percentile95 - 104.5), kIota); // 95 * 110 / 100 + ASSERT_LE(fabs(data.median - 55.0), kIota); // 50 * 110 / 100 + ASSERT_EQ(data.average, 55.5); // (1 + 110) / 2 } void MergeHistogram(Histogram& histogram, Histogram& other) { PopulateHistogram(histogram, 1, 100); - PopulateHistogram(other, 101, 200); + PopulateHistogram(other, 101, 250); histogram.Merge(other); HistogramData data; histogram.Data(&data); - ASSERT_LE(fabs(histogram.Percentile(100.0) - 200.0), kIota); - ASSERT_LE(fabs(data.percentile99 - 198.0), kIota); - ASSERT_LE(fabs(data.percentile95 - 190.0), kIota); - ASSERT_LE(fabs(data.median - 100.0), kIota); - ASSERT_EQ(data.average, 100.5); // avg is acurately calculated. - ASSERT_LT(fabs(data.standard_deviation - 57.73), kIota); //sd is ~= 57.73 + ASSERT_LE(fabs(histogram.Percentile(100.0) - 250.0), kIota); + ASSERT_LE(fabs(data.percentile99 - 247.5), kIota); // 99 * 250 / 100 + ASSERT_LE(fabs(data.percentile95 - 237.5), kIota); // 95 * 250 / 100 + ASSERT_LE(fabs(data.median - 125.0), kIota); // 50 * 250 / 100 + ASSERT_EQ(data.average, 125.5); // (1 + 250) / 2 } void EmptyHistogram(Histogram& histogram) {