Scale histogram bucket size by constant factor

Summary: The goal is to reduce the number of histogram buckets, particularly now that we print these histograms for each column family. I chose 1.5 as the factor by which each successive bucket limit grows. We can adjust it later to either make buckets more granular or make fewer buckets.

Closes https://github.com/facebook/rocksdb/pull/2139

Differential Revision: D4872076
Pulled By: ajkr
fbshipit-source-id: 87790d782a605506c3d24190a028cecbd7aa564a
2017-08-21 17:09:20 -07:00 · 2017-08-21 17:09:20 -07:00 · 867fe92e5e
commit 867fe92e5e
parent f004307e9b
4 changed files with 38 additions and 53 deletions
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,7 @@
 # Rocksdb Change Log
 ## Unreleased
+### Public API Change
+* Users of `Statistics::getHistogramString()` will see fewer histogram buckets and different bucket endpoints.

 ### New Features
 * Add Iterator::Refresh(), which allows users to update the iterator state so that they can avoid some initialization costs of recreating iterators.
--- a/monitoring/histogram.cc
+++ b/monitoring/histogram.cc
@@ -23,41 +23,26 @@

 namespace rocksdb {

-HistogramBucketMapper::HistogramBucketMapper()
-    :
-      // Add newer bucket index here.
-      // Should be always added in sorted order.
+HistogramBucketMapper::HistogramBucketMapper() {
  // If you change this, you also need to change
  // size of array buckets_ in HistogramImpl
-      bucketValues_(
-          {1,         2,         3,         4,         5,         6,
-           7,         8,         9,         10,        12,        14,
-           16,        18,        20,        25,        30,        35,
-           40,        45,        50,        60,        70,        80,
-           90,        100,       120,       140,       160,       180,
-           200,       250,       300,       350,       400,       450,
-           500,       600,       700,       800,       900,       1000,
-           1200,      1400,      1600,      1800,      2000,      2500,
-           3000,      3500,      4000,      4500,      5000,      6000,
-           7000,      8000,      9000,      10000,     12000,     14000,
-           16000,     18000,     20000,     25000,     30000,     35000,
-           40000,     45000,     50000,     60000,     70000,     80000,
-           90000,     100000,    120000,    140000,    160000,    180000,
-           200000,    250000,    300000,    350000,    400000,    450000,
-           500000,    600000,    700000,    800000,    900000,    1000000,
-           1200000,   1400000,   1600000,   1800000,   2000000,   2500000,
-           3000000,   3500000,   4000000,   4500000,   5000000,   6000000,
-           7000000,   8000000,   9000000,   10000000,  12000000,  14000000,
-           16000000,  18000000,  20000000,  25000000,  30000000,  35000000,
-           40000000,  45000000,  50000000,  60000000,  70000000,  80000000,
-           90000000,  100000000, 120000000, 140000000, 160000000, 180000000,
-           200000000, 250000000, 300000000, 350000000, 400000000, 450000000,
-           500000000, 600000000, 700000000, 800000000, 900000000, 1000000000}),
-      maxBucketValue_(bucketValues_.back()),
-      minBucketValue_(bucketValues_.front()) {
-  for (size_t i =0; i < bucketValues_.size(); ++i) {
-    valueIndexMap_[bucketValues_[i]] = i;
+  bucketValues_ = {1, 2};
+  valueIndexMap_ = {{1, 0}, {2, 1}};
+  double bucket_val = static_cast<double>(bucketValues_.back());
+  while ((bucket_val = 1.5 * bucket_val) <= static_cast<double>(port::kMaxUint64)) {
+    bucketValues_.push_back(static_cast<uint64_t>(bucket_val));
+    // Extracts two most significant digits to make histogram buckets more
+    // human-readable. E.g., 172 becomes 170.
+    uint64_t pow_of_ten = 1;
+    while (bucketValues_.back() / 10 > 10) {
+      bucketValues_.back() /= 10;
+      pow_of_ten *= 10;
    }
+    bucketValues_.back() *= pow_of_ten;
+    valueIndexMap_[bucketValues_.back()] = bucketValues_.size() - 1;
+  }
+  maxBucketValue_ = bucketValues_.back();
+  minBucketValue_ = bucketValues_.front();
 }

 size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const {
--- a/monitoring/histogram.h
+++ b/monitoring/histogram.h
@@ -45,9 +45,9 @@ class HistogramBucketMapper {
  }

 private:
-  const std::vector<uint64_t> bucketValues_;
-  const uint64_t maxBucketValue_;
-  const uint64_t minBucketValue_;
+  std::vector<uint64_t> bucketValues_;
+  uint64_t maxBucketValue_;
+  uint64_t minBucketValue_;
  std::map<uint64_t, uint64_t> valueIndexMap_;
 };

@@ -89,7 +89,7 @@ struct HistogramStat {
  std::atomic_uint_fast64_t num_;
  std::atomic_uint_fast64_t sum_;
  std::atomic_uint_fast64_t sum_squares_;
-  std::atomic_uint_fast64_t buckets_[138]; // 138==BucketMapper::BucketCount()
+  std::atomic_uint_fast64_t buckets_[109]; // 109==BucketMapper::BucketCount()
  const uint64_t num_buckets_;
 };

--- a/monitoring/histogram_test.cc
+++ b/monitoring/histogram_test.cc
@@ -29,33 +29,31 @@ void PopulateHistogram(Histogram& histogram,
 }

 void BasicOperation(Histogram& histogram) {
-  PopulateHistogram(histogram, 1, 100, 10);
+  PopulateHistogram(histogram, 1, 110, 10); // fill up to bucket [70, 110)

  HistogramData data;
  histogram.Data(&data);

-  ASSERT_LE(fabs(histogram.Percentile(100.0) - 100.0), kIota);
-  ASSERT_LE(fabs(data.percentile99 - 99.0), kIota);
-  ASSERT_LE(fabs(data.percentile95 - 95.0), kIota);
-  ASSERT_LE(fabs(data.median - 50.0), kIota);
-  ASSERT_EQ(data.average, 50.5);               // avg is acurately calculated.
-  ASSERT_LT(fabs(data.standard_deviation- 28.86), kIota); //sd is ~= 28.86
+  ASSERT_LE(fabs(histogram.Percentile(100.0) - 110.0), kIota);
+  ASSERT_LE(fabs(data.percentile99 - 108.9), kIota);  // 99 * 110 / 100
+  ASSERT_LE(fabs(data.percentile95 - 104.5), kIota);  // 95 * 110 / 100
+  ASSERT_LE(fabs(data.median - 55.0), kIota);  // 50 * 110 / 100
+  ASSERT_EQ(data.average, 55.5);  // (1 + 110) / 2
 }

 void MergeHistogram(Histogram& histogram, Histogram& other) {
  PopulateHistogram(histogram, 1, 100);
-  PopulateHistogram(other, 101, 200);
+  PopulateHistogram(other, 101, 250);
  histogram.Merge(other);

  HistogramData data;
  histogram.Data(&data);

-  ASSERT_LE(fabs(histogram.Percentile(100.0) - 200.0), kIota);
-  ASSERT_LE(fabs(data.percentile99 - 198.0), kIota);
-  ASSERT_LE(fabs(data.percentile95 - 190.0), kIota);
-  ASSERT_LE(fabs(data.median - 100.0), kIota);
-  ASSERT_EQ(data.average, 100.5);                // avg is acurately calculated.
-  ASSERT_LT(fabs(data.standard_deviation - 57.73), kIota); //sd is ~= 57.73
+  ASSERT_LE(fabs(histogram.Percentile(100.0) - 250.0), kIota);
+  ASSERT_LE(fabs(data.percentile99 - 247.5), kIota);  // 99 * 250 / 100
+  ASSERT_LE(fabs(data.percentile95 - 237.5), kIota);  // 95 * 250 / 100
+  ASSERT_LE(fabs(data.median - 125.0), kIota);  // 50 * 250 / 100
+  ASSERT_EQ(data.average, 125.5);  // (1 + 250) / 2
 }

 void EmptyHistogram(Histogram& histogram) {