Scale histogram bucket size by constant factor

Summary:
The goal is to reduce the number of histogram buckets, particularly now that we print these histograms for each column family. I chose 1.5 as the factor. We can adjust it later to either make buckets more granular or make fewer buckets.
Closes https://github.com/facebook/rocksdb/pull/2139

Differential Revision: D4872076

Pulled By: ajkr

fbshipit-source-id: 87790d782a605506c3d24190a028cecbd7aa564a
This commit is contained in:
Andrew Kryczka 2017-08-21 17:09:20 -07:00 committed by Facebook Github Bot
parent f004307e9b
commit 867fe92e5e
4 changed files with 38 additions and 53 deletions

View File

@ -1,5 +1,7 @@
# Rocksdb Change Log
## Unreleased
### Public API Change
* Users of `Statistics::getHistogramString()` will see fewer histogram buckets and different bucket endpoints.
### New Features
* Add Iterator::Refresh(), which allows users to update the iterator state so that they can avoid some initialization costs of recreating iterators.

View File

@ -23,41 +23,26 @@
namespace rocksdb {
HistogramBucketMapper::HistogramBucketMapper()
:
// Add newer bucket index here.
// Should be always added in sorted order.
HistogramBucketMapper::HistogramBucketMapper() {
// If you change this, you also need to change
// size of array buckets_ in HistogramImpl
bucketValues_(
{1, 2, 3, 4, 5, 6,
7, 8, 9, 10, 12, 14,
16, 18, 20, 25, 30, 35,
40, 45, 50, 60, 70, 80,
90, 100, 120, 140, 160, 180,
200, 250, 300, 350, 400, 450,
500, 600, 700, 800, 900, 1000,
1200, 1400, 1600, 1800, 2000, 2500,
3000, 3500, 4000, 4500, 5000, 6000,
7000, 8000, 9000, 10000, 12000, 14000,
16000, 18000, 20000, 25000, 30000, 35000,
40000, 45000, 50000, 60000, 70000, 80000,
90000, 100000, 120000, 140000, 160000, 180000,
200000, 250000, 300000, 350000, 400000, 450000,
500000, 600000, 700000, 800000, 900000, 1000000,
1200000, 1400000, 1600000, 1800000, 2000000, 2500000,
3000000, 3500000, 4000000, 4500000, 5000000, 6000000,
7000000, 8000000, 9000000, 10000000, 12000000, 14000000,
16000000, 18000000, 20000000, 25000000, 30000000, 35000000,
40000000, 45000000, 50000000, 60000000, 70000000, 80000000,
90000000, 100000000, 120000000, 140000000, 160000000, 180000000,
200000000, 250000000, 300000000, 350000000, 400000000, 450000000,
500000000, 600000000, 700000000, 800000000, 900000000, 1000000000}),
maxBucketValue_(bucketValues_.back()),
minBucketValue_(bucketValues_.front()) {
for (size_t i =0; i < bucketValues_.size(); ++i) {
valueIndexMap_[bucketValues_[i]] = i;
bucketValues_ = {1, 2};
valueIndexMap_ = {{1, 0}, {2, 1}};
double bucket_val = static_cast<double>(bucketValues_.back());
while ((bucket_val = 1.5 * bucket_val) <= static_cast<double>(port::kMaxUint64)) {
bucketValues_.push_back(static_cast<uint64_t>(bucket_val));
// Extracts two most significant digits to make histogram buckets more
// human-readable. E.g., 172 becomes 170.
uint64_t pow_of_ten = 1;
while (bucketValues_.back() / 10 > 10) {
bucketValues_.back() /= 10;
pow_of_ten *= 10;
}
bucketValues_.back() *= pow_of_ten;
valueIndexMap_[bucketValues_.back()] = bucketValues_.size() - 1;
}
maxBucketValue_ = bucketValues_.back();
minBucketValue_ = bucketValues_.front();
}
size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const {

View File

@ -45,9 +45,9 @@ class HistogramBucketMapper {
}
private:
const std::vector<uint64_t> bucketValues_;
const uint64_t maxBucketValue_;
const uint64_t minBucketValue_;
std::vector<uint64_t> bucketValues_;
uint64_t maxBucketValue_;
uint64_t minBucketValue_;
std::map<uint64_t, uint64_t> valueIndexMap_;
};
@ -89,7 +89,7 @@ struct HistogramStat {
std::atomic_uint_fast64_t num_;
std::atomic_uint_fast64_t sum_;
std::atomic_uint_fast64_t sum_squares_;
std::atomic_uint_fast64_t buckets_[138]; // 138==BucketMapper::BucketCount()
std::atomic_uint_fast64_t buckets_[109]; // 109==BucketMapper::BucketCount()
const uint64_t num_buckets_;
};

View File

@ -29,33 +29,31 @@ void PopulateHistogram(Histogram& histogram,
}
void BasicOperation(Histogram& histogram) {
PopulateHistogram(histogram, 1, 100, 10);
PopulateHistogram(histogram, 1, 110, 10); // fill up to bucket [70, 110)
HistogramData data;
histogram.Data(&data);
ASSERT_LE(fabs(histogram.Percentile(100.0) - 100.0), kIota);
ASSERT_LE(fabs(data.percentile99 - 99.0), kIota);
ASSERT_LE(fabs(data.percentile95 - 95.0), kIota);
ASSERT_LE(fabs(data.median - 50.0), kIota);
ASSERT_EQ(data.average, 50.5); // avg is accurately calculated.
ASSERT_LT(fabs(data.standard_deviation- 28.86), kIota); //sd is ~= 28.86
ASSERT_LE(fabs(histogram.Percentile(100.0) - 110.0), kIota);
ASSERT_LE(fabs(data.percentile99 - 108.9), kIota); // 99 * 110 / 100
ASSERT_LE(fabs(data.percentile95 - 104.5), kIota); // 95 * 110 / 100
ASSERT_LE(fabs(data.median - 55.0), kIota); // 50 * 110 / 100
ASSERT_EQ(data.average, 55.5); // (1 + 110) / 2
}
void MergeHistogram(Histogram& histogram, Histogram& other) {
PopulateHistogram(histogram, 1, 100);
PopulateHistogram(other, 101, 200);
PopulateHistogram(other, 101, 250);
histogram.Merge(other);
HistogramData data;
histogram.Data(&data);
ASSERT_LE(fabs(histogram.Percentile(100.0) - 200.0), kIota);
ASSERT_LE(fabs(data.percentile99 - 198.0), kIota);
ASSERT_LE(fabs(data.percentile95 - 190.0), kIota);
ASSERT_LE(fabs(data.median - 100.0), kIota);
ASSERT_EQ(data.average, 100.5); // avg is accurately calculated.
ASSERT_LT(fabs(data.standard_deviation - 57.73), kIota); //sd is ~= 57.73
ASSERT_LE(fabs(histogram.Percentile(100.0) - 250.0), kIota);
ASSERT_LE(fabs(data.percentile99 - 247.5), kIota); // 99 * 250 / 100
ASSERT_LE(fabs(data.percentile95 - 237.5), kIota); // 95 * 250 / 100
ASSERT_LE(fabs(data.median - 125.0), kIota); // 50 * 250 / 100
ASSERT_EQ(data.average, 125.5); // (1 + 250) / 2
}
void EmptyHistogram(Histogram& histogram) {