Performant util/histogram.
Summary: The earlier way to record a value in the histogram was a linear search of the BucketLimit array to find the bucket and increment its counter. The current way is to store a static HistMap that points to the bucket of each value in the range [kFirstValue, kLastValue). In the process, use vectors instead of arrays and refactor some code into a HistogramHelper class. Test Plan: run db_bench with histogram=1 and see a histogram being printed. Reviewers: dhruba, chip, heyongqiang Reviewed By: chip CC: leveldb Differential Revision: https://reviews.facebook.net/D8265
This commit is contained in:
parent
4dcc0c89f4
commit
009034cf12
4
Makefile
4
Makefile
@ -38,6 +38,7 @@ TESTS = \
|
||||
c_test \
|
||||
cache_test \
|
||||
coding_test \
|
||||
histogram_test \
|
||||
corruption_test \
|
||||
crc32c_test \
|
||||
db_test \
|
||||
@ -149,6 +150,9 @@ cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS)
|
||||
|
||||
histogram_test: util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS)
|
||||
|
||||
corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS)
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <cassert>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include "port/port.h"
|
||||
@ -9,7 +10,10 @@
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
const double Histogram::kBucketLimit[kNumBuckets] = {
|
||||
HistogramBucketMapper::HistogramBucketMapper() :
|
||||
// Add newer bucket index here.
|
||||
// Should always be added in sorted order.
|
||||
bucketValues_({
|
||||
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45,
|
||||
50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450,
|
||||
500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000,
|
||||
@ -24,30 +28,50 @@ const double Histogram::kBucketLimit[kNumBuckets] = {
|
||||
70000000, 80000000, 90000000, 100000000, 120000000, 140000000, 160000000,
|
||||
180000000, 200000000, 250000000, 300000000, 350000000, 400000000,
|
||||
450000000, 500000000, 600000000, 700000000, 800000000, 900000000,
|
||||
1000000000, 1200000000, 1400000000, 1600000000, 1800000000, 2000000000,
|
||||
2500000000.0, 3000000000.0, 3500000000.0, 4000000000.0, 4500000000.0,
|
||||
5000000000.0, 6000000000.0, 7000000000.0, 8000000000.0, 9000000000.0,
|
||||
1e200,
|
||||
};
|
||||
1000000000}),
|
||||
maxBucketValue_(bucketValues_.back()),
|
||||
minBucketValue_(bucketValues_.front()) {
|
||||
for (size_t i =0; i < bucketValues_.size(); ++i) {
|
||||
valueIndexMap_[bucketValues_[i]] = i;
|
||||
}
|
||||
}
|
||||
|
||||
const size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const {
|
||||
if (value >= maxBucketValue_) {
|
||||
return bucketValues_.size() - 1;
|
||||
} else if ( value >= minBucketValue_ ) {
|
||||
std::map<uint64_t, uint64_t>::const_iterator lowerBound =
|
||||
valueIndexMap_.lower_bound(value);
|
||||
if (lowerBound != valueIndexMap_.end()) {
|
||||
return lowerBound->second;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
const HistogramBucketMapper bucketMapper;
|
||||
}
|
||||
|
||||
|
||||
// Builds an empty histogram with one zeroed counter per bucket.
//
// Every statistic is initialized here (to the same values Clear() assigns) so
// that a freshly constructed histogram never reads an indeterminate double.
// The initializers are listed in member declaration order.
Histogram::Histogram() :
  min_(bucketMapper.LastValue()),
  max_(0),
  num_(0),
  sum_(0),
  sum_squares_(0),
  buckets_(bucketMapper.BucketCount(), 0) {}
|
||||
|
||||
// Resets the histogram to its empty state: no samples, zeroed statistics and
// every bucket count set to zero.
void Histogram::Clear() {
  // min_ starts at the largest bucket limit so that Add() can lower it.
  min_ = bucketMapper.LastValue();
  max_ = 0;
  num_ = 0;
  sum_ = 0;
  sum_squares_ = 0;
  // assign() rather than resize(): resize() only initializes elements it
  // appends, so counts already stored in buckets_ would survive the reset.
  buckets_.assign(bucketMapper.BucketCount(), 0);
}
|
||||
|
||||
void Histogram::Add(double value) {
|
||||
// Linear search is fast enough for our usage in db_bench
|
||||
int b = 0;
|
||||
while (b < kNumBuckets - 1 && kBucketLimit[b] <= value) {
|
||||
b++;
|
||||
}
|
||||
buckets_[b] += 1.0;
|
||||
void Histogram::Add(uint64_t value) {
|
||||
const size_t index = bucketMapper.IndexForValue(value);
|
||||
buckets_[index] += 1;
|
||||
if (min_ > value) min_ = value;
|
||||
if (max_ < value) max_ = value;
|
||||
num_++;
|
||||
@ -55,13 +79,17 @@ void Histogram::Add(double value) {
|
||||
sum_squares_ += (value * value);
|
||||
}
|
||||
|
||||
// Records a floating-point sample by truncating it to an unsigned integer and
// forwarding to Add(uint64_t).
//
// Converting a negative, NaN or too-large double to uint64_t is undefined
// behavior, so such inputs are clamped to the representable range first:
// anything that is not greater than zero is recorded as 0, and anything at or
// above 2^64 is recorded as the largest uint64_t value.
void Histogram::Add(double value) {
  // 2^64 as a double: the smallest value that no longer fits in uint64_t.
  const double kUint64Limit = 18446744073709551616.0;
  if (!(value > 0)) {
    // Negative, zero, or NaN (every comparison with NaN is false).
    Add(static_cast<uint64_t>(0));
  } else if (value >= kUint64Limit) {
    Add(~static_cast<uint64_t>(0));
  } else {
    Add(static_cast<uint64_t>(value));
  }
}
|
||||
|
||||
void Histogram::Merge(const Histogram& other) {
|
||||
if (other.min_ < min_) min_ = other.min_;
|
||||
if (other.max_ > max_) max_ = other.max_;
|
||||
num_ += other.num_;
|
||||
sum_ += other.sum_;
|
||||
sum_squares_ += other.sum_squares_;
|
||||
for (int b = 0; b < kNumBuckets; b++) {
|
||||
for (int b = 0; b < bucketMapper.BucketCount(); b++) {
|
||||
buckets_[b] += other.buckets_[b];
|
||||
}
|
||||
}
|
||||
@ -73,15 +101,19 @@ double Histogram::Median() const {
|
||||
double Histogram::Percentile(double p) const {
|
||||
double threshold = num_ * (p / 100.0);
|
||||
double sum = 0;
|
||||
for (int b = 0; b < kNumBuckets; b++) {
|
||||
for (int b = 0; b < bucketMapper.BucketCount(); b++) {
|
||||
sum += buckets_[b];
|
||||
if (sum >= threshold) {
|
||||
// Scale linearly within this bucket
|
||||
double left_point = (b == 0) ? 0 : kBucketLimit[b-1];
|
||||
double right_point = kBucketLimit[b];
|
||||
double left_point = (b == 0) ? 0 : bucketMapper.BucketLimit(b-1);
|
||||
double right_point = bucketMapper.BucketLimit(b);
|
||||
double left_sum = sum - buckets_[b];
|
||||
double right_sum = sum;
|
||||
double pos = (threshold - left_sum) / (right_sum - left_sum);
|
||||
double pos = 0;
|
||||
double right_left_diff = right_sum - left_sum;
|
||||
if (right_left_diff != 0) {
|
||||
pos = (threshold - left_sum) / (right_sum - left_sum);
|
||||
}
|
||||
double r = left_point + (right_point - left_point) * pos;
|
||||
if (r < min_) r = min_;
|
||||
if (r > max_) r = max_;
|
||||
@ -116,16 +148,16 @@ std::string Histogram::ToString() const {
|
||||
r.append("------------------------------------------------------\n");
|
||||
const double mult = 100.0 / num_;
|
||||
double sum = 0;
|
||||
for (int b = 0; b < kNumBuckets; b++) {
|
||||
for (int b = 0; b < bucketMapper.BucketCount(); b++) {
|
||||
if (buckets_[b] <= 0.0) continue;
|
||||
sum += buckets_[b];
|
||||
snprintf(buf, sizeof(buf),
|
||||
"[ %7.0f, %7.0f ) %7.0f %7.3f%% %7.3f%% ",
|
||||
((b == 0) ? 0.0 : kBucketLimit[b-1]), // left
|
||||
kBucketLimit[b], // right
|
||||
buckets_[b], // count
|
||||
mult * buckets_[b], // percentage
|
||||
mult * sum); // cumulative percentage
|
||||
"[ %ld, %ld ) %ld %7.3f%% %7.3f%% ",
|
||||
((b == 0) ? 0 : bucketMapper.BucketLimit(b-1)), // left
|
||||
bucketMapper.BucketLimit(b), // right
|
||||
buckets_[b], // count
|
||||
mult * buckets_[b], // percentage
|
||||
mult * sum); // cumulative percentage
|
||||
r.append(buf);
|
||||
|
||||
// Add hash marks based on percentage; 20 marks for 100%.
|
||||
|
@ -5,36 +5,70 @@
|
||||
#ifndef STORAGE_LEVELDB_UTIL_HISTOGRAM_H_
|
||||
#define STORAGE_LEVELDB_UTIL_HISTOGRAM_H_
|
||||
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
// Translates sample values into histogram bucket indices using a fixed list
// of bucket limits.
class HistogramBucketMapper {
 public:
  HistogramBucketMapper();

  // Converts a value to the index of the bucket it is counted in.
  const size_t IndexForValue(const uint64_t value) const;

  // Number of buckets required, i.e. the number of bucket limits.
  const size_t BucketCount() const { return bucketValues_.size(); }

  // Accessor for maxBucketValue_.
  uint64_t LastValue() const { return maxBucketValue_; }

  // Accessor for minBucketValue_.
  uint64_t FirstValue() const { return minBucketValue_; }

  // Limit stored for bucket |bucketNumber|; the index must be smaller than
  // BucketCount() (checked by assert).
  uint64_t BucketLimit(const uint64_t bucketNumber) const {
    assert(bucketNumber < BucketCount());
    return bucketValues_[bucketNumber];
  }

 private:
  // Declaration order matters: the constructor's initializer list fills
  // these members in this order.
  const std::vector<uint64_t> bucketValues_;
  const uint64_t maxBucketValue_;
  const uint64_t minBucketValue_;
  // Bucket limit -> index of that limit in bucketValues_.
  std::map<uint64_t, uint64_t> valueIndexMap_;
};
|
||||
|
||||
// Accumulates samples into buckets and reports summary statistics over them.
class Histogram {
 public:
  Histogram();

  // Resets the histogram to its empty state.
  void Clear();
  // Records one sample. The double overload converts the value to uint64_t
  // and forwards to the integer overload.
  void Add(uint64_t value);
  void Add(double value);
  // Adds the samples recorded in |other| to this histogram.
  void Merge(const Histogram& other);

  std::string ToString() const;

  double Median() const;
  // |p| is a percentage in the 0..100 scale (it is divided by 100.0).
  double Percentile(double p) const;
  double Average() const;
  double StandardDeviation() const;

 private:
  double min_;
  double max_;
  double num_;
  double sum_;
  double sum_squares_;
  // One sample counter per bucket.
  std::vector<uint64_t> buckets_;
};
|
||||
|
||||
} // namespace leveldb
|
||||
|
57
util/histogram_test.cc
Normal file
57
util/histogram_test.cc
Normal file
@ -0,0 +1,57 @@
|
||||
#include "util/histogram.h"
|
||||
|
||||
#include "util/testharness.h"
|
||||
|
||||
namespace leveldb {
|
||||
|
||||
// Empty type named as the first argument of the TEST() cases in this file.
class HistogramTest {};
|
||||
|
||||
// Feeds the values 1..100 into a histogram and sanity-checks the median, a
// few percentiles and the average.
TEST(HistogramTest, BasicOperation) {
  Histogram histogram;
  for (uint64_t value = 1; value < 101; ++value) {
    histogram.Add(value);
  }

  // The median must be positive.
  const double median = histogram.Median();
  // ASSERT_LE(median, 50);
  ASSERT_GT(median, 0);

  // The 100th percentile must lie in (0, 100].
  const double percentile100 = histogram.Percentile(100.0);
  ASSERT_LE(percentile100, 100.0);
  ASSERT_GT(percentile100, 0.0);

  // Percentiles must not decrease as p grows.
  const double percentile99 = histogram.Percentile(99.0);
  const double percentile85 = histogram.Percentile(85.0);
  ASSERT_LE(percentile99, 99.0);
  ASSERT_TRUE(percentile99 >= percentile85);

  ASSERT_EQ(histogram.Average(), 50.5);  // The average is calculated exactly.
}
|
||||
|
||||
// A histogram that never received a sample must report zero for the median,
// an arbitrary percentile and the average.
TEST(HistogramTest, EmptyHistogram) {
  Histogram histogram;

  ASSERT_EQ(histogram.Median(), 0.0);
  ASSERT_EQ(histogram.Percentile(85.0), 0.0);
  ASSERT_EQ(histogram.Average(), 0.0);
}
|
||||
|
||||
// After Clear(), a histogram that previously held the values 1..100 must
// report zero for the median, an arbitrary percentile and the average.
TEST(HistogramTest, ClearHistogram) {
  Histogram histogram;
  for (uint64_t value = 1; value < 101; ++value) {
    histogram.Add(value);
  }

  histogram.Clear();

  ASSERT_EQ(histogram.Median(), 0);
  ASSERT_EQ(histogram.Percentile(85.0), 0);
  ASSERT_EQ(histogram.Average(), 0);
}
|
||||
|
||||
} // namespace leveldb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return leveldb::test::RunAllTests();
|
||||
}
|
Loading…
Reference in New Issue
Block a user