Performant util/histogram.

Summary:
Earlier way to record in histogram=>
Linear search BucketLimit array to find the bucket and increment the
counter
Current way to record in histogram=>
Store a HistMap statically which points to the bucket of each value in the
range [kFirstValue, kLastValue).

In the process, use vectors instead of arrays and refactor some code into the
HistogramHelper class.

Test Plan:
run db_bench with histogram=1 and see a histogram being
printed.

Reviewers: dhruba, chip, heyongqiang

Reviewed By: chip

CC: leveldb

Differential Revision: https://reviews.facebook.net/D8265
This commit is contained in:
Abhishek Kona 2013-01-29 12:23:31 -08:00
parent 4dcc0c89f4
commit 009034cf12
4 changed files with 166 additions and 39 deletions

View File

@ -38,6 +38,7 @@ TESTS = \
c_test \ c_test \
cache_test \ cache_test \
coding_test \ coding_test \
histogram_test \
corruption_test \ corruption_test \
crc32c_test \ crc32c_test \
db_test \ db_test \
@ -149,6 +150,9 @@ cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) $(CXX) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS)
histogram_test: util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS)
corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) $(CXX) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS)

View File

@ -2,6 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <cassert>
#include <math.h> #include <math.h>
#include <stdio.h> #include <stdio.h>
#include "port/port.h" #include "port/port.h"
@ -9,7 +10,10 @@
namespace leveldb { namespace leveldb {
const double Histogram::kBucketLimit[kNumBuckets] = { HistogramBucketMapper::HistogramBucketMapper() :
// Add newer bucket index here.
// Should always be added in sorted order.
bucketValues_({
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45,
50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450, 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450,
500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000, 500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000,
@ -24,30 +28,50 @@ const double Histogram::kBucketLimit[kNumBuckets] = {
70000000, 80000000, 90000000, 100000000, 120000000, 140000000, 160000000, 70000000, 80000000, 90000000, 100000000, 120000000, 140000000, 160000000,
180000000, 200000000, 250000000, 300000000, 350000000, 400000000, 180000000, 200000000, 250000000, 300000000, 350000000, 400000000,
450000000, 500000000, 600000000, 700000000, 800000000, 900000000, 450000000, 500000000, 600000000, 700000000, 800000000, 900000000,
1000000000, 1200000000, 1400000000, 1600000000, 1800000000, 2000000000, 1000000000}),
2500000000.0, 3000000000.0, 3500000000.0, 4000000000.0, 4500000000.0, maxBucketValue_(bucketValues_.back()),
5000000000.0, 6000000000.0, 7000000000.0, 8000000000.0, 9000000000.0, minBucketValue_(bucketValues_.front()) {
1e200, for (size_t i =0; i < bucketValues_.size(); ++i) {
}; valueIndexMap_[bucketValues_[i]] = i;
}
}
// Returns the index of the bucket that should count |value|.
//
//  * value >= the largest bucket limit  -> the last bucket.
//  * value <  the smallest bucket limit -> bucket 0.
//  * otherwise                          -> the bucket whose limit is the first
//    one that is not less than |value| (so a value equal to a limit maps to
//    that limit's own bucket).
const size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const {
  // Everything at or beyond the top limit is collected in the final bucket.
  if (value >= maxBucketValue_) {
    return bucketValues_.size() - 1;
  }

  // Anything below the first limit belongs to the first bucket.
  if (value < minBucketValue_) {
    return 0;
  }

  // valueIndexMap_ is keyed by bucket limit; lower_bound() yields the first
  // limit >= value. Fall back to bucket 0 if no such limit is found.
  const auto match = valueIndexMap_.lower_bound(value);
  if (match == valueIndexMap_.end()) {
    return 0;
  }
  return match->second;
}
// File-local bucket mapper used by the Histogram member functions in this
// file to translate values into bucket indices and to look up bucket limits.
// Being a namespace-scope object, it is constructed once, before any code in
// this file uses it from main().
namespace {
const HistogramBucketMapper bucketMapper;
}
// Constructs an empty histogram with one zeroed counter per bucket.
//
// The summary statistics (min_, max_, num_, sum_, sum_squares_) are reset via
// Clear() so that a freshly constructed histogram can be queried (Median(),
// Percentile(), Average(), ...) without the caller having to call Clear()
// first; previously those members were left uninitialized.
Histogram::Histogram() :
  buckets_(bucketMapper.BucketCount(), 0) {
  Clear();
}
void Histogram::Clear() { void Histogram::Clear() {
min_ = kBucketLimit[kNumBuckets-1]; min_ = bucketMapper.LastValue();
max_ = 0; max_ = 0;
num_ = 0; num_ = 0;
sum_ = 0; sum_ = 0;
sum_squares_ = 0; sum_squares_ = 0;
for (int i = 0; i < kNumBuckets; i++) { buckets_.resize(bucketMapper.BucketCount(), 0);
buckets_[i] = 0;
}
} }
void Histogram::Add(double value) { void Histogram::Add(uint64_t value) {
// Linear search is fast enough for our usage in db_bench const size_t index = bucketMapper.IndexForValue(value);
int b = 0; buckets_[index] += 1;
while (b < kNumBuckets - 1 && kBucketLimit[b] <= value) {
b++;
}
buckets_[b] += 1.0;
if (min_ > value) min_ = value; if (min_ > value) min_ = value;
if (max_ < value) max_ = value; if (max_ < value) max_ = value;
num_++; num_++;
@ -55,13 +79,17 @@ void Histogram::Add(double value) {
sum_squares_ += (value * value); sum_squares_ += (value * value);
} }
// Records a floating-point sample by converting it to an unsigned integer
// (fraction truncated) and forwarding to Add(uint64_t).
//
// Converting a double that is negative, NaN, or too large for uint64_t is
// undefined behaviour, so such inputs are clamped instead:
//  * negative values and NaN are recorded as 0;
//  * values >= 2^64 are recorded as the largest uint64_t.
void Histogram::Add(double value) {
  // 2^64, the smallest double that no longer fits into a uint64_t.
  const double kUint64Range = 18446744073709551616.0;

  uint64_t sample = 0;
  if (value >= kUint64Range) {
    sample = ~static_cast<uint64_t>(0);
  } else if (value > 0) {
    sample = static_cast<uint64_t>(value);
  }
  // NaN fails both comparisons above and therefore stays at 0.
  Add(sample);
}
void Histogram::Merge(const Histogram& other) { void Histogram::Merge(const Histogram& other) {
if (other.min_ < min_) min_ = other.min_; if (other.min_ < min_) min_ = other.min_;
if (other.max_ > max_) max_ = other.max_; if (other.max_ > max_) max_ = other.max_;
num_ += other.num_; num_ += other.num_;
sum_ += other.sum_; sum_ += other.sum_;
sum_squares_ += other.sum_squares_; sum_squares_ += other.sum_squares_;
for (int b = 0; b < kNumBuckets; b++) { for (int b = 0; b < bucketMapper.BucketCount(); b++) {
buckets_[b] += other.buckets_[b]; buckets_[b] += other.buckets_[b];
} }
} }
@ -73,15 +101,19 @@ double Histogram::Median() const {
double Histogram::Percentile(double p) const { double Histogram::Percentile(double p) const {
double threshold = num_ * (p / 100.0); double threshold = num_ * (p / 100.0);
double sum = 0; double sum = 0;
for (int b = 0; b < kNumBuckets; b++) { for (int b = 0; b < bucketMapper.BucketCount(); b++) {
sum += buckets_[b]; sum += buckets_[b];
if (sum >= threshold) { if (sum >= threshold) {
// Scale linearly within this bucket // Scale linearly within this bucket
double left_point = (b == 0) ? 0 : kBucketLimit[b-1]; double left_point = (b == 0) ? 0 : bucketMapper.BucketLimit(b-1);
double right_point = kBucketLimit[b]; double right_point = bucketMapper.BucketLimit(b);
double left_sum = sum - buckets_[b]; double left_sum = sum - buckets_[b];
double right_sum = sum; double right_sum = sum;
double pos = (threshold - left_sum) / (right_sum - left_sum); double pos = 0;
double right_left_diff = right_sum - left_sum;
if (right_left_diff != 0) {
pos = (threshold - left_sum) / (right_sum - left_sum);
}
double r = left_point + (right_point - left_point) * pos; double r = left_point + (right_point - left_point) * pos;
if (r < min_) r = min_; if (r < min_) r = min_;
if (r > max_) r = max_; if (r > max_) r = max_;
@ -116,13 +148,13 @@ std::string Histogram::ToString() const {
r.append("------------------------------------------------------\n"); r.append("------------------------------------------------------\n");
const double mult = 100.0 / num_; const double mult = 100.0 / num_;
double sum = 0; double sum = 0;
for (int b = 0; b < kNumBuckets; b++) { for (int b = 0; b < bucketMapper.BucketCount(); b++) {
if (buckets_[b] <= 0.0) continue; if (buckets_[b] <= 0.0) continue;
sum += buckets_[b]; sum += buckets_[b];
snprintf(buf, sizeof(buf), snprintf(buf, sizeof(buf),
"[ %7.0f, %7.0f ) %7.0f %7.3f%% %7.3f%% ", "[ %ld, %ld ) %ld %7.3f%% %7.3f%% ",
((b == 0) ? 0.0 : kBucketLimit[b-1]), // left ((b == 0) ? 0 : bucketMapper.BucketLimit(b-1)), // left
kBucketLimit[b], // right bucketMapper.BucketLimit(b), // right
buckets_[b], // count buckets_[b], // count
mult * buckets_[b], // percentage mult * buckets_[b], // percentage
mult * sum); // cumulative percentage mult * sum); // cumulative percentage

View File

@ -5,36 +5,70 @@
#ifndef STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ #ifndef STORAGE_LEVELDB_UTIL_HISTOGRAM_H_
#define STORAGE_LEVELDB_UTIL_HISTOGRAM_H_ #define STORAGE_LEVELDB_UTIL_HISTOGRAM_H_
#include <cassert>
#include <string> #include <string>
#include <vector>
#include <map>
namespace leveldb { namespace leveldb {
// Maps sample values to histogram bucket indices.
//
// The mapper owns the ordered list of bucket limits and a limit -> index
// lookup table built from it, and exposes the smallest and largest limits.
class HistogramBucketMapper {
public:
// Populates the bucket limits and builds the limit -> index lookup table.
HistogramBucketMapper();
// Converts a value to the index of the bucket that should count it.
// Values at or above LastValue() map to the last bucket.
const size_t IndexForValue(const uint64_t value) const;
// Number of buckets required, i.e. the number of bucket limits.
const size_t BucketCount() const {
return bucketValues_.size();
}
// Largest bucket limit (the limit of the last bucket).
uint64_t LastValue() const {
return maxBucketValue_;
}
// Smallest bucket limit (the limit of the first bucket).
uint64_t FirstValue() const {
return minBucketValue_;
}
// Returns the limit stored for bucket |bucketNumber|.
// |bucketNumber| must be less than BucketCount(); this is only checked by an
// assert.
uint64_t BucketLimit(const uint64_t bucketNumber) const {
assert(bucketNumber < BucketCount());
return bucketValues_[bucketNumber];
}
private:
// Bucket limits. Declared first because the two members below are
// initialized from it.
const std::vector<uint64_t> bucketValues_;
// Cached bucketValues_.back().
const uint64_t maxBucketValue_;
// Cached bucketValues_.front().
const uint64_t minBucketValue_;
// Bucket limit -> index of that limit in bucketValues_.
std::map<uint64_t, uint64_t> valueIndexMap_;
};
class Histogram { class Histogram {
public: public:
Histogram() { } Histogram();
~Histogram() { }
void Clear(); void Clear();
void Add(uint64_t value);
void Add(double value); void Add(double value);
void Merge(const Histogram& other); void Merge(const Histogram& other);
std::string ToString() const; std::string ToString() const;
double Median() const;
double Percentile(double p) const;
double Average() const;
double StandardDeviation() const;
private: private:
double min_; double min_;
double max_; double max_;
double num_; double num_;
double sum_; double sum_;
double sum_squares_; double sum_squares_;
std::vector<uint64_t> buckets_;
enum { kNumBuckets = 154 };
static const double kBucketLimit[kNumBuckets];
double buckets_[kNumBuckets];
double Median() const;
double Percentile(double p) const;
double Average() const;
double StandardDeviation() const;
}; };
} // namespace leveldb } // namespace leveldb

57
util/histogram_test.cc Normal file
View File

@ -0,0 +1,57 @@
#include "util/histogram.h"
#include "util/testharness.h"
namespace leveldb {
// Empty type whose name is used as the suite name by the TEST() cases in this
// file.
class HistogramTest { };
// Feeds the integers 1..100 into a histogram and sanity-checks the median,
// a few percentiles and the average it reports.
TEST(HistogramTest, BasicOperation) {
  Histogram hist;
  for (uint64_t sample = 1; sample <= 100; ++sample) {
    hist.Add(sample);
  }

  // Only a lower bound is checked for the median; the upper-bound check is
  // currently disabled.
  const double median = hist.Median();
  // ASSERT_LE(median, 50);
  ASSERT_GT(median, 0);

  // The 100th percentile must stay within the range of the inserted values.
  const double p100 = hist.Percentile(100.0);
  ASSERT_LE(p100, 100.0);
  ASSERT_GT(p100, 0.0);

  // Percentiles must not decrease as p grows.
  const double p99 = hist.Percentile(99.0);
  const double p85 = hist.Percentile(85.0);
  ASSERT_LE(p99, 99.0);
  ASSERT_TRUE(p99 >= p85);

  // The average of 1..100 is expected to be exactly 50.5.
  ASSERT_EQ(hist.Average(), 50.5);
}
// A histogram that has never received a sample is expected to report zero for
// its median, its 85th percentile and its average.
TEST(HistogramTest, EmptyHistogram) {
  Histogram untouched;

  ASSERT_EQ(untouched.Median(), 0.0);
  ASSERT_EQ(untouched.Percentile(85.0), 0.0);
  ASSERT_EQ(untouched.Average(), 0.0);
}
// After Clear(), a histogram that previously held the samples 1..100 is
// expected to report zero for its median, 85th percentile and average.
TEST(HistogramTest, ClearHistogram) {
  Histogram hist;
  for (uint64_t sample = 1; sample <= 100; ++sample) {
    hist.Add(sample);
  }

  hist.Clear();

  ASSERT_EQ(hist.Median(), 0);
  ASSERT_EQ(hist.Percentile(85.0), 0);
  ASSERT_EQ(hist.Average(), 0);
}
} // namespace leveldb
// Test binary entry point: delegates to leveldb::test::RunAllTests() and uses
// its return value as the process exit status. argc and argv are not used.
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}