Performant util/histogram.
Summary: Earlier way to record in histogram => linear search of the BucketLimit array to find the bucket and increment the counter. Current way to record in histogram => store a HistMap statically which points to the bucket of each value in the range [kFirstValue, kLastValue). In the process, use vectors instead of arrays and refactor some code into the HistogramHelper class. Test Plan: run db_bench with histogram=1 and see a histogram being printed. Reviewers: dhruba, chip, heyongqiang Reviewed By: chip CC: leveldb Differential Revision: https://reviews.facebook.net/D8265
This commit is contained in:
parent
4dcc0c89f4
commit
009034cf12
4
Makefile
4
Makefile
@ -38,6 +38,7 @@ TESTS = \
|
|||||||
c_test \
|
c_test \
|
||||||
cache_test \
|
cache_test \
|
||||||
coding_test \
|
coding_test \
|
||||||
|
histogram_test \
|
||||||
corruption_test \
|
corruption_test \
|
||||||
crc32c_test \
|
crc32c_test \
|
||||||
db_test \
|
db_test \
|
||||||
@ -149,6 +150,9 @@ cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
|||||||
coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CXX) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS)
|
$(CXX) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS)
|
||||||
|
|
||||||
|
histogram_test: util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
|
$(CXX) util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS)
|
||||||
|
|
||||||
corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||||
$(CXX) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS)
|
$(CXX) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS)
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
// Use of this source code is governed by a BSD-style license that can be
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
@ -9,7 +10,10 @@
|
|||||||
|
|
||||||
namespace leveldb {
|
namespace leveldb {
|
||||||
|
|
||||||
const double Histogram::kBucketLimit[kNumBuckets] = {
|
HistogramBucketMapper::HistogramBucketMapper() :
|
||||||
|
// Add newer bucket index here.
|
||||||
|
// Should always be added in sorted order.
|
||||||
|
bucketValues_({
|
||||||
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45,
|
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45,
|
||||||
50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450,
|
50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450,
|
||||||
500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000,
|
500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000,
|
||||||
@ -24,30 +28,50 @@ const double Histogram::kBucketLimit[kNumBuckets] = {
|
|||||||
70000000, 80000000, 90000000, 100000000, 120000000, 140000000, 160000000,
|
70000000, 80000000, 90000000, 100000000, 120000000, 140000000, 160000000,
|
||||||
180000000, 200000000, 250000000, 300000000, 350000000, 400000000,
|
180000000, 200000000, 250000000, 300000000, 350000000, 400000000,
|
||||||
450000000, 500000000, 600000000, 700000000, 800000000, 900000000,
|
450000000, 500000000, 600000000, 700000000, 800000000, 900000000,
|
||||||
1000000000, 1200000000, 1400000000, 1600000000, 1800000000, 2000000000,
|
1000000000}),
|
||||||
2500000000.0, 3000000000.0, 3500000000.0, 4000000000.0, 4500000000.0,
|
maxBucketValue_(bucketValues_.back()),
|
||||||
5000000000.0, 6000000000.0, 7000000000.0, 8000000000.0, 9000000000.0,
|
minBucketValue_(bucketValues_.front()) {
|
||||||
1e200,
|
for (size_t i =0; i < bucketValues_.size(); ++i) {
|
||||||
};
|
valueIndexMap_[bucketValues_[i]] = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t HistogramBucketMapper::IndexForValue(const uint64_t value) const {
|
||||||
|
if (value >= maxBucketValue_) {
|
||||||
|
return bucketValues_.size() - 1;
|
||||||
|
} else if ( value >= minBucketValue_ ) {
|
||||||
|
std::map<uint64_t, uint64_t>::const_iterator lowerBound =
|
||||||
|
valueIndexMap_.lower_bound(value);
|
||||||
|
if (lowerBound != valueIndexMap_.end()) {
|
||||||
|
return lowerBound->second;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
const HistogramBucketMapper bucketMapper;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Histogram::Histogram() :
|
||||||
|
buckets_(std::vector<uint64_t>(bucketMapper.BucketCount(), 0)) {}
|
||||||
|
|
||||||
void Histogram::Clear() {
|
void Histogram::Clear() {
|
||||||
min_ = kBucketLimit[kNumBuckets-1];
|
min_ = bucketMapper.LastValue();
|
||||||
max_ = 0;
|
max_ = 0;
|
||||||
num_ = 0;
|
num_ = 0;
|
||||||
sum_ = 0;
|
sum_ = 0;
|
||||||
sum_squares_ = 0;
|
sum_squares_ = 0;
|
||||||
for (int i = 0; i < kNumBuckets; i++) {
|
buckets_.resize(bucketMapper.BucketCount(), 0);
|
||||||
buckets_[i] = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Histogram::Add(double value) {
|
void Histogram::Add(uint64_t value) {
|
||||||
// Linear search is fast enough for our usage in db_bench
|
const size_t index = bucketMapper.IndexForValue(value);
|
||||||
int b = 0;
|
buckets_[index] += 1;
|
||||||
while (b < kNumBuckets - 1 && kBucketLimit[b] <= value) {
|
|
||||||
b++;
|
|
||||||
}
|
|
||||||
buckets_[b] += 1.0;
|
|
||||||
if (min_ > value) min_ = value;
|
if (min_ > value) min_ = value;
|
||||||
if (max_ < value) max_ = value;
|
if (max_ < value) max_ = value;
|
||||||
num_++;
|
num_++;
|
||||||
@ -55,13 +79,17 @@ void Histogram::Add(double value) {
|
|||||||
sum_squares_ += (value * value);
|
sum_squares_ += (value * value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Histogram::Add(double value) {
|
||||||
|
Add(static_cast<uint64_t>(value));
|
||||||
|
}
|
||||||
|
|
||||||
void Histogram::Merge(const Histogram& other) {
|
void Histogram::Merge(const Histogram& other) {
|
||||||
if (other.min_ < min_) min_ = other.min_;
|
if (other.min_ < min_) min_ = other.min_;
|
||||||
if (other.max_ > max_) max_ = other.max_;
|
if (other.max_ > max_) max_ = other.max_;
|
||||||
num_ += other.num_;
|
num_ += other.num_;
|
||||||
sum_ += other.sum_;
|
sum_ += other.sum_;
|
||||||
sum_squares_ += other.sum_squares_;
|
sum_squares_ += other.sum_squares_;
|
||||||
for (int b = 0; b < kNumBuckets; b++) {
|
for (int b = 0; b < bucketMapper.BucketCount(); b++) {
|
||||||
buckets_[b] += other.buckets_[b];
|
buckets_[b] += other.buckets_[b];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -73,15 +101,19 @@ double Histogram::Median() const {
|
|||||||
double Histogram::Percentile(double p) const {
|
double Histogram::Percentile(double p) const {
|
||||||
double threshold = num_ * (p / 100.0);
|
double threshold = num_ * (p / 100.0);
|
||||||
double sum = 0;
|
double sum = 0;
|
||||||
for (int b = 0; b < kNumBuckets; b++) {
|
for (int b = 0; b < bucketMapper.BucketCount(); b++) {
|
||||||
sum += buckets_[b];
|
sum += buckets_[b];
|
||||||
if (sum >= threshold) {
|
if (sum >= threshold) {
|
||||||
// Scale linearly within this bucket
|
// Scale linearly within this bucket
|
||||||
double left_point = (b == 0) ? 0 : kBucketLimit[b-1];
|
double left_point = (b == 0) ? 0 : bucketMapper.BucketLimit(b-1);
|
||||||
double right_point = kBucketLimit[b];
|
double right_point = bucketMapper.BucketLimit(b);
|
||||||
double left_sum = sum - buckets_[b];
|
double left_sum = sum - buckets_[b];
|
||||||
double right_sum = sum;
|
double right_sum = sum;
|
||||||
double pos = (threshold - left_sum) / (right_sum - left_sum);
|
double pos = 0;
|
||||||
|
double right_left_diff = right_sum - left_sum;
|
||||||
|
if (right_left_diff != 0) {
|
||||||
|
pos = (threshold - left_sum) / (right_sum - left_sum);
|
||||||
|
}
|
||||||
double r = left_point + (right_point - left_point) * pos;
|
double r = left_point + (right_point - left_point) * pos;
|
||||||
if (r < min_) r = min_;
|
if (r < min_) r = min_;
|
||||||
if (r > max_) r = max_;
|
if (r > max_) r = max_;
|
||||||
@ -116,16 +148,16 @@ std::string Histogram::ToString() const {
|
|||||||
r.append("------------------------------------------------------\n");
|
r.append("------------------------------------------------------\n");
|
||||||
const double mult = 100.0 / num_;
|
const double mult = 100.0 / num_;
|
||||||
double sum = 0;
|
double sum = 0;
|
||||||
for (int b = 0; b < kNumBuckets; b++) {
|
for (int b = 0; b < bucketMapper.BucketCount(); b++) {
|
||||||
if (buckets_[b] <= 0.0) continue;
|
if (buckets_[b] <= 0.0) continue;
|
||||||
sum += buckets_[b];
|
sum += buckets_[b];
|
||||||
snprintf(buf, sizeof(buf),
|
snprintf(buf, sizeof(buf),
|
||||||
"[ %7.0f, %7.0f ) %7.0f %7.3f%% %7.3f%% ",
|
"[ %ld, %ld ) %ld %7.3f%% %7.3f%% ",
|
||||||
((b == 0) ? 0.0 : kBucketLimit[b-1]), // left
|
((b == 0) ? 0 : bucketMapper.BucketLimit(b-1)), // left
|
||||||
kBucketLimit[b], // right
|
bucketMapper.BucketLimit(b), // right
|
||||||
buckets_[b], // count
|
buckets_[b], // count
|
||||||
mult * buckets_[b], // percentage
|
mult * buckets_[b], // percentage
|
||||||
mult * sum); // cumulative percentage
|
mult * sum); // cumulative percentage
|
||||||
r.append(buf);
|
r.append(buf);
|
||||||
|
|
||||||
// Add hash marks based on percentage; 20 marks for 100%.
|
// Add hash marks based on percentage; 20 marks for 100%.
|
||||||
|
@ -5,36 +5,70 @@
|
|||||||
#ifndef STORAGE_LEVELDB_UTIL_HISTOGRAM_H_
|
#ifndef STORAGE_LEVELDB_UTIL_HISTOGRAM_H_
|
||||||
#define STORAGE_LEVELDB_UTIL_HISTOGRAM_H_
|
#define STORAGE_LEVELDB_UTIL_HISTOGRAM_H_
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
namespace leveldb {
|
namespace leveldb {
|
||||||
|
|
||||||
|
class HistogramBucketMapper {
|
||||||
|
public:
|
||||||
|
|
||||||
|
HistogramBucketMapper();
|
||||||
|
|
||||||
|
// converts a value to the bucket index.
|
||||||
|
const size_t IndexForValue(const uint64_t value) const;
|
||||||
|
// number of buckets required.
|
||||||
|
|
||||||
|
const size_t BucketCount() const {
|
||||||
|
return bucketValues_.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t LastValue() const {
|
||||||
|
return maxBucketValue_;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t FirstValue() const {
|
||||||
|
return minBucketValue_;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t BucketLimit(const uint64_t bucketNumber) const {
|
||||||
|
assert(bucketNumber < BucketCount());
|
||||||
|
return bucketValues_[bucketNumber];
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const std::vector<uint64_t> bucketValues_;
|
||||||
|
const uint64_t maxBucketValue_;
|
||||||
|
const uint64_t minBucketValue_;
|
||||||
|
std::map<uint64_t, uint64_t> valueIndexMap_;
|
||||||
|
};
|
||||||
|
|
||||||
class Histogram {
|
class Histogram {
|
||||||
public:
|
public:
|
||||||
Histogram() { }
|
Histogram();
|
||||||
~Histogram() { }
|
|
||||||
|
|
||||||
void Clear();
|
void Clear();
|
||||||
|
void Add(uint64_t value);
|
||||||
void Add(double value);
|
void Add(double value);
|
||||||
void Merge(const Histogram& other);
|
void Merge(const Histogram& other);
|
||||||
|
|
||||||
std::string ToString() const;
|
std::string ToString() const;
|
||||||
|
|
||||||
|
double Median() const;
|
||||||
|
double Percentile(double p) const;
|
||||||
|
double Average() const;
|
||||||
|
double StandardDeviation() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
double min_;
|
double min_;
|
||||||
double max_;
|
double max_;
|
||||||
double num_;
|
double num_;
|
||||||
double sum_;
|
double sum_;
|
||||||
double sum_squares_;
|
double sum_squares_;
|
||||||
|
std::vector<uint64_t> buckets_;
|
||||||
|
|
||||||
enum { kNumBuckets = 154 };
|
|
||||||
static const double kBucketLimit[kNumBuckets];
|
|
||||||
double buckets_[kNumBuckets];
|
|
||||||
|
|
||||||
double Median() const;
|
|
||||||
double Percentile(double p) const;
|
|
||||||
double Average() const;
|
|
||||||
double StandardDeviation() const;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace leveldb
|
} // namespace leveldb
|
||||||
|
57
util/histogram_test.cc
Normal file
57
util/histogram_test.cc
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
#include "util/histogram.h"
|
||||||
|
|
||||||
|
#include "util/testharness.h"
|
||||||
|
|
||||||
|
namespace leveldb {
|
||||||
|
|
||||||
|
class HistogramTest { };
|
||||||
|
|
||||||
|
TEST(HistogramTest, BasicOperation) {
|
||||||
|
|
||||||
|
Histogram histogram;
|
||||||
|
for (uint64_t i = 1; i <= 100; i++) {
|
||||||
|
histogram.Add(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
double median = histogram.Median();
|
||||||
|
// ASSERT_LE(median, 50);
|
||||||
|
ASSERT_GT(median, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
double percentile100 = histogram.Percentile(100.0);
|
||||||
|
ASSERT_LE(percentile100, 100.0);
|
||||||
|
ASSERT_GT(percentile100, 0.0);
|
||||||
|
double percentile99 = histogram.Percentile(99.0);
|
||||||
|
double percentile85 = histogram.Percentile(85.0);
|
||||||
|
ASSERT_LE(percentile99, 99.0);
|
||||||
|
ASSERT_TRUE(percentile99 >= percentile85);
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_EQ(histogram.Average(), 50.5); // avg is accurately calculated.
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(HistogramTest, EmptyHistogram) {
|
||||||
|
Histogram histogram;
|
||||||
|
ASSERT_EQ(histogram.Median(), 0.0);
|
||||||
|
ASSERT_EQ(histogram.Percentile(85.0), 0.0);
|
||||||
|
ASSERT_EQ(histogram.Average(), 0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(HistogramTest, ClearHistogram) {
|
||||||
|
Histogram histogram;
|
||||||
|
for (uint64_t i = 1; i <= 100; i++) {
|
||||||
|
histogram.Add(i);
|
||||||
|
}
|
||||||
|
histogram.Clear();
|
||||||
|
ASSERT_EQ(histogram.Median(), 0);
|
||||||
|
ASSERT_EQ(histogram.Percentile(85.0), 0);
|
||||||
|
ASSERT_EQ(histogram.Average(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace leveldb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
return leveldb::test::RunAllTests();
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user