Add two more StatsLevel (#5027)

Summary:
Statistics cost too much CPU for some use cases. Add two stats levels
so that people can choose to skip two types of expensive stats, timers and
histograms.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5027

Differential Revision: D14252765

Pulled By: siying

fbshipit-source-id: 75ecec9eaa44c06118229df4f80c366115346592
This commit is contained in:
Siying Dong 2019-02-28 10:14:19 -08:00 committed by Facebook Github Bot
parent a661c0d208
commit 5e298f865b
17 changed files with 91 additions and 44 deletions

View File

@ -1,4 +1,7 @@
# Rocksdb Change Log
## Unreleased
### New Features
* Introduce two more stats levels, kExceptHistogramOrTimers and kExceptTimers.
## 6.0.0 (2/19/2019)
### New Features

View File

@ -414,11 +414,10 @@ void CompactionJob::Prepare() {
bottommost_level_ = c->bottommost_level();
if (c->ShouldFormSubcompactions()) {
const uint64_t start_micros = env_->NowMicros();
{
StopWatch sw(env_, stats_, SUBCOMPACTION_SETUP_TIME);
GenSubcompactionBoundaries();
MeasureTime(stats_, SUBCOMPACTION_SETUP_TIME,
env_->NowMicros() - start_micros);
}
assert(sizes_.size() == boundaries_.size() + 1);
for (size_t i = 0; i <= boundaries_.size(); i++) {
@ -426,7 +425,7 @@ void CompactionJob::Prepare() {
Slice* end = i == boundaries_.size() ? nullptr : &boundaries_[i];
compact_->sub_compact_states.emplace_back(c, start, end, sizes_[i]);
}
MeasureTime(stats_, NUM_SUBCOMPACTIONS_SCHEDULED,
RecordInHistogram(stats_, NUM_SUBCOMPACTIONS_SCHEDULED,
compact_->sub_compact_states.size());
} else {
compact_->sub_compact_states.emplace_back(c, nullptr, nullptr);
@ -607,8 +606,9 @@ Status CompactionJob::Run() {
compact_->sub_compact_states[i].compaction_job_stats.cpu_micros;
}
MeasureTime(stats_, COMPACTION_TIME, compaction_stats_.micros);
MeasureTime(stats_, COMPACTION_CPU_TIME, compaction_stats_.cpu_micros);
RecordTimeToHistogram(stats_, COMPACTION_TIME, compaction_stats_.micros);
RecordTimeToHistogram(stats_, COMPACTION_CPU_TIME,
compaction_stats_.cpu_micros);
TEST_SYNC_POINT("CompactionJob::Run:BeforeVerify");

View File

@ -395,7 +395,7 @@ Compaction* UniversalCompactionPicker::PickCompaction(
}
#endif
// update statistics
MeasureTime(ioptions_.statistics, NUM_FILES_IN_SINGLE_COMPACTION,
RecordInHistogram(ioptions_.statistics, NUM_FILES_IN_SINGLE_COMPACTION,
c->inputs(0)->size());
RegisterCompaction(c);

View File

@ -1447,7 +1447,7 @@ Status DBImpl::GetImpl(const ReadOptions& read_options,
RecordTick(stats_, BYTES_READ, size);
PERF_COUNTER_ADD(get_read_bytes, size);
}
MeasureTime(stats_, BYTES_PER_READ, size);
RecordInHistogram(stats_, BYTES_PER_READ, size);
}
return s;
}
@ -1632,7 +1632,7 @@ std::vector<Status> DBImpl::MultiGet(
RecordTick(stats_, NUMBER_MULTIGET_KEYS_READ, num_keys);
RecordTick(stats_, NUMBER_MULTIGET_KEYS_FOUND, num_found);
RecordTick(stats_, NUMBER_MULTIGET_BYTES_READ, bytes_read);
MeasureTime(stats_, BYTES_PER_MULTIGET, bytes_read);
RecordInHistogram(stats_, BYTES_PER_MULTIGET, bytes_read);
PERF_COUNTER_ADD(multiget_read_bytes, bytes_read);
PERF_TIMER_STOP(get_post_process_time);

View File

@ -2418,7 +2418,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
status = Status::CompactionTooLarge();
} else {
// update statistics
MeasureTime(stats_, NUM_FILES_IN_SINGLE_COMPACTION,
RecordInHistogram(stats_, NUM_FILES_IN_SINGLE_COMPACTION,
c->inputs(0)->size());
// There are three things that can change compaction score:
// 1) When flush or compaction finish. This case is covered by

View File

@ -60,7 +60,7 @@ Status DBImplReadOnly::Get(const ReadOptions& read_options,
RecordTick(stats_, NUMBER_KEYS_READ);
size_t size = pinnable_val->size();
RecordTick(stats_, BYTES_READ, size);
MeasureTime(stats_, BYTES_PER_READ, size);
RecordInHistogram(stats_, BYTES_PER_READ, size);
PERF_COUNTER_ADD(get_read_bytes, size);
return s;
}

View File

@ -279,7 +279,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
concurrent_update);
RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other);
}
MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);
if (write_options.disableWAL) {
has_unpersisted_data_.store(true, std::memory_order_relaxed);
@ -471,7 +471,7 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
RecordTick(stats_, NUMBER_KEYS_WRITTEN, total_count);
stats->AddDBStats(InternalStats::BYTES_WRITTEN, total_byte_size);
RecordTick(stats_, BYTES_WRITTEN, total_byte_size);
MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);
PERF_TIMER_STOP(write_pre_and_post_process_time);
@ -602,7 +602,7 @@ Status DBImpl::WriteImplWALOnly(const WriteOptions& write_options,
concurrent_update);
RecordTick(stats_, WRITE_DONE_BY_OTHER, write_done_by_other);
}
MeasureTime(stats_, BYTES_PER_WRITE, total_byte_size);
RecordInHistogram(stats_, BYTES_PER_WRITE, total_byte_size);
PERF_TIMER_STOP(write_pre_and_post_process_time);

View File

@ -415,7 +415,7 @@ Status FlushJob::WriteLevel0Table() {
stats.micros = db_options_.env->NowMicros() - start_micros;
stats.cpu_micros = db_options_.env->NowCPUNanos() / 1000 - start_cpu_micros;
stats.bytes_written = meta_.fd.GetFileSize();
MeasureTime(stats_, FLUSH_TIME, stats.micros);
RecordTimeToHistogram(stats_, FLUSH_TIME, stats.micros);
cfd_->internal_stats()->AddCompactionStats(0 /* level */, stats);
cfd_->internal_stats()->AddCFStats(InternalStats::BYTES_FLUSHED,
meta_.fd.GetFileSize());

View File

@ -64,7 +64,7 @@ Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
}
if (update_num_ops_stats) {
MeasureTime(statistics, READ_NUM_MERGE_OPERANDS,
RecordInHistogram(statistics, READ_NUM_MERGE_OPERANDS,
static_cast<uint64_t>(operands.size()));
}

View File

@ -22,7 +22,8 @@ namespace rocksdb {
* 1. Any ticker should be added before TICKER_ENUM_MAX.
* 2. Add a readable string in TickersNameMap below for the newly added ticker.
* 3. Add a corresponding enum value to TickerType.java in the java API
* 4. Add the enum conversions from Java and C++ to portal.h's toJavaTickerType and toCppTickers
* 4. Add the enum conversions from Java and C++ to portal.h's toJavaTickerType
* and toCppTickers
*/
enum Tickers : uint32_t {
// total block cache misses
@ -443,7 +444,11 @@ struct HistogramData {
double min = 0.0;
};
enum StatsLevel {
enum StatsLevel : uint8_t {
// Disable timer stats, and skip histogram stats
kExceptHistogramOrTimers,
// Skip timer stats
kExceptTimers,
// Collect all stats except time inside mutex lock AND time spent on
// compression.
kExceptDetailedTimers,
@ -468,7 +473,26 @@ class Statistics {
virtual void recordTick(uint32_t tickerType, uint64_t count = 0) = 0;
virtual void setTickerCount(uint32_t tickerType, uint64_t count) = 0;
virtual uint64_t getAndResetTickerCount(uint32_t tickerType) = 0;
virtual void measureTime(uint32_t histogramType, uint64_t time) = 0;
virtual void reportTimeToHistogram(uint32_t histogramType, uint64_t time) {
if (stats_level_ <= StatsLevel::kExceptTimers) {
return;
}
recordInHistogram(histogramType, time);
}
// The function is here only for backward compatibility reason.
// Users implementing their own Statistics class should override
// recordInHistogram() instead and leave measureTime() as it is.
virtual void measureTime(uint32_t /*histogramType*/, uint64_t /*time*/) {
// This is not supposed to be called.
assert(false);
}
virtual void recordInHistogram(uint32_t histogramType, uint64_t time) {
// measureTime() is the old and inaccurate function name.
// To keep backward compatible. If users implement their own
// statistics, which overrides meareTime() but doesn't override
// this function. We forward to measureTime().
measureTime(histogramType, time);
}
// Resets all ticker and histogram stats
virtual Status Reset() {

View File

@ -322,11 +322,14 @@ void StatisticsImpl::recordTick(uint32_t tickerType, uint64_t count) {
}
}
void StatisticsImpl::measureTime(uint32_t histogramType, uint64_t value) {
void StatisticsImpl::recordInHistogram(uint32_t histogramType, uint64_t value) {
assert(histogramType < HISTOGRAM_ENUM_MAX);
if (stats_level_ <= StatsLevel::kExceptHistogramOrTimers) {
return;
}
per_core_stats_.Access()->histograms_[histogramType].Add(value);
if (stats_ && histogramType < HISTOGRAM_ENUM_MAX) {
stats_->measureTime(histogramType, value);
stats_->recordInHistogram(histogramType, value);
}
}

View File

@ -53,7 +53,8 @@ class StatisticsImpl : public Statistics {
virtual void setTickerCount(uint32_t ticker_type, uint64_t count) override;
virtual uint64_t getAndResetTickerCount(uint32_t ticker_type) override;
virtual void recordTick(uint32_t ticker_type, uint64_t count) override;
virtual void measureTime(uint32_t histogram_type, uint64_t value) override;
virtual void recordInHistogram(uint32_t histogram_type,
uint64_t value) override;
virtual Status Reset() override;
virtual std::string ToString() const override;
@ -99,10 +100,17 @@ class StatisticsImpl : public Statistics {
};
// Utility functions
inline void MeasureTime(Statistics* statistics, uint32_t histogram_type,
inline void RecordInHistogram(Statistics* statistics, uint32_t histogram_type,
uint64_t value) {
if (statistics) {
statistics->measureTime(histogram_type, value);
statistics->recordInHistogram(histogram_type, value);
}
}
inline void RecordTimeToHistogram(Statistics* statistics,
uint32_t histogram_type, uint64_t value) {
if (statistics) {
statistics->reportTimeToHistogram(histogram_type, value);
}
}

View File

@ -634,10 +634,10 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents,
block_contents = raw_block_contents;
} else if (type != kNoCompression) {
if (ShouldReportDetailedTime(r->ioptions.env, r->ioptions.statistics)) {
MeasureTime(r->ioptions.statistics, COMPRESSION_TIMES_NANOS,
RecordTimeToHistogram(r->ioptions.statistics, COMPRESSION_TIMES_NANOS,
timer.ElapsedNanos());
}
MeasureTime(r->ioptions.statistics, BYTES_COMPRESSED,
RecordInHistogram(r->ioptions.statistics, BYTES_COMPRESSED,
raw_block_contents.size());
RecordTick(r->ioptions.statistics, NUMBER_BLOCK_COMPRESSED);
} else if (type != r->compression_type) {

View File

@ -379,10 +379,11 @@ Status UncompressBlockContentsForCompressionType(
}
if(ShouldReportDetailedTime(ioptions.env, ioptions.statistics)){
MeasureTime(ioptions.statistics, DECOMPRESSION_TIMES_NANOS,
RecordTimeToHistogram(ioptions.statistics, DECOMPRESSION_TIMES_NANOS,
timer.ElapsedNanos());
}
MeasureTime(ioptions.statistics, BYTES_DECOMPRESSED, contents->data.size());
RecordTimeToHistogram(ioptions.statistics, BYTES_DECOMPRESSED,
contents->data.size());
RecordTick(ioptions.statistics, NUMBER_BLOCK_DECOMPRESSED);
return Status::OK();

View File

@ -560,6 +560,8 @@ DEFINE_bool(verify_checksum, true,
" from storage");
DEFINE_bool(statistics, false, "Database statistics");
DEFINE_int32(stats_level, rocksdb::StatsLevel::kExceptDetailedTimers,
"stats level for statistics");
DEFINE_string(statistics_string, "", "Serialized statistics string");
static class std::shared_ptr<rocksdb::Statistics> dbstats;
@ -6079,6 +6081,9 @@ int db_bench_tool(int argc, char** argv) {
if (FLAGS_statistics) {
dbstats = rocksdb::CreateDBStatistics();
}
if (dbstats) {
dbstats->stats_level_ = static_cast<StatsLevel>(FLAGS_stats_level);
}
FLAGS_compaction_pri_e = (rocksdb::CompactionPri)FLAGS_compaction_pri;
std::vector<std::string> fanout = rocksdb::StringSplit(

View File

@ -22,7 +22,9 @@ class StopWatch {
hist_type_(hist_type),
elapsed_(elapsed),
overwrite_(overwrite),
stats_enabled_(statistics && statistics->HistEnabledForType(hist_type)),
stats_enabled_(statistics &&
statistics->stats_level_ >= StatsLevel::kExceptTimers &&
statistics->HistEnabledForType(hist_type)),
delay_enabled_(delay_enabled),
total_delay_(0),
delay_start_time_(0),
@ -41,9 +43,10 @@ class StopWatch {
*elapsed_ -= total_delay_;
}
if (stats_enabled_) {
statistics_->measureTime(hist_type_,
(elapsed_ != nullptr) ? *elapsed_ :
(env_->NowMicros() - start_time_));
statistics_->reportTimeToHistogram(
hist_type_, (elapsed_ != nullptr)
? *elapsed_
: (env_->NowMicros() - start_time_));
}
}

View File

@ -743,8 +743,8 @@ Status BlobDBImpl::PutBlobValue(const WriteOptions& /*options*/,
RecordTick(statistics_, BLOB_DB_NUM_KEYS_WRITTEN);
RecordTick(statistics_, BLOB_DB_BYTES_WRITTEN, key.size() + value.size());
MeasureTime(statistics_, BLOB_DB_KEY_SIZE, key.size());
MeasureTime(statistics_, BLOB_DB_VALUE_SIZE, value.size());
RecordInHistogram(statistics_, BLOB_DB_KEY_SIZE, key.size());
RecordInHistogram(statistics_, BLOB_DB_VALUE_SIZE, value.size());
return s;
}