Allow users to profile a query and see bottleneck of the query
Summary: Provide a framework to profile a query in detail to figure out latency bottleneck. Currently, in Get(), Put() and iterators, 2-3 simple timing is used. We can easily add more profile counters to the framework later. Test Plan: Enable this profiling in seveal existing tests. Reviewers: haobo, dhruba, kailiu, emayanke, vamsi, igor CC: leveldb Differential Revision: https://reviews.facebook.net/D14001
This commit is contained in:
parent
56589ab81f
commit
3d8ac31d71
@ -22,6 +22,7 @@
|
|||||||
#include "rocksdb/memtablerep.h"
|
#include "rocksdb/memtablerep.h"
|
||||||
#include "rocksdb/write_batch.h"
|
#include "rocksdb/write_batch.h"
|
||||||
#include "rocksdb/statistics.h"
|
#include "rocksdb/statistics.h"
|
||||||
|
#include "rocksdb/perf_context.h"
|
||||||
#include "port/port.h"
|
#include "port/port.h"
|
||||||
#include "util/bit_set.h"
|
#include "util/bit_set.h"
|
||||||
#include "util/crc32c.h"
|
#include "util/crc32c.h"
|
||||||
@ -350,6 +351,8 @@ DEFINE_int64(stats_interval, 0, "Stats are reported every N operations when "
|
|||||||
DEFINE_int32(stats_per_interval, 0, "Reports additional stats per interval when"
|
DEFINE_int32(stats_per_interval, 0, "Reports additional stats per interval when"
|
||||||
" this is greater than 0.");
|
" this is greater than 0.");
|
||||||
|
|
||||||
|
DEFINE_int32(perf_level, 0, "Level of perf collection");
|
||||||
|
|
||||||
static bool ValidateRateLimit(const char* flagname, double value) {
|
static bool ValidateRateLimit(const char* flagname, double value) {
|
||||||
static constexpr double EPSILON = 1e-10;
|
static constexpr double EPSILON = 1e-10;
|
||||||
if ( value < -EPSILON ) {
|
if ( value < -EPSILON ) {
|
||||||
@ -689,6 +692,7 @@ struct SharedState {
|
|||||||
port::Mutex mu;
|
port::Mutex mu;
|
||||||
port::CondVar cv;
|
port::CondVar cv;
|
||||||
int total;
|
int total;
|
||||||
|
int perf_level;
|
||||||
|
|
||||||
// Each thread goes through the following states:
|
// Each thread goes through the following states:
|
||||||
// (1) initializing
|
// (1) initializing
|
||||||
@ -700,7 +704,7 @@ struct SharedState {
|
|||||||
long num_done;
|
long num_done;
|
||||||
bool start;
|
bool start;
|
||||||
|
|
||||||
SharedState() : cv(&mu) { }
|
SharedState() : cv(&mu), perf_level(FLAGS_perf_level) { }
|
||||||
};
|
};
|
||||||
|
|
||||||
// Per-thread state for concurrent executions of the same benchmark.
|
// Per-thread state for concurrent executions of the same benchmark.
|
||||||
@ -810,6 +814,7 @@ class Benchmark {
|
|||||||
fprintf(stdout, "Memtablerep: vector\n");
|
fprintf(stdout, "Memtablerep: vector\n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
fprintf(stdout, "Perf Level: %d\n", FLAGS_perf_level);
|
||||||
|
|
||||||
PrintWarnings();
|
PrintWarnings();
|
||||||
fprintf(stdout, "------------------------------------------------\n");
|
fprintf(stdout, "------------------------------------------------\n");
|
||||||
@ -1150,6 +1155,7 @@ class Benchmark {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SetPerfLevel(static_cast<PerfLevel> (shared->perf_level));
|
||||||
thread->stats.Start(thread->tid);
|
thread->stats.Start(thread->tid);
|
||||||
(arg->bm->*(arg->method))(thread);
|
(arg->bm->*(arg->method))(thread);
|
||||||
thread->stats.Stop();
|
thread->stats.Stop();
|
||||||
|
@ -1035,7 +1035,7 @@ Status DBImpl::WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
|
|||||||
(unsigned long)m->GetLogNumber());
|
(unsigned long)m->GetLogNumber());
|
||||||
list.push_back(m->NewIterator());
|
list.push_back(m->NewIterator());
|
||||||
}
|
}
|
||||||
Iterator* iter = NewMergingIterator(&internal_comparator_, &list[0],
|
Iterator* iter = NewMergingIterator(env_, &internal_comparator_, &list[0],
|
||||||
list.size());
|
list.size());
|
||||||
const SequenceNumber newest_snapshot = snapshots_.GetNewest();
|
const SequenceNumber newest_snapshot = snapshots_.GetNewest();
|
||||||
const SequenceNumber earliest_seqno_in_memtable =
|
const SequenceNumber earliest_seqno_in_memtable =
|
||||||
@ -2519,7 +2519,7 @@ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
|
|||||||
// Collect iterators for files in L0 - Ln
|
// Collect iterators for files in L0 - Ln
|
||||||
versions_->current()->AddIterators(options, storage_options_, &list);
|
versions_->current()->AddIterators(options, storage_options_, &list);
|
||||||
Iterator* internal_iter =
|
Iterator* internal_iter =
|
||||||
NewMergingIterator(&internal_comparator_, &list[0], list.size());
|
NewMergingIterator(env_, &internal_comparator_, &list[0], list.size());
|
||||||
versions_->current()->Ref();
|
versions_->current()->Ref();
|
||||||
|
|
||||||
cleanup->mu = &mutex_;
|
cleanup->mu = &mutex_;
|
||||||
@ -2555,6 +2555,8 @@ Status DBImpl::GetImpl(const ReadOptions& options,
|
|||||||
Status s;
|
Status s;
|
||||||
|
|
||||||
StopWatch sw(env_, options_.statistics, DB_GET);
|
StopWatch sw(env_, options_.statistics, DB_GET);
|
||||||
|
StopWatchNano snapshot_timer(env_, false);
|
||||||
|
StartPerfTimer(&snapshot_timer);
|
||||||
SequenceNumber snapshot;
|
SequenceNumber snapshot;
|
||||||
mutex_.Lock();
|
mutex_.Lock();
|
||||||
if (options.snapshot != nullptr) {
|
if (options.snapshot != nullptr) {
|
||||||
@ -2583,15 +2585,23 @@ Status DBImpl::GetImpl(const ReadOptions& options,
|
|||||||
// s is both in/out. When in, s could either be OK or MergeInProgress.
|
// s is both in/out. When in, s could either be OK or MergeInProgress.
|
||||||
// merge_operands will contain the sequence of merges in the latter case.
|
// merge_operands will contain the sequence of merges in the latter case.
|
||||||
LookupKey lkey(key, snapshot);
|
LookupKey lkey(key, snapshot);
|
||||||
|
BumpPerfTime(&perf_context.get_snapshot_time, &snapshot_timer);
|
||||||
if (mem->Get(lkey, value, &s, &merge_operands, options_)) {
|
if (mem->Get(lkey, value, &s, &merge_operands, options_)) {
|
||||||
// Done
|
// Done
|
||||||
} else if (imm.Get(lkey, value, &s, &merge_operands, options_)) {
|
} else if (imm.Get(lkey, value, &s, &merge_operands, options_)) {
|
||||||
// Done
|
// Done
|
||||||
} else {
|
} else {
|
||||||
|
StopWatchNano from_files_timer(env_, false);
|
||||||
|
StartPerfTimer(&from_files_timer);
|
||||||
|
|
||||||
current->Get(options, lkey, value, &s, &merge_operands, &stats,
|
current->Get(options, lkey, value, &s, &merge_operands, &stats,
|
||||||
options_, value_found);
|
options_, value_found);
|
||||||
have_stat_update = true;
|
have_stat_update = true;
|
||||||
|
BumpPerfTime(&perf_context.get_from_output_files_time, &from_files_timer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
StopWatchNano post_process_timer(env_, false);
|
||||||
|
StartPerfTimer(&post_process_timer);
|
||||||
mutex_.Lock();
|
mutex_.Lock();
|
||||||
|
|
||||||
if (!options_.disable_seek_compaction &&
|
if (!options_.disable_seek_compaction &&
|
||||||
@ -2607,6 +2617,8 @@ Status DBImpl::GetImpl(const ReadOptions& options,
|
|||||||
// Note, tickers are atomic now - no lock protection needed any more.
|
// Note, tickers are atomic now - no lock protection needed any more.
|
||||||
RecordTick(options_.statistics, NUMBER_KEYS_READ);
|
RecordTick(options_.statistics, NUMBER_KEYS_READ);
|
||||||
RecordTick(options_.statistics, BYTES_READ, value->size());
|
RecordTick(options_.statistics, BYTES_READ, value->size());
|
||||||
|
BumpPerfTime(&perf_context.get_post_process_time, &post_process_timer);
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2615,6 +2627,8 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
|
|||||||
std::vector<std::string>* values) {
|
std::vector<std::string>* values) {
|
||||||
|
|
||||||
StopWatch sw(env_, options_.statistics, DB_MULTIGET);
|
StopWatch sw(env_, options_.statistics, DB_MULTIGET);
|
||||||
|
StopWatchNano snapshot_timer(env_, false);
|
||||||
|
StartPerfTimer(&snapshot_timer);
|
||||||
SequenceNumber snapshot;
|
SequenceNumber snapshot;
|
||||||
mutex_.Lock();
|
mutex_.Lock();
|
||||||
if (options.snapshot != nullptr) {
|
if (options.snapshot != nullptr) {
|
||||||
@ -2646,6 +2660,7 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
|
|||||||
|
|
||||||
// Keep track of bytes that we read for statistics-recording later
|
// Keep track of bytes that we read for statistics-recording later
|
||||||
uint64_t bytesRead = 0;
|
uint64_t bytesRead = 0;
|
||||||
|
BumpPerfTime(&perf_context.get_snapshot_time, &snapshot_timer);
|
||||||
|
|
||||||
// For each of the given keys, apply the entire "get" process as follows:
|
// For each of the given keys, apply the entire "get" process as follows:
|
||||||
// First look in the memtable, then in the immutable memtable (if any).
|
// First look in the memtable, then in the immutable memtable (if any).
|
||||||
@ -2672,6 +2687,8 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Post processing (decrement reference counts and record statistics)
|
// Post processing (decrement reference counts and record statistics)
|
||||||
|
StopWatchNano post_process_timer(env_, false);
|
||||||
|
StartPerfTimer(&post_process_timer);
|
||||||
mutex_.Lock();
|
mutex_.Lock();
|
||||||
if (!options_.disable_seek_compaction &&
|
if (!options_.disable_seek_compaction &&
|
||||||
have_stat_update && current->UpdateStats(stats)) {
|
have_stat_update && current->UpdateStats(stats)) {
|
||||||
@ -2686,6 +2703,7 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
|
|||||||
RecordTick(options_.statistics, NUMBER_MULTIGET_CALLS);
|
RecordTick(options_.statistics, NUMBER_MULTIGET_CALLS);
|
||||||
RecordTick(options_.statistics, NUMBER_MULTIGET_KEYS_READ, numKeys);
|
RecordTick(options_.statistics, NUMBER_MULTIGET_KEYS_READ, numKeys);
|
||||||
RecordTick(options_.statistics, NUMBER_MULTIGET_BYTES_READ, bytesRead);
|
RecordTick(options_.statistics, NUMBER_MULTIGET_BYTES_READ, bytesRead);
|
||||||
|
BumpPerfTime(&perf_context.get_post_process_time, &post_process_timer);
|
||||||
|
|
||||||
return statList;
|
return statList;
|
||||||
}
|
}
|
||||||
@ -2754,6 +2772,8 @@ Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
||||||
|
StopWatchNano pre_post_process_timer(env_, false);
|
||||||
|
StartPerfTimer(&pre_post_process_timer);
|
||||||
Writer w(&mutex_);
|
Writer w(&mutex_);
|
||||||
w.batch = my_batch;
|
w.batch = my_batch;
|
||||||
w.sync = options.sync;
|
w.sync = options.sync;
|
||||||
@ -2800,12 +2820,13 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
|||||||
if (options.disableWAL) {
|
if (options.disableWAL) {
|
||||||
flush_on_destroy_ = true;
|
flush_on_destroy_ = true;
|
||||||
}
|
}
|
||||||
|
BumpPerfTime(&perf_context.write_pre_and_post_process_time,
|
||||||
|
&pre_post_process_timer);
|
||||||
|
|
||||||
if (!options.disableWAL) {
|
if (!options.disableWAL) {
|
||||||
StopWatchNano timer(env_);
|
StopWatchNano timer(env_);
|
||||||
StartPerfTimer(&timer);
|
StartPerfTimer(&timer);
|
||||||
status = log_->AddRecord(WriteBatchInternal::Contents(updates));
|
status = log_->AddRecord(WriteBatchInternal::Contents(updates));
|
||||||
BumpPerfTime(&perf_context.wal_write_time, &timer);
|
|
||||||
if (status.ok() && options.sync) {
|
if (status.ok() && options.sync) {
|
||||||
if (options_.use_fsync) {
|
if (options_.use_fsync) {
|
||||||
StopWatch(env_, options_.statistics, WAL_FILE_SYNC_MICROS);
|
StopWatch(env_, options_.statistics, WAL_FILE_SYNC_MICROS);
|
||||||
@ -2815,10 +2836,14 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
|||||||
status = log_->file()->Sync();
|
status = log_->file()->Sync();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
BumpPerfTime(&perf_context.write_wal_time, &timer);
|
||||||
}
|
}
|
||||||
if (status.ok()) {
|
if (status.ok()) {
|
||||||
|
StopWatchNano write_memtable_timer(env_, false);
|
||||||
|
StartPerfTimer(&write_memtable_timer);
|
||||||
status = WriteBatchInternal::InsertInto(updates, mem_, &options_, this,
|
status = WriteBatchInternal::InsertInto(updates, mem_, &options_, this,
|
||||||
options_.filter_deletes);
|
options_.filter_deletes);
|
||||||
|
BumpPerfTime(&perf_context.write_memtable_time, &write_memtable_timer);
|
||||||
if (!status.ok()) {
|
if (!status.ok()) {
|
||||||
// Panic for in-memory corruptions
|
// Panic for in-memory corruptions
|
||||||
// Note that existing logic was not sound. Any partial failure writing
|
// Note that existing logic was not sound. Any partial failure writing
|
||||||
@ -2828,6 +2853,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
|||||||
}
|
}
|
||||||
SetTickerCount(options_.statistics, SEQUENCE_NUMBER, last_sequence);
|
SetTickerCount(options_.statistics, SEQUENCE_NUMBER, last_sequence);
|
||||||
}
|
}
|
||||||
|
StartPerfTimer(&pre_post_process_timer);
|
||||||
LogFlush(options_.info_log);
|
LogFlush(options_.info_log);
|
||||||
mutex_.Lock();
|
mutex_.Lock();
|
||||||
if (status.ok()) {
|
if (status.ok()) {
|
||||||
@ -2855,6 +2881,8 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
|||||||
if (!writers_.empty()) {
|
if (!writers_.empty()) {
|
||||||
writers_.front()->cv.Signal();
|
writers_.front()->cv.Signal();
|
||||||
}
|
}
|
||||||
|
BumpPerfTime(&perf_context.write_pre_and_post_process_time,
|
||||||
|
&pre_post_process_timer);
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -102,7 +102,8 @@ class DBIter: public Iterator {
|
|||||||
virtual void SeekToLast();
|
virtual void SeekToLast();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void FindNextUserEntry(bool skipping);
|
inline void FindNextUserEntry(bool skipping);
|
||||||
|
void FindNextUserEntryInternal(bool skipping);
|
||||||
void FindPrevUserEntry();
|
void FindPrevUserEntry();
|
||||||
bool ParseKey(ParsedInternalKey* key);
|
bool ParseKey(ParsedInternalKey* key);
|
||||||
void MergeValuesNewToOld();
|
void MergeValuesNewToOld();
|
||||||
@ -191,7 +192,15 @@ void DBIter::Next() {
|
|||||||
//
|
//
|
||||||
// NOTE: In between, saved_key_ can point to a user key that has
|
// NOTE: In between, saved_key_ can point to a user key that has
|
||||||
// a delete marker
|
// a delete marker
|
||||||
void DBIter::FindNextUserEntry(bool skipping) {
|
inline void DBIter::FindNextUserEntry(bool skipping) {
|
||||||
|
StopWatchNano timer(env_, false);
|
||||||
|
StartPerfTimer(&timer);
|
||||||
|
FindNextUserEntryInternal(skipping);
|
||||||
|
BumpPerfTime(&perf_context.find_next_user_entry_time, &timer);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Actual implementation of DBIter::FindNextUserEntry()
|
||||||
|
void DBIter::FindNextUserEntryInternal(bool skipping) {
|
||||||
// Loop until we hit an acceptable entry to yield
|
// Loop until we hit an acceptable entry to yield
|
||||||
assert(iter_->Valid());
|
assert(iter_->Valid());
|
||||||
assert(direction_ == kForward);
|
assert(direction_ == kForward);
|
||||||
@ -431,7 +440,10 @@ void DBIter::Seek(const Slice& target) {
|
|||||||
saved_key_.clear();
|
saved_key_.clear();
|
||||||
AppendInternalKey(
|
AppendInternalKey(
|
||||||
&saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek));
|
&saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek));
|
||||||
|
StopWatchNano internal_seek_timer(env_, false);
|
||||||
|
StartPerfTimer(&internal_seek_timer);
|
||||||
iter_->Seek(saved_key_);
|
iter_->Seek(saved_key_);
|
||||||
|
BumpPerfTime(&perf_context.seek_internal_seek_time, &internal_seek_timer);
|
||||||
if (iter_->Valid()) {
|
if (iter_->Valid()) {
|
||||||
FindNextUserEntry(false /*not skipping */);
|
FindNextUserEntry(false /*not skipping */);
|
||||||
} else {
|
} else {
|
||||||
@ -442,7 +454,10 @@ void DBIter::Seek(const Slice& target) {
|
|||||||
void DBIter::SeekToFirst() {
|
void DBIter::SeekToFirst() {
|
||||||
direction_ = kForward;
|
direction_ = kForward;
|
||||||
ClearSavedValue();
|
ClearSavedValue();
|
||||||
|
StopWatchNano internal_seek_timer(env_, false);
|
||||||
|
StartPerfTimer(&internal_seek_timer);
|
||||||
iter_->SeekToFirst();
|
iter_->SeekToFirst();
|
||||||
|
BumpPerfTime(&perf_context.seek_internal_seek_time, &internal_seek_timer);
|
||||||
if (iter_->Valid()) {
|
if (iter_->Valid()) {
|
||||||
FindNextUserEntry(false /* not skipping */);
|
FindNextUserEntry(false /* not skipping */);
|
||||||
} else {
|
} else {
|
||||||
@ -461,7 +476,10 @@ void DBIter::SeekToLast() {
|
|||||||
|
|
||||||
direction_ = kReverse;
|
direction_ = kReverse;
|
||||||
ClearSavedValue();
|
ClearSavedValue();
|
||||||
|
StopWatchNano internal_seek_timer(env_, false);
|
||||||
|
StartPerfTimer(&internal_seek_timer);
|
||||||
iter_->SeekToLast();
|
iter_->SeekToLast();
|
||||||
|
BumpPerfTime(&perf_context.seek_internal_seek_time, &internal_seek_timer);
|
||||||
FindPrevUserEntry();
|
FindPrevUserEntry();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
#include "rocksdb/compaction_filter.h"
|
#include "rocksdb/compaction_filter.h"
|
||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
#include "rocksdb/table.h"
|
#include "rocksdb/table.h"
|
||||||
|
#include "rocksdb/perf_context.h"
|
||||||
#include "util/hash.h"
|
#include "util/hash.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "util/mutexlock.h"
|
#include "util/mutexlock.h"
|
||||||
@ -1215,7 +1216,13 @@ TEST(DBTest, IterMulti) {
|
|||||||
ASSERT_EQ(IterStatus(iter), "a->va");
|
ASSERT_EQ(IterStatus(iter), "a->va");
|
||||||
iter->Seek("ax");
|
iter->Seek("ax");
|
||||||
ASSERT_EQ(IterStatus(iter), "b->vb");
|
ASSERT_EQ(IterStatus(iter), "b->vb");
|
||||||
|
|
||||||
|
SetPerfLevel(kEnableTime);
|
||||||
|
perf_context.Reset();
|
||||||
iter->Seek("b");
|
iter->Seek("b");
|
||||||
|
ASSERT_TRUE((int) perf_context.seek_internal_seek_time > 0);
|
||||||
|
ASSERT_TRUE((int) perf_context.find_next_user_entry_time > 0);
|
||||||
|
SetPerfLevel(kDisable);
|
||||||
ASSERT_EQ(IterStatus(iter), "b->vb");
|
ASSERT_EQ(IterStatus(iter), "b->vb");
|
||||||
iter->Seek("z");
|
iter->Seek("z");
|
||||||
ASSERT_EQ(IterStatus(iter), "(invalid)");
|
ASSERT_EQ(IterStatus(iter), "(invalid)");
|
||||||
@ -1230,7 +1237,12 @@ TEST(DBTest, IterMulti) {
|
|||||||
// Switch from forward to reverse
|
// Switch from forward to reverse
|
||||||
iter->SeekToFirst();
|
iter->SeekToFirst();
|
||||||
iter->Next();
|
iter->Next();
|
||||||
|
SetPerfLevel(kEnableTime);
|
||||||
|
perf_context.Reset();
|
||||||
iter->Next();
|
iter->Next();
|
||||||
|
ASSERT_EQ(0, (int) perf_context.seek_internal_seek_time);
|
||||||
|
ASSERT_TRUE((int) perf_context.find_next_user_entry_time > 0);
|
||||||
|
SetPerfLevel(kDisable);
|
||||||
iter->Prev();
|
iter->Prev();
|
||||||
ASSERT_EQ(IterStatus(iter), "b->vb");
|
ASSERT_EQ(IterStatus(iter), "b->vb");
|
||||||
|
|
||||||
@ -1590,22 +1602,42 @@ TEST(DBTest, NumImmutableMemTable) {
|
|||||||
|
|
||||||
std::string big_value(1000000, 'x');
|
std::string big_value(1000000, 'x');
|
||||||
std::string num;
|
std::string num;
|
||||||
|
SetPerfLevel(kEnableTime);;
|
||||||
|
|
||||||
ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value));
|
ASSERT_OK(dbfull()->Put(writeOpt, "k1", big_value));
|
||||||
ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
|
ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
|
||||||
ASSERT_EQ(num, "0");
|
ASSERT_EQ(num, "0");
|
||||||
|
perf_context.Reset();
|
||||||
|
Get("k1");
|
||||||
|
ASSERT_EQ(1, (int) perf_context.get_from_memtable_count);
|
||||||
|
|
||||||
ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value));
|
ASSERT_OK(dbfull()->Put(writeOpt, "k2", big_value));
|
||||||
ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
|
ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
|
||||||
ASSERT_EQ(num, "1");
|
ASSERT_EQ(num, "1");
|
||||||
|
perf_context.Reset();
|
||||||
|
Get("k1");
|
||||||
|
ASSERT_EQ(2, (int) perf_context.get_from_memtable_count);
|
||||||
|
perf_context.Reset();
|
||||||
|
Get("k2");
|
||||||
|
ASSERT_EQ(1, (int) perf_context.get_from_memtable_count);
|
||||||
|
|
||||||
ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value));
|
ASSERT_OK(dbfull()->Put(writeOpt, "k3", big_value));
|
||||||
ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
|
ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
|
||||||
ASSERT_EQ(num, "2");
|
ASSERT_EQ(num, "2");
|
||||||
|
perf_context.Reset();
|
||||||
|
Get("k2");
|
||||||
|
ASSERT_EQ(2, (int) perf_context.get_from_memtable_count);
|
||||||
|
perf_context.Reset();
|
||||||
|
Get("k3");
|
||||||
|
ASSERT_EQ(1, (int) perf_context.get_from_memtable_count);
|
||||||
|
perf_context.Reset();
|
||||||
|
Get("k1");
|
||||||
|
ASSERT_EQ(3, (int) perf_context.get_from_memtable_count);
|
||||||
|
|
||||||
dbfull()->Flush(FlushOptions());
|
dbfull()->Flush(FlushOptions());
|
||||||
ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
|
ASSERT_TRUE(dbfull()->GetProperty("rocksdb.num-immutable-mem-table", &num));
|
||||||
ASSERT_EQ(num, "0");
|
ASSERT_EQ(num, "0");
|
||||||
|
SetPerfLevel(kDisable);
|
||||||
} while (ChangeCompactOptions());
|
} while (ChangeCompactOptions());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1614,11 +1646,16 @@ TEST(DBTest, FLUSH) {
|
|||||||
Options options = CurrentOptions();
|
Options options = CurrentOptions();
|
||||||
WriteOptions writeOpt = WriteOptions();
|
WriteOptions writeOpt = WriteOptions();
|
||||||
writeOpt.disableWAL = true;
|
writeOpt.disableWAL = true;
|
||||||
|
SetPerfLevel(kEnableTime);;
|
||||||
ASSERT_OK(dbfull()->Put(writeOpt, "foo", "v1"));
|
ASSERT_OK(dbfull()->Put(writeOpt, "foo", "v1"));
|
||||||
// this will now also flush the last 2 writes
|
// this will now also flush the last 2 writes
|
||||||
dbfull()->Flush(FlushOptions());
|
dbfull()->Flush(FlushOptions());
|
||||||
ASSERT_OK(dbfull()->Put(writeOpt, "bar", "v1"));
|
ASSERT_OK(dbfull()->Put(writeOpt, "bar", "v1"));
|
||||||
|
|
||||||
|
perf_context.Reset();
|
||||||
|
Get("foo");
|
||||||
|
ASSERT_TRUE((int) perf_context.get_from_output_files_time > 0);
|
||||||
|
|
||||||
Reopen();
|
Reopen();
|
||||||
ASSERT_EQ("v1", Get("foo"));
|
ASSERT_EQ("v1", Get("foo"));
|
||||||
ASSERT_EQ("v1", Get("bar"));
|
ASSERT_EQ("v1", Get("bar"));
|
||||||
@ -1630,7 +1667,9 @@ TEST(DBTest, FLUSH) {
|
|||||||
|
|
||||||
Reopen();
|
Reopen();
|
||||||
ASSERT_EQ("v2", Get("bar"));
|
ASSERT_EQ("v2", Get("bar"));
|
||||||
|
perf_context.Reset();
|
||||||
ASSERT_EQ("v2", Get("foo"));
|
ASSERT_EQ("v2", Get("foo"));
|
||||||
|
ASSERT_TRUE((int) perf_context.get_from_output_files_time > 0);
|
||||||
|
|
||||||
writeOpt.disableWAL = false;
|
writeOpt.disableWAL = false;
|
||||||
ASSERT_OK(dbfull()->Put(writeOpt, "bar", "v3"));
|
ASSERT_OK(dbfull()->Put(writeOpt, "bar", "v3"));
|
||||||
@ -1642,6 +1681,8 @@ TEST(DBTest, FLUSH) {
|
|||||||
// has WAL enabled.
|
// has WAL enabled.
|
||||||
ASSERT_EQ("v3", Get("foo"));
|
ASSERT_EQ("v3", Get("foo"));
|
||||||
ASSERT_EQ("v3", Get("bar"));
|
ASSERT_EQ("v3", Get("bar"));
|
||||||
|
|
||||||
|
SetPerfLevel(kDisable);
|
||||||
} while (ChangeCompactOptions());
|
} while (ChangeCompactOptions());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -19,6 +19,8 @@
|
|||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
#include "util/mutexlock.h"
|
#include "util/mutexlock.h"
|
||||||
#include "util/murmurhash.h"
|
#include "util/murmurhash.h"
|
||||||
|
#include "util/perf_context_imp.h"
|
||||||
|
#include "util/stop_watch.h"
|
||||||
|
|
||||||
namespace std {
|
namespace std {
|
||||||
template <>
|
template <>
|
||||||
@ -162,6 +164,9 @@ void MemTable::Add(SequenceNumber s, ValueType type,
|
|||||||
|
|
||||||
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
||||||
std::deque<std::string>* operands, const Options& options) {
|
std::deque<std::string>* operands, const Options& options) {
|
||||||
|
StopWatchNano memtable_get_timer(options.env, false);
|
||||||
|
StartPerfTimer(&memtable_get_timer);
|
||||||
|
|
||||||
Slice memkey = key.memtable_key();
|
Slice memkey = key.memtable_key();
|
||||||
std::shared_ptr<MemTableRep::Iterator> iter(
|
std::shared_ptr<MemTableRep::Iterator> iter(
|
||||||
table_->GetIterator(key.user_key()));
|
table_->GetIterator(key.user_key()));
|
||||||
@ -175,7 +180,8 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
|||||||
auto logger = options.info_log;
|
auto logger = options.info_log;
|
||||||
std::string merge_result;
|
std::string merge_result;
|
||||||
|
|
||||||
for (; iter->Valid(); iter->Next()) {
|
bool found_final_value = false;
|
||||||
|
for (; !found_final_value && iter->Valid(); iter->Next()) {
|
||||||
// entry format is:
|
// entry format is:
|
||||||
// klength varint32
|
// klength varint32
|
||||||
// userkey char[klength-8]
|
// userkey char[klength-8]
|
||||||
@ -212,7 +218,8 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
|||||||
if (options.inplace_update_support) {
|
if (options.inplace_update_support) {
|
||||||
GetLock(key.user_key())->Unlock();
|
GetLock(key.user_key())->Unlock();
|
||||||
}
|
}
|
||||||
return true;
|
found_final_value = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
case kTypeDeletion: {
|
case kTypeDeletion: {
|
||||||
if (merge_in_progress) {
|
if (merge_in_progress) {
|
||||||
@ -226,7 +233,8 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
|||||||
} else {
|
} else {
|
||||||
*s = Status::NotFound(Slice());
|
*s = Status::NotFound(Slice());
|
||||||
}
|
}
|
||||||
return true;
|
found_final_value = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
case kTypeMerge: {
|
case kTypeMerge: {
|
||||||
Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
|
Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
|
||||||
@ -260,10 +268,12 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
|||||||
|
|
||||||
// No change to value, since we have not yet found a Put/Delete
|
// No change to value, since we have not yet found a Put/Delete
|
||||||
|
|
||||||
if (merge_in_progress) {
|
if (!found_final_value && merge_in_progress) {
|
||||||
*s = Status::MergeInProgress("");
|
*s = Status::MergeInProgress("");
|
||||||
}
|
}
|
||||||
return false;
|
BumpPerfTime(&perf_context.get_from_memtable_time, &memtable_get_timer);
|
||||||
|
BumpPerfCount(&perf_context.get_from_memtable_count);
|
||||||
|
return found_final_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MemTable::Update(SequenceNumber seq, ValueType type,
|
bool MemTable::Update(SequenceNumber seq, ValueType type,
|
||||||
|
@ -174,6 +174,13 @@ void ProfileKeyComparison() {
|
|||||||
|
|
||||||
HistogramImpl hist_put;
|
HistogramImpl hist_put;
|
||||||
HistogramImpl hist_get;
|
HistogramImpl hist_get;
|
||||||
|
HistogramImpl hist_get_snapshot;
|
||||||
|
HistogramImpl hist_get_memtable;
|
||||||
|
HistogramImpl hist_get_post_process;
|
||||||
|
HistogramImpl hist_num_memtable_checked;
|
||||||
|
HistogramImpl hist_write_pre_post;
|
||||||
|
HistogramImpl hist_write_wal_time;
|
||||||
|
HistogramImpl hist_write_memtable_time;
|
||||||
|
|
||||||
std::cout << "Inserting " << FLAGS_total_keys << " key/value pairs\n...\n";
|
std::cout << "Inserting " << FLAGS_total_keys << " key/value pairs\n...\n";
|
||||||
|
|
||||||
@ -192,16 +199,37 @@ void ProfileKeyComparison() {
|
|||||||
|
|
||||||
perf_context.Reset();
|
perf_context.Reset();
|
||||||
db->Put(write_options, key, value);
|
db->Put(write_options, key, value);
|
||||||
|
hist_write_pre_post.Add(perf_context.write_pre_and_post_process_time);
|
||||||
|
hist_write_wal_time.Add(perf_context.write_wal_time);
|
||||||
|
hist_write_memtable_time.Add(perf_context.write_memtable_time);
|
||||||
hist_put.Add(perf_context.user_key_comparison_count);
|
hist_put.Add(perf_context.user_key_comparison_count);
|
||||||
|
|
||||||
perf_context.Reset();
|
perf_context.Reset();
|
||||||
db->Get(read_options, key, &value);
|
db->Get(read_options, key, &value);
|
||||||
|
hist_get_snapshot.Add(perf_context.get_snapshot_time);
|
||||||
|
hist_get_memtable.Add(perf_context.get_from_memtable_time);
|
||||||
|
hist_num_memtable_checked.Add(perf_context.get_from_memtable_count);
|
||||||
|
hist_get_post_process.Add(perf_context.get_post_process_time);
|
||||||
hist_get.Add(perf_context.user_key_comparison_count);
|
hist_get.Add(perf_context.user_key_comparison_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "Put uesr key comparison: \n" << hist_put.ToString()
|
std::cout << "Put uesr key comparison: \n" << hist_put.ToString()
|
||||||
<< "Get uesr key comparison: \n" << hist_get.ToString();
|
<< "Get uesr key comparison: \n" << hist_get.ToString();
|
||||||
|
std::cout << "Put(): Pre and Post Process Time: \n"
|
||||||
|
<< hist_write_pre_post.ToString()
|
||||||
|
<< " Writing WAL time: \n"
|
||||||
|
<< hist_write_wal_time.ToString() << "\n"
|
||||||
|
<< " Writing Mem Table time: \n"
|
||||||
|
<< hist_write_memtable_time.ToString() << "\n";
|
||||||
|
|
||||||
|
std::cout << "Get(): Time to get snapshot: \n"
|
||||||
|
<< hist_get_snapshot.ToString()
|
||||||
|
<< " Time to get value from memtables: \n"
|
||||||
|
<< hist_get_memtable.ToString() << "\n"
|
||||||
|
<< " Number of memtables checked: \n"
|
||||||
|
<< hist_num_memtable_checked.ToString() << "\n"
|
||||||
|
<< " Time to post process: \n"
|
||||||
|
<< hist_get_post_process.ToString() << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(PerfContextTest, KeyComparisonCount) {
|
TEST(PerfContextTest, KeyComparisonCount) {
|
||||||
@ -259,8 +287,8 @@ TEST(PerfContextTest, SeekKeyComparison) {
|
|||||||
db->Put(write_options, key, value);
|
db->Put(write_options, key, value);
|
||||||
auto put_time = timer.ElapsedNanos();
|
auto put_time = timer.ElapsedNanos();
|
||||||
hist_put_time.Add(put_time);
|
hist_put_time.Add(put_time);
|
||||||
hist_wal_time.Add(perf_context.wal_write_time);
|
hist_wal_time.Add(perf_context.write_wal_time);
|
||||||
hist_time_diff.Add(put_time - perf_context.wal_write_time);
|
hist_time_diff.Add(put_time - perf_context.write_wal_time);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "Put time:\n" << hist_put_time.ToString()
|
std::cout << "Put time:\n" << hist_put_time.ToString()
|
||||||
|
@ -2091,7 +2091,7 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(num <= space);
|
assert(num <= space);
|
||||||
Iterator* result = NewMergingIterator(&icmp_, list, num);
|
Iterator* result = NewMergingIterator(env_, &icmp_, list, num);
|
||||||
delete[] list;
|
delete[] list;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -38,7 +38,27 @@ struct PerfContext {
|
|||||||
uint64_t internal_key_skipped_count;
|
uint64_t internal_key_skipped_count;
|
||||||
// total number of deletes skipped over during iteration
|
// total number of deletes skipped over during iteration
|
||||||
uint64_t internal_delete_skipped_count;
|
uint64_t internal_delete_skipped_count;
|
||||||
uint64_t wal_write_time; // total time spent on writing to WAL
|
|
||||||
|
uint64_t get_snapshot_time; // total time spent on getting snapshot
|
||||||
|
uint64_t get_from_memtable_time; // total time spent on querying memtables
|
||||||
|
uint64_t get_from_memtable_count; // number of mem tables queried
|
||||||
|
// total time spent after Get() finds a key
|
||||||
|
uint64_t get_post_process_time;
|
||||||
|
uint64_t get_from_output_files_time; // total time reading from output files
|
||||||
|
// total time spent on seeking child iters
|
||||||
|
uint64_t seek_child_seek_time;
|
||||||
|
// number of seek issued in child iterators
|
||||||
|
uint64_t seek_child_seek_count;
|
||||||
|
uint64_t seek_min_heap_time; // total time spent on the merge heap
|
||||||
|
// total time spent on seeking the internal entries
|
||||||
|
uint64_t seek_internal_seek_time;
|
||||||
|
// total time spent on iterating internal entries to find the next user entry
|
||||||
|
uint64_t find_next_user_entry_time;
|
||||||
|
// total time spent on pre or post processing when writing a record
|
||||||
|
uint64_t write_pre_and_post_process_time;
|
||||||
|
uint64_t write_wal_time; // total time spent on writing to WAL
|
||||||
|
// total time spent on writing to mem tables
|
||||||
|
uint64_t write_memtable_time;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern __thread PerfContext perf_context;
|
extern __thread PerfContext perf_context;
|
||||||
|
@ -11,8 +11,11 @@
|
|||||||
|
|
||||||
#include "rocksdb/comparator.h"
|
#include "rocksdb/comparator.h"
|
||||||
#include "rocksdb/iterator.h"
|
#include "rocksdb/iterator.h"
|
||||||
|
#include "rocksdb/options.h"
|
||||||
#include "table/iter_heap.h"
|
#include "table/iter_heap.h"
|
||||||
#include "table/iterator_wrapper.h"
|
#include "table/iterator_wrapper.h"
|
||||||
|
#include "util/stop_watch.h"
|
||||||
|
#include "util/perf_context_imp.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
@ -22,10 +25,12 @@ namespace {
|
|||||||
|
|
||||||
class MergingIterator : public Iterator {
|
class MergingIterator : public Iterator {
|
||||||
public:
|
public:
|
||||||
MergingIterator(const Comparator* comparator, Iterator** children, int n)
|
MergingIterator(Env* const env, const Comparator* comparator,
|
||||||
|
Iterator** children, int n)
|
||||||
: comparator_(comparator),
|
: comparator_(comparator),
|
||||||
children_(n),
|
children_(n),
|
||||||
current_(nullptr),
|
current_(nullptr),
|
||||||
|
env_(env),
|
||||||
direction_(kForward),
|
direction_(kForward),
|
||||||
maxHeap_(NewMaxIterHeap(comparator_)),
|
maxHeap_(NewMaxIterHeap(comparator_)),
|
||||||
minHeap_ (NewMinIterHeap(comparator_)) {
|
minHeap_ (NewMinIterHeap(comparator_)) {
|
||||||
@ -71,14 +76,24 @@ class MergingIterator : public Iterator {
|
|||||||
|
|
||||||
virtual void Seek(const Slice& target) {
|
virtual void Seek(const Slice& target) {
|
||||||
ClearHeaps();
|
ClearHeaps();
|
||||||
|
StopWatchNano child_seek_timer(env_, false);
|
||||||
|
StopWatchNano min_heap_timer(env_, false);
|
||||||
for (auto& child : children_) {
|
for (auto& child : children_) {
|
||||||
|
StartPerfTimer(&child_seek_timer);
|
||||||
child.Seek(target);
|
child.Seek(target);
|
||||||
|
BumpPerfTime(&perf_context.seek_child_seek_time, &child_seek_timer);
|
||||||
|
BumpPerfCount(&perf_context.seek_child_seek_count);
|
||||||
|
|
||||||
if (child.Valid()) {
|
if (child.Valid()) {
|
||||||
|
StartPerfTimer(&min_heap_timer);
|
||||||
minHeap_.push(&child);
|
minHeap_.push(&child);
|
||||||
|
BumpPerfTime(&perf_context.seek_min_heap_time, &child_seek_timer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
StartPerfTimer(&min_heap_timer);
|
||||||
FindSmallest();
|
FindSmallest();
|
||||||
direction_ = kForward;
|
direction_ = kForward;
|
||||||
|
BumpPerfTime(&perf_context.seek_min_heap_time, &child_seek_timer);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void Next() {
|
virtual void Next() {
|
||||||
@ -178,6 +193,7 @@ class MergingIterator : public Iterator {
|
|||||||
const Comparator* comparator_;
|
const Comparator* comparator_;
|
||||||
std::vector<IteratorWrapper> children_;
|
std::vector<IteratorWrapper> children_;
|
||||||
IteratorWrapper* current_;
|
IteratorWrapper* current_;
|
||||||
|
Env* const env_;
|
||||||
// Which direction is the iterator moving?
|
// Which direction is the iterator moving?
|
||||||
enum Direction {
|
enum Direction {
|
||||||
kForward,
|
kForward,
|
||||||
@ -214,14 +230,15 @@ void MergingIterator::ClearHeaps() {
|
|||||||
}
|
}
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
Iterator* NewMergingIterator(const Comparator* cmp, Iterator** list, int n) {
|
Iterator* NewMergingIterator(Env* const env, const Comparator* cmp,
|
||||||
|
Iterator** list, int n) {
|
||||||
assert(n >= 0);
|
assert(n >= 0);
|
||||||
if (n == 0) {
|
if (n == 0) {
|
||||||
return NewEmptyIterator();
|
return NewEmptyIterator();
|
||||||
} else if (n == 1) {
|
} else if (n == 1) {
|
||||||
return list[0];
|
return list[0];
|
||||||
} else {
|
} else {
|
||||||
return new MergingIterator(cmp, list, n);
|
return new MergingIterator(env, cmp, list, n);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@ namespace rocksdb {
|
|||||||
|
|
||||||
class Comparator;
|
class Comparator;
|
||||||
class Iterator;
|
class Iterator;
|
||||||
|
class Env;
|
||||||
|
|
||||||
// Return an iterator that provided the union of the data in
|
// Return an iterator that provided the union of the data in
|
||||||
// children[0,n-1]. Takes ownership of the child iterators and
|
// children[0,n-1]. Takes ownership of the child iterators and
|
||||||
@ -23,6 +24,6 @@ class Iterator;
|
|||||||
//
|
//
|
||||||
// REQUIRES: n >= 0
|
// REQUIRES: n >= 0
|
||||||
extern Iterator* NewMergingIterator(
|
extern Iterator* NewMergingIterator(
|
||||||
const Comparator* comparator, Iterator** children, int n);
|
Env* const env, const Comparator* comparator, Iterator** children, int n);
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -22,7 +22,20 @@ void PerfContext::Reset() {
|
|||||||
block_decompress_time = 0;
|
block_decompress_time = 0;
|
||||||
internal_key_skipped_count = 0;
|
internal_key_skipped_count = 0;
|
||||||
internal_delete_skipped_count = 0;
|
internal_delete_skipped_count = 0;
|
||||||
wal_write_time = 0;
|
write_wal_time = 0;
|
||||||
|
|
||||||
|
get_snapshot_time = 0;
|
||||||
|
get_from_memtable_time = 0;
|
||||||
|
get_from_memtable_count = 0;
|
||||||
|
get_post_process_time = 0;
|
||||||
|
get_from_output_files_time = 0;
|
||||||
|
seek_child_seek_time = 0;
|
||||||
|
seek_child_seek_count = 0;
|
||||||
|
seek_min_heap_time = 0;
|
||||||
|
seek_internal_seek_time = 0;
|
||||||
|
find_next_user_entry_time = 0;
|
||||||
|
write_pre_and_post_process_time = 0;
|
||||||
|
write_memtable_time = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
__thread PerfContext perf_context;
|
__thread PerfContext perf_context;
|
||||||
|
Loading…
Reference in New Issue
Block a user