Add a ticker stat for number of keys skipped during iteration

Summary:
This diff adds a new ticker stat, NUMBER_ITER_SKIP, to count the
number of internal keys skipped during iteration. Keys can be skipped
due to deletes, or lower sequence number, or higher sequence number
than the one requested.

Also, fix the issue when StatisticsData is naturally aligned on cacheline boundary,
padding becomes a zero size array, which the Windows compiler doesn't
like. So add a cacheline worth of padding in that case to keep it happy.
We cannot conditionally add padding as gcc doesn't allow using sizeof
in preprocessor directives.
Closes https://github.com/facebook/rocksdb/pull/3177

Differential Revision: D6353897

Pulled By: anand1976

fbshipit-source-id: 441d5a09af9c4e22e7355242dfc0c7b27aa0a6c2
This commit is contained in:
anand1976 2017-11-20 21:12:55 -08:00 committed by Facebook Github Bot
parent 578f36e431
commit d394a6bb48
5 changed files with 103 additions and 4 deletions

View File

@ -76,6 +76,7 @@ class DBIter final: public Iterator {
prev_count_ = 0;
prev_found_count_ = 0;
bytes_read_ = 0;
skip_count_ = 0;
}
void BumpGlobalStatistics(Statistics* global_statistics) {
@ -84,6 +85,7 @@ class DBIter final: public Iterator {
RecordTick(global_statistics, NUMBER_DB_PREV, prev_count_);
RecordTick(global_statistics, NUMBER_DB_PREV_FOUND, prev_found_count_);
RecordTick(global_statistics, ITER_BYTES_READ, bytes_read_);
RecordTick(global_statistics, NUMBER_ITER_SKIP, skip_count_);
PERF_COUNTER_ADD(iter_read_bytes, bytes_read_);
ResetCounters();
}
@ -98,6 +100,8 @@ class DBIter final: public Iterator {
uint64_t prev_found_count_;
// Map to Tickers::ITER_BYTES_READ
uint64_t bytes_read_;
// Map to Tickers::NUMBER_ITER_SKIP
uint64_t skip_count_;
};
DBIter(Env* _env, const ReadOptions& read_options,
@ -147,6 +151,7 @@ class DBIter final: public Iterator {
// Compiler warning issue filed:
// https://github.com/facebook/rocksdb/issues/3013
RecordTick(statistics_, NO_ITERATORS, uint64_t(-1));
ResetInternalKeysSkippedCounter();
local_stats_.BumpGlobalStatistics(statistics_);
if (!arena_mode_) {
delete iter_;
@ -267,6 +272,10 @@ class DBIter final: public Iterator {
}
inline void ResetInternalKeysSkippedCounter() {
local_stats_.skip_count_ += num_internal_keys_skipped_;
if (valid_) {
local_stats_.skip_count_--;
}
num_internal_keys_skipped_ = 0;
}
@ -1143,6 +1152,7 @@ void DBIter::Seek(const Slice& target) {
}
if (statistics_ != nullptr) {
if (valid_) {
// Decrement since we don't want to count this key as skipped
RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
@ -1269,7 +1279,8 @@ void DBIter::SeekToLast() {
if (!Valid()) {
return;
} else if (user_comparator_->Equal(*iterate_upper_bound_, key())) {
Prev();
ReleaseTempPinnedData();
PrevInternal();
}
} else {
PrevInternal();

View File

@ -2076,6 +2076,87 @@ TEST_F(DBIteratorTest, TableFilter) {
}
}
TEST_F(DBIteratorTest, SkipStatistics) {
Options options = CurrentOptions();
options.statistics = rocksdb::CreateDBStatistics();
DestroyAndReopen(options);
int skip_count = 0;
// write a bunch of kvs to the database.
ASSERT_OK(Put("a", "1"));
ASSERT_OK(Put("b", "1"));
ASSERT_OK(Put("c", "1"));
ASSERT_OK(Flush());
ASSERT_OK(Put("d", "1"));
ASSERT_OK(Put("e", "1"));
ASSERT_OK(Put("f", "1"));
ASSERT_OK(Put("a", "2"));
ASSERT_OK(Put("b", "2"));
ASSERT_OK(Flush());
ASSERT_OK(Delete("d"));
ASSERT_OK(Delete("e"));
ASSERT_OK(Delete("f"));
Iterator* iter = db_->NewIterator(ReadOptions());
int count = 0;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
ASSERT_OK(iter->status());
count++;
}
ASSERT_EQ(count, 3);
delete iter;
skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence
ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
iter = db_->NewIterator(ReadOptions());
count = 0;
for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
ASSERT_OK(iter->status());
count++;
}
ASSERT_EQ(count, 3);
delete iter;
skip_count += 8; // Same as above, but in reverse order
ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
ASSERT_OK(Put("aa", "1"));
ASSERT_OK(Put("ab", "1"));
ASSERT_OK(Put("ac", "1"));
ASSERT_OK(Put("ad", "1"));
ASSERT_OK(Flush());
ASSERT_OK(Delete("ab"));
ASSERT_OK(Delete("ac"));
ASSERT_OK(Delete("ad"));
ReadOptions ro;
Slice prefix("b");
ro.iterate_upper_bound = &prefix;
iter = db_->NewIterator(ro);
count = 0;
for(iter->Seek("aa"); iter->Valid(); iter->Next()) {
ASSERT_OK(iter->status());
count++;
}
ASSERT_EQ(count, 1);
delete iter;
skip_count += 6; // 3 deletes + 3 original keys
ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
iter = db_->NewIterator(ro);
count = 0;
for(iter->SeekToLast(); iter->Valid(); iter->Prev()) {
ASSERT_OK(iter->status());
count++;
}
ASSERT_EQ(count, 2);
delete iter;
// 3 deletes + 3 original keys + 2 keys of "b" + lower sequence of "a"
skip_count += 9;
ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
}
} // namespace rocksdb
int main(int argc, char** argv) {

View File

@ -223,6 +223,9 @@ enum Tickers : uint32_t {
// Number of refill intervals where rate limiter's bytes are fully consumed.
NUMBER_RATE_LIMITER_DRAINS,
// Number of internal keys skipped by Iterator
NUMBER_ITER_SKIP,
TICKER_ENUM_MAX
};
@ -328,6 +331,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{READ_AMP_ESTIMATE_USEFUL_BYTES, "rocksdb.read.amp.estimate.useful.bytes"},
{READ_AMP_TOTAL_READ_BYTES, "rocksdb.read.amp.total.read.bytes"},
{NUMBER_RATE_LIMITER_DRAINS, "rocksdb.number.rate_limiter.drains"},
{NUMBER_ITER_SKIP, "rocksdb.number.iter.skip"},
};
/**

View File

@ -2523,8 +2523,10 @@ class TickerTypeJni {
return 0x5B;
case rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS:
return 0x5C;
case rocksdb::Tickers::TICKER_ENUM_MAX:
case rocksdb::Tickers::NUMBER_ITER_SKIP:
return 0x5D;
case rocksdb::Tickers::TICKER_ENUM_MAX:
return 0x5E;
default:
// undefined/default
@ -2723,6 +2725,8 @@ class TickerTypeJni {
case 0x5C:
return rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS;
case 0x5D:
return rocksdb::Tickers::NUMBER_ITER_SKIP;
case 0x5E:
return rocksdb::Tickers::TICKER_ENUM_MAX;
default:

View File

@ -76,8 +76,7 @@ class StatisticsImpl : public Statistics {
padding[(CACHE_LINE_SIZE -
(INTERNAL_TICKER_ENUM_MAX * sizeof(std::atomic_uint_fast64_t) +
INTERNAL_HISTOGRAM_ENUM_MAX * sizeof(HistogramImpl)) %
CACHE_LINE_SIZE) %
CACHE_LINE_SIZE] ROCKSDB_FIELD_UNUSED;
CACHE_LINE_SIZE)] ROCKSDB_FIELD_UNUSED;
};
static_assert(sizeof(StatisticsData) % 64 == 0, "Expected 64-byte aligned");