Add a ticker stat for number of keys skipped during iteration
Summary: This diff adds a new ticker stat, NUMBER_ITER_SKIP, to count the number of internal keys skipped during iteration. Keys can be skipped due to deletes, or lower sequence number, or higher sequence number than the one requested. Also, fix the issue when StatisticsData is naturally aligned on cacheline boundary, padding becomes a zero size array, which the Windows compiler doesn't like. So add a cacheline worth of padding in that case to keep it happy. We cannot conditionally add padding as gcc doesn't allow using sizeof in preprocessor directives. Closes https://github.com/facebook/rocksdb/pull/3177 Differential Revision: D6353897 Pulled By: anand1976 fbshipit-source-id: 441d5a09af9c4e22e7355242dfc0c7b27aa0a6c2
This commit is contained in:
parent
578f36e431
commit
d394a6bb48
@ -76,6 +76,7 @@ class DBIter final: public Iterator {
|
|||||||
prev_count_ = 0;
|
prev_count_ = 0;
|
||||||
prev_found_count_ = 0;
|
prev_found_count_ = 0;
|
||||||
bytes_read_ = 0;
|
bytes_read_ = 0;
|
||||||
|
skip_count_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BumpGlobalStatistics(Statistics* global_statistics) {
|
void BumpGlobalStatistics(Statistics* global_statistics) {
|
||||||
@ -84,6 +85,7 @@ class DBIter final: public Iterator {
|
|||||||
RecordTick(global_statistics, NUMBER_DB_PREV, prev_count_);
|
RecordTick(global_statistics, NUMBER_DB_PREV, prev_count_);
|
||||||
RecordTick(global_statistics, NUMBER_DB_PREV_FOUND, prev_found_count_);
|
RecordTick(global_statistics, NUMBER_DB_PREV_FOUND, prev_found_count_);
|
||||||
RecordTick(global_statistics, ITER_BYTES_READ, bytes_read_);
|
RecordTick(global_statistics, ITER_BYTES_READ, bytes_read_);
|
||||||
|
RecordTick(global_statistics, NUMBER_ITER_SKIP, skip_count_);
|
||||||
PERF_COUNTER_ADD(iter_read_bytes, bytes_read_);
|
PERF_COUNTER_ADD(iter_read_bytes, bytes_read_);
|
||||||
ResetCounters();
|
ResetCounters();
|
||||||
}
|
}
|
||||||
@ -98,6 +100,8 @@ class DBIter final: public Iterator {
|
|||||||
uint64_t prev_found_count_;
|
uint64_t prev_found_count_;
|
||||||
// Map to Tickers::ITER_BYTES_READ
|
// Map to Tickers::ITER_BYTES_READ
|
||||||
uint64_t bytes_read_;
|
uint64_t bytes_read_;
|
||||||
|
// Map to Tickers::NUMBER_ITER_SKIP
|
||||||
|
uint64_t skip_count_;
|
||||||
};
|
};
|
||||||
|
|
||||||
DBIter(Env* _env, const ReadOptions& read_options,
|
DBIter(Env* _env, const ReadOptions& read_options,
|
||||||
@ -147,6 +151,7 @@ class DBIter final: public Iterator {
|
|||||||
// Compiler warning issue filed:
|
// Compiler warning issue filed:
|
||||||
// https://github.com/facebook/rocksdb/issues/3013
|
// https://github.com/facebook/rocksdb/issues/3013
|
||||||
RecordTick(statistics_, NO_ITERATORS, uint64_t(-1));
|
RecordTick(statistics_, NO_ITERATORS, uint64_t(-1));
|
||||||
|
ResetInternalKeysSkippedCounter();
|
||||||
local_stats_.BumpGlobalStatistics(statistics_);
|
local_stats_.BumpGlobalStatistics(statistics_);
|
||||||
if (!arena_mode_) {
|
if (!arena_mode_) {
|
||||||
delete iter_;
|
delete iter_;
|
||||||
@ -267,6 +272,10 @@ class DBIter final: public Iterator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
inline void ResetInternalKeysSkippedCounter() {
|
inline void ResetInternalKeysSkippedCounter() {
|
||||||
|
local_stats_.skip_count_ += num_internal_keys_skipped_;
|
||||||
|
if (valid_) {
|
||||||
|
local_stats_.skip_count_--;
|
||||||
|
}
|
||||||
num_internal_keys_skipped_ = 0;
|
num_internal_keys_skipped_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1143,6 +1152,7 @@ void DBIter::Seek(const Slice& target) {
|
|||||||
}
|
}
|
||||||
if (statistics_ != nullptr) {
|
if (statistics_ != nullptr) {
|
||||||
if (valid_) {
|
if (valid_) {
|
||||||
|
// Decrement since we don't want to count this key as skipped
|
||||||
RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
|
RecordTick(statistics_, NUMBER_DB_SEEK_FOUND);
|
||||||
RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
|
RecordTick(statistics_, ITER_BYTES_READ, key().size() + value().size());
|
||||||
PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
|
PERF_COUNTER_ADD(iter_read_bytes, key().size() + value().size());
|
||||||
@ -1269,7 +1279,8 @@ void DBIter::SeekToLast() {
|
|||||||
if (!Valid()) {
|
if (!Valid()) {
|
||||||
return;
|
return;
|
||||||
} else if (user_comparator_->Equal(*iterate_upper_bound_, key())) {
|
} else if (user_comparator_->Equal(*iterate_upper_bound_, key())) {
|
||||||
Prev();
|
ReleaseTempPinnedData();
|
||||||
|
PrevInternal();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
PrevInternal();
|
PrevInternal();
|
||||||
|
@ -2076,6 +2076,87 @@ TEST_F(DBIteratorTest, TableFilter) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(DBIteratorTest, SkipStatistics) {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.statistics = rocksdb::CreateDBStatistics();
|
||||||
|
DestroyAndReopen(options);
|
||||||
|
|
||||||
|
int skip_count = 0;
|
||||||
|
|
||||||
|
// write a bunch of kvs to the database.
|
||||||
|
ASSERT_OK(Put("a", "1"));
|
||||||
|
ASSERT_OK(Put("b", "1"));
|
||||||
|
ASSERT_OK(Put("c", "1"));
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
ASSERT_OK(Put("d", "1"));
|
||||||
|
ASSERT_OK(Put("e", "1"));
|
||||||
|
ASSERT_OK(Put("f", "1"));
|
||||||
|
ASSERT_OK(Put("a", "2"));
|
||||||
|
ASSERT_OK(Put("b", "2"));
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
ASSERT_OK(Delete("d"));
|
||||||
|
ASSERT_OK(Delete("e"));
|
||||||
|
ASSERT_OK(Delete("f"));
|
||||||
|
|
||||||
|
Iterator* iter = db_->NewIterator(ReadOptions());
|
||||||
|
int count = 0;
|
||||||
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||||
|
ASSERT_OK(iter->status());
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
ASSERT_EQ(count, 3);
|
||||||
|
delete iter;
|
||||||
|
skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence
|
||||||
|
ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
|
||||||
|
|
||||||
|
iter = db_->NewIterator(ReadOptions());
|
||||||
|
count = 0;
|
||||||
|
for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
|
||||||
|
ASSERT_OK(iter->status());
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
ASSERT_EQ(count, 3);
|
||||||
|
delete iter;
|
||||||
|
skip_count += 8; // Same as above, but in reverse order
|
||||||
|
ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
|
||||||
|
|
||||||
|
ASSERT_OK(Put("aa", "1"));
|
||||||
|
ASSERT_OK(Put("ab", "1"));
|
||||||
|
ASSERT_OK(Put("ac", "1"));
|
||||||
|
ASSERT_OK(Put("ad", "1"));
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
ASSERT_OK(Delete("ab"));
|
||||||
|
ASSERT_OK(Delete("ac"));
|
||||||
|
ASSERT_OK(Delete("ad"));
|
||||||
|
|
||||||
|
ReadOptions ro;
|
||||||
|
Slice prefix("b");
|
||||||
|
ro.iterate_upper_bound = &prefix;
|
||||||
|
|
||||||
|
iter = db_->NewIterator(ro);
|
||||||
|
count = 0;
|
||||||
|
for(iter->Seek("aa"); iter->Valid(); iter->Next()) {
|
||||||
|
ASSERT_OK(iter->status());
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
ASSERT_EQ(count, 1);
|
||||||
|
delete iter;
|
||||||
|
skip_count += 6; // 3 deletes + 3 original keys
|
||||||
|
ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
|
||||||
|
|
||||||
|
iter = db_->NewIterator(ro);
|
||||||
|
count = 0;
|
||||||
|
for(iter->SeekToLast(); iter->Valid(); iter->Prev()) {
|
||||||
|
ASSERT_OK(iter->status());
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
ASSERT_EQ(count, 2);
|
||||||
|
delete iter;
|
||||||
|
// 3 deletes + 3 original keys + 2 keys of "b" + lower sequence of "a"
|
||||||
|
skip_count += 9;
|
||||||
|
ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
@ -223,6 +223,9 @@ enum Tickers : uint32_t {
|
|||||||
// Number of refill intervals where rate limiter's bytes are fully consumed.
|
// Number of refill intervals where rate limiter's bytes are fully consumed.
|
||||||
NUMBER_RATE_LIMITER_DRAINS,
|
NUMBER_RATE_LIMITER_DRAINS,
|
||||||
|
|
||||||
|
// Number of internal keys skipped by Iterator
|
||||||
|
NUMBER_ITER_SKIP,
|
||||||
|
|
||||||
TICKER_ENUM_MAX
|
TICKER_ENUM_MAX
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -328,6 +331,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
|||||||
{READ_AMP_ESTIMATE_USEFUL_BYTES, "rocksdb.read.amp.estimate.useful.bytes"},
|
{READ_AMP_ESTIMATE_USEFUL_BYTES, "rocksdb.read.amp.estimate.useful.bytes"},
|
||||||
{READ_AMP_TOTAL_READ_BYTES, "rocksdb.read.amp.total.read.bytes"},
|
{READ_AMP_TOTAL_READ_BYTES, "rocksdb.read.amp.total.read.bytes"},
|
||||||
{NUMBER_RATE_LIMITER_DRAINS, "rocksdb.number.rate_limiter.drains"},
|
{NUMBER_RATE_LIMITER_DRAINS, "rocksdb.number.rate_limiter.drains"},
|
||||||
|
{NUMBER_ITER_SKIP, "rocksdb.number.iter.skip"},
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -2523,8 +2523,10 @@ class TickerTypeJni {
|
|||||||
return 0x5B;
|
return 0x5B;
|
||||||
case rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS:
|
case rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS:
|
||||||
return 0x5C;
|
return 0x5C;
|
||||||
case rocksdb::Tickers::TICKER_ENUM_MAX:
|
case rocksdb::Tickers::NUMBER_ITER_SKIP:
|
||||||
return 0x5D;
|
return 0x5D;
|
||||||
|
case rocksdb::Tickers::TICKER_ENUM_MAX:
|
||||||
|
return 0x5E;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
// undefined/default
|
// undefined/default
|
||||||
@ -2723,6 +2725,8 @@ class TickerTypeJni {
|
|||||||
case 0x5C:
|
case 0x5C:
|
||||||
return rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS;
|
return rocksdb::Tickers::NUMBER_RATE_LIMITER_DRAINS;
|
||||||
case 0x5D:
|
case 0x5D:
|
||||||
|
return rocksdb::Tickers::NUMBER_ITER_SKIP;
|
||||||
|
case 0x5E:
|
||||||
return rocksdb::Tickers::TICKER_ENUM_MAX;
|
return rocksdb::Tickers::TICKER_ENUM_MAX;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -76,8 +76,7 @@ class StatisticsImpl : public Statistics {
|
|||||||
padding[(CACHE_LINE_SIZE -
|
padding[(CACHE_LINE_SIZE -
|
||||||
(INTERNAL_TICKER_ENUM_MAX * sizeof(std::atomic_uint_fast64_t) +
|
(INTERNAL_TICKER_ENUM_MAX * sizeof(std::atomic_uint_fast64_t) +
|
||||||
INTERNAL_HISTOGRAM_ENUM_MAX * sizeof(HistogramImpl)) %
|
INTERNAL_HISTOGRAM_ENUM_MAX * sizeof(HistogramImpl)) %
|
||||||
CACHE_LINE_SIZE) %
|
CACHE_LINE_SIZE)] ROCKSDB_FIELD_UNUSED;
|
||||||
CACHE_LINE_SIZE] ROCKSDB_FIELD_UNUSED;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(sizeof(StatisticsData) % 64 == 0, "Expected 64-byte aligned");
|
static_assert(sizeof(StatisticsData) % 64 == 0, "Expected 64-byte aligned");
|
||||||
|
Loading…
Reference in New Issue
Block a user