bloom hit/miss stats for SST and memtable

Summary:
	hit and miss bloom filter stats for memtable and SST
	stats added to perf_context struct
	key matches and prefix matches combined into one stat

Test Plan: unit test veryfing the functionality added, see BloomStatsTest in db_test.cc for details

Reviewers: yhchiang, igor, sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D47859
This commit is contained in:
dyniusz 2015-10-07 11:23:20 -07:00
parent 40cdf797d2
commit a065cdb388
7 changed files with 215 additions and 12 deletions

View File

@ -131,6 +131,46 @@ class DBTestWithParam : public DBTest,
uint32_t max_subcompactions_;
};
class BloomStatsTestWithParam
: public DBTest,
public testing::WithParamInterface<std::tuple<bool, bool>> {
public:
BloomStatsTestWithParam() {
use_block_table_ = std::get<0>(GetParam());
use_block_based_builder_ = std::get<1>(GetParam());
options_.create_if_missing = true;
options_.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
options_.memtable_prefix_bloom_bits = 8 * 1024;
if (use_block_table_) {
BlockBasedTableOptions table_options;
table_options.hash_index_allow_collision = false;
table_options.filter_policy.reset(
NewBloomFilterPolicy(10, use_block_based_builder_));
options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
} else {
PlainTableOptions table_options;
options_.table_factory.reset(NewPlainTableFactory(table_options));
}
perf_context.Reset();
DestroyAndReopen(options_);
}
~BloomStatsTestWithParam() {
perf_context.Reset();
Destroy(options_);
}
// Required if inheriting from testing::WithParamInterface<>
static void SetUpTestCase() {}
static void TearDownTestCase() {}
bool use_block_table_;
bool use_block_based_builder_;
Options options_;
};
TEST_F(DBTest, Empty) {
do {
Options options;
@ -9709,6 +9749,119 @@ TEST_F(DBTest, PauseBackgroundWorkTest) {
ASSERT_EQ(true, done.load());
}
// 1 Insert 2 K-V pairs into DB
// 2 Call Get() for both keys - expext memtable bloom hit stat to be 2
// 3 Call Get() for nonexisting key - expect memtable bloom miss stat to be 1
// 4 Call Flush() to create SST
// 5 Call Get() for both keys - expext SST bloom hit stat to be 2
// 6 Call Get() for nonexisting key - expect SST bloom miss stat to be 1
// Test both: block and plain SST
TEST_P(BloomStatsTestWithParam, BloomStatsTest) {
std::string key1("AAAA");
std::string key2("RXDB"); // not in DB
std::string key3("ZBRA");
std::string value1("Value1");
std::string value3("Value3");
ASSERT_OK(Put(key1, value1, WriteOptions()));
ASSERT_OK(Put(key3, value3, WriteOptions()));
// check memtable bloom stats
ASSERT_EQ(value1, Get(key1));
ASSERT_EQ(1, perf_context.bloom_memtable_hit_count);
ASSERT_EQ(value3, Get(key3));
ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
ASSERT_EQ("NOT_FOUND", Get(key2));
ASSERT_EQ(1, perf_context.bloom_memtable_miss_count);
ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
// sanity checks
ASSERT_EQ(0, perf_context.bloom_sst_hit_count);
ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
Flush();
// sanity checks
ASSERT_EQ(0, perf_context.bloom_sst_hit_count);
ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
// check SST bloom stats
// NOTE: hits per get differs because of code paths differences
// in BlockBasedTable::Get()
int hits_per_get = use_block_table_ && !use_block_based_builder_ ? 2 : 1;
ASSERT_EQ(value1, Get(key1));
ASSERT_EQ(hits_per_get, perf_context.bloom_sst_hit_count);
ASSERT_EQ(value3, Get(key3));
ASSERT_EQ(2 * hits_per_get, perf_context.bloom_sst_hit_count);
ASSERT_EQ("NOT_FOUND", Get(key2));
ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
}
// Same scenario as in BloomStatsTest but using an iterator
TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
std::string key1("AAAA");
std::string key2("RXDB"); // not in DB
std::string key3("ZBRA");
std::string value1("Value1");
std::string value3("Value3");
ASSERT_OK(Put(key1, value1, WriteOptions()));
ASSERT_OK(Put(key3, value3, WriteOptions()));
unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions()));
// check memtable bloom stats
iter->Seek(key1);
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(value1, iter->value().ToString());
ASSERT_EQ(1, perf_context.bloom_memtable_hit_count);
ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
iter->Seek(key3);
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(value3, iter->value().ToString());
ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
iter->Seek(key2);
ASSERT_OK(iter->status());
ASSERT_TRUE(!iter->Valid());
ASSERT_EQ(1, perf_context.bloom_memtable_miss_count);
ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
Flush();
iter.reset(dbfull()->NewIterator(ReadOptions()));
// check SST bloom stats
iter->Seek(key1);
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(value1, iter->value().ToString());
ASSERT_EQ(1, perf_context.bloom_sst_hit_count);
iter->Seek(key3);
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(value3, iter->value().ToString());
ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
iter->Seek(key2);
ASSERT_OK(iter->status());
ASSERT_TRUE(!iter->Valid());
ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
}
INSTANTIATE_TEST_CASE_P(BloomStatsTestWithParam, BloomStatsTestWithParam,
::testing::Values(std::make_tuple(true, true),
std::make_tuple(true, false),
std::make_tuple(false, false)));
} // namespace rocksdb
#endif

View File

@ -230,10 +230,15 @@ class MemTableIterator: public Iterator {
virtual void Seek(const Slice& k) override {
PERF_TIMER_GUARD(seek_on_memtable_time);
PERF_COUNTER_ADD(seek_on_memtable_count, 1);
if (bloom_ != nullptr &&
!bloom_->MayContain(prefix_extractor_->Transform(ExtractUserKey(k)))) {
if (bloom_ != nullptr) {
if (!bloom_->MayContain(
prefix_extractor_->Transform(ExtractUserKey(k)))) {
PERF_COUNTER_ADD(bloom_memtable_miss_count, 1);
valid_ = false;
return;
} else {
PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
}
}
iter_->Seek(k, nullptr);
valid_ = iter_->Valid();
@ -508,12 +513,18 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
Slice user_key = key.user_key();
bool found_final_value = false;
bool merge_in_progress = s->IsMergeInProgress();
if (prefix_bloom_ &&
!prefix_bloom_->MayContain(prefix_extractor_->Transform(user_key))) {
bool const may_contain =
nullptr == prefix_bloom_
? false
: prefix_bloom_->MayContain(prefix_extractor_->Transform(user_key));
if (prefix_bloom_ && !may_contain) {
// iter is null if prefix bloom says the key does not exist
PERF_COUNTER_ADD(bloom_memtable_miss_count, 1);
*seq = kMaxSequenceNumber;
} else {
if (prefix_bloom_) {
PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
}
Saver saver;
saver.status = s;
saver.found_final_value = &found_final_value;

View File

@ -83,6 +83,14 @@ struct PerfContext {
uint64_t block_seek_nanos;
// Time spent on finding or creating a table reader
uint64_t find_table_nanos;
// total number of mem table bloom hits
uint64_t bloom_memtable_hit_count;
// total number of mem table bloom misses
uint64_t bloom_memtable_miss_count;
// total number of SST table bloom hits
uint64_t bloom_sst_hit_count;
// total number of SST table bloom misses
uint64_t bloom_sst_miss_count;
};
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)

View File

@ -7,12 +7,13 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <algorithm>
#include "table/block_based_filter_block.h"
#include <algorithm>
#include "db/dbformat.h"
#include "rocksdb/filter_policy.h"
#include "util/coding.h"
#include "util/perf_context_imp.h"
#include "util/string_util.h"
namespace rocksdb {
@ -219,7 +220,14 @@ bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry,
uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);
if (start <= limit && limit <= (uint32_t)(offset_ - data_)) {
Slice filter = Slice(data_ + start, limit - start);
return policy_->KeyMayMatch(entry, filter);
bool const may_match = policy_->KeyMayMatch(entry, filter);
if (may_match) {
PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
return true;
} else {
PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
return false;
}
} else if (start == limit) {
// Empty filters do not match any entries
return false;

View File

@ -8,6 +8,7 @@
#include "rocksdb/filter_policy.h"
#include "port/port.h"
#include "util/coding.h"
#include "util/perf_context_imp.h"
namespace rocksdb {
@ -89,7 +90,13 @@ bool FullFilterBlockReader::PrefixMayMatch(const Slice& prefix,
bool FullFilterBlockReader::MayMatch(const Slice& entry) {
if (contents_.size() != 0) {
return filter_bits_reader_->MayMatch(entry);
if (filter_bits_reader_->MayMatch(entry)) {
PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
return true;
} else {
PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
return false;
}
}
return true; // remain the same with block_based filter
}

View File

@ -488,7 +488,17 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
}
bool PlainTableReader::MatchBloom(uint32_t hash) const {
return !enable_bloom_ || bloom_.MayContainHash(hash);
if (!enable_bloom_) {
return true;
}
if (bloom_.MayContainHash(hash)) {
PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
return true;
} else {
PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
return false;
}
}
Status PlainTableReader::Next(PlainTableKeyDecoder* decoder, uint32_t* offset,

View File

@ -54,6 +54,10 @@ void PerfContext::Reset() {
new_table_iterator_nanos = 0;
block_seek_nanos = 0;
find_table_nanos = 0;
bloom_memtable_hit_count = 0;
bloom_memtable_miss_count = 0;
bloom_sst_hit_count = 0;
bloom_sst_miss_count = 0;
#endif
}
@ -80,7 +84,9 @@ std::string PerfContext::ToString() const {
<< OUTPUT(merge_operator_time_nanos) << OUTPUT(write_delay_time)
<< OUTPUT(read_index_block_nanos) << OUTPUT(read_filter_block_nanos)
<< OUTPUT(new_table_block_iter_nanos) << OUTPUT(new_table_iterator_nanos)
<< OUTPUT(block_seek_nanos) << OUTPUT(find_table_nanos);
<< OUTPUT(block_seek_nanos) << OUTPUT(find_table_nanos)
<< OUTPUT(bloom_memtable_hit_count) << OUTPUT(bloom_memtable_miss_count)
<< OUTPUT(bloom_sst_hit_count) << OUTPUT(bloom_sst_miss_count);
return ss.str();
#endif
}