Fix the bloom filter skipping empty prefixes

Summary:
bc0da4b512 optimized bloom filters by skipping duplicate entires when the whole key and prefixes are both added to the bloom. It however used empty string as the initial value of the last entry added to the bloom. This is incorrect since empty key/prefix are valid entires by themselves. This patch fixes that.
Closes https://github.com/facebook/rocksdb/pull/3776

Differential Revision: D7778803

Pulled By: maysamyabandeh

fbshipit-source-id: d5a065daebee17f9403cac51e9d5626aac87bfbc
This commit is contained in:
Maysam Yabandeh 2018-04-26 13:22:08 -07:00
parent 5753c17212
commit dcb7f3bdae
3 changed files with 26 additions and 5 deletions

View File

@ -17,6 +17,8 @@ FullFilterBlockBuilder::FullFilterBlockBuilder(
FilterBitsBuilder* filter_bits_builder)
: prefix_extractor_(prefix_extractor),
whole_key_filtering_(whole_key_filtering),
last_whole_key_recorded_(false),
last_prefix_recorded_(false),
num_added_(0) {
assert(filter_bits_builder != nullptr);
filter_bits_builder_.reset(filter_bits_builder);
@ -33,9 +35,10 @@ void FullFilterBlockBuilder::Add(const Slice& key) {
// bits builder to properly detect the duplicates by comparing with the
// last item.
Slice last_whole_key = Slice(last_whole_key_str_);
if (last_whole_key.compare(key) != 0) {
if (!last_whole_key_recorded_ || last_whole_key.compare(key) != 0) {
AddKey(key);
last_whole_key_str_ = key.ToString();
last_whole_key_recorded_ = true;
last_whole_key_str_.assign(key.data(), key.size());
}
}
}
@ -59,9 +62,10 @@ inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
// bits builder to properly detect the duplicates by comparing with the last
// item.
Slice last_prefix = Slice(last_prefix_str_);
if (last_prefix.compare(prefix) != 0) {
if (!last_prefix_recorded_ || last_prefix.compare(prefix) != 0) {
AddKey(prefix);
last_prefix_str_ = prefix.ToString();
last_prefix_recorded_ = true;
last_prefix_str_.assign(prefix.data(), prefix.size());
}
} else {
AddKey(prefix);

View File

@ -59,7 +59,9 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
// should NOT dereference them.
const SliceTransform* prefix_extractor_;
bool whole_key_filtering_;
bool last_whole_key_recorded_;
std::string last_whole_key_str_;
bool last_prefix_recorded_;
std::string last_prefix_str_;
uint32_t num_added_;

View File

@ -162,6 +162,20 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) {
}
TEST_F(FullFilterBlockTest, DuplicateEntries) {
{ // empty prefixes
std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(0));
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
table_options_.filter_policy->GetFilterBitsBuilder());
const bool WHOLE_KEY = true;
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
bits_builder);
ASSERT_EQ(0, builder.NumAdded());
builder.Add("key"); // test with empty prefix
ASSERT_EQ(2, bits_builder->hash_entries_.size());
}
// mix of empty and non-empty
std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(7));
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
@ -170,13 +184,14 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) {
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
bits_builder);
ASSERT_EQ(0, builder.NumAdded());
builder.Add(""); // test with empty key too
builder.Add("prefix1key1");
builder.Add("prefix1key1");
builder.Add("prefix1key2");
builder.Add("prefix1key3");
builder.Add("prefix2key4");
// two prefix adn 4 keys
ASSERT_EQ(2 + 4, bits_builder->hash_entries_.size());
ASSERT_EQ(1 + 2 + 4, bits_builder->hash_entries_.size());
}
TEST_F(FullFilterBlockTest, SingleChunk) {