From 7e4e381495b5063f4156a0a73c9be9baf0053d28 Mon Sep 17 00:00:00 2001 From: Maysam Yabandeh Date: Thu, 26 Apr 2018 13:22:08 -0700 Subject: [PATCH] Fix the bloom filter skipping empty prefixes Summary: bc0da4b5125ac4f43c88879522013814355338e7 optimized bloom filters by skipping duplicate entires when the whole key and prefixes are both added to the bloom. It however used empty string as the initial value of the last entry added to the bloom. This is incorrect since empty key/prefix are valid entires by themselves. This patch fixes that. Closes https://github.com/facebook/rocksdb/pull/3776 Differential Revision: D7778803 Pulled By: maysamyabandeh fbshipit-source-id: d5a065daebee17f9403cac51e9d5626aac87bfbc --- table/full_filter_block.cc | 12 ++++++++---- table/full_filter_block.h | 2 ++ table/full_filter_block_test.cc | 17 ++++++++++++++++- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/table/full_filter_block.cc b/table/full_filter_block.cc index 4af3125ba..0e621042d 100644 --- a/table/full_filter_block.cc +++ b/table/full_filter_block.cc @@ -17,6 +17,8 @@ FullFilterBlockBuilder::FullFilterBlockBuilder( FilterBitsBuilder* filter_bits_builder) : prefix_extractor_(prefix_extractor), whole_key_filtering_(whole_key_filtering), + last_whole_key_recorded_(false), + last_prefix_recorded_(false), num_added_(0) { assert(filter_bits_builder != nullptr); filter_bits_builder_.reset(filter_bits_builder); @@ -33,9 +35,10 @@ void FullFilterBlockBuilder::Add(const Slice& key) { // bits builder to properly detect the duplicates by comparing with the // last item. Slice last_whole_key = Slice(last_whole_key_str_); - if (last_whole_key.compare(key) != 0) { + if (!last_whole_key_recorded_ || last_whole_key.compare(key) != 0) { AddKey(key); - last_whole_key_str_ = key.ToString(); + last_whole_key_recorded_ = true; + last_whole_key_str_.assign(key.data(), key.size()); } } } @@ -59,9 +62,10 @@ inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) { // bits builder to properly detect the duplicates by comparing with the last // item. Slice last_prefix = Slice(last_prefix_str_); - if (last_prefix.compare(prefix) != 0) { + if (!last_prefix_recorded_ || last_prefix.compare(prefix) != 0) { AddKey(prefix); - last_prefix_str_ = prefix.ToString(); + last_prefix_recorded_ = true; + last_prefix_str_.assign(prefix.data(), prefix.size()); } } else { AddKey(prefix); diff --git a/table/full_filter_block.h b/table/full_filter_block.h index eb2703a4d..352051052 100644 --- a/table/full_filter_block.h +++ b/table/full_filter_block.h @@ -59,7 +59,9 @@ class FullFilterBlockBuilder : public FilterBlockBuilder { // should NOT dereference them. const SliceTransform* prefix_extractor_; bool whole_key_filtering_; + bool last_whole_key_recorded_; std::string last_whole_key_str_; + bool last_prefix_recorded_; std::string last_prefix_str_; uint32_t num_added_; diff --git a/table/full_filter_block_test.cc b/table/full_filter_block_test.cc index 73541264a..eb77ec056 100644 --- a/table/full_filter_block_test.cc +++ b/table/full_filter_block_test.cc @@ -162,6 +162,20 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) { } TEST_F(FullFilterBlockTest, DuplicateEntries) { + { // empty prefixes + std::unique_ptr prefix_extractor( + NewFixedPrefixTransform(0)); + auto bits_builder = dynamic_cast( + table_options_.filter_policy->GetFilterBitsBuilder()); + const bool WHOLE_KEY = true; + FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY, + bits_builder); + ASSERT_EQ(0, builder.NumAdded()); + builder.Add("key"); // test with empty prefix + ASSERT_EQ(2, bits_builder->hash_entries_.size()); + } + + // mix of empty and non-empty std::unique_ptr prefix_extractor( NewFixedPrefixTransform(7)); auto bits_builder = dynamic_cast( @@ -170,13 +184,14 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) { FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY, bits_builder); ASSERT_EQ(0, builder.NumAdded()); + builder.Add(""); // test with empty key too builder.Add("prefix1key1"); builder.Add("prefix1key1"); builder.Add("prefix1key2"); builder.Add("prefix1key3"); builder.Add("prefix2key4"); // two prefix adn 4 keys - ASSERT_EQ(2 + 4, bits_builder->hash_entries_.size()); + ASSERT_EQ(1 + 2 + 4, bits_builder->hash_entries_.size()); } TEST_F(FullFilterBlockTest, SingleChunk) {