Fix the bloom filter skipping empty prefixes
Summary:
bc0da4b512
optimized bloom filters by skipping duplicate entires when the whole key and prefixes are both added to the bloom. It however used empty string as the initial value of the last entry added to the bloom. This is incorrect since empty key/prefix are valid entires by themselves. This patch fixes that.
Closes https://github.com/facebook/rocksdb/pull/3776
Differential Revision: D7778803
Pulled By: maysamyabandeh
fbshipit-source-id: d5a065daebee17f9403cac51e9d5626aac87bfbc
This commit is contained in:
parent
5753c17212
commit
dcb7f3bdae
@ -17,6 +17,8 @@ FullFilterBlockBuilder::FullFilterBlockBuilder(
|
||||
FilterBitsBuilder* filter_bits_builder)
|
||||
: prefix_extractor_(prefix_extractor),
|
||||
whole_key_filtering_(whole_key_filtering),
|
||||
last_whole_key_recorded_(false),
|
||||
last_prefix_recorded_(false),
|
||||
num_added_(0) {
|
||||
assert(filter_bits_builder != nullptr);
|
||||
filter_bits_builder_.reset(filter_bits_builder);
|
||||
@ -33,9 +35,10 @@ void FullFilterBlockBuilder::Add(const Slice& key) {
|
||||
// bits builder to properly detect the duplicates by comparing with the
|
||||
// last item.
|
||||
Slice last_whole_key = Slice(last_whole_key_str_);
|
||||
if (last_whole_key.compare(key) != 0) {
|
||||
if (!last_whole_key_recorded_ || last_whole_key.compare(key) != 0) {
|
||||
AddKey(key);
|
||||
last_whole_key_str_ = key.ToString();
|
||||
last_whole_key_recorded_ = true;
|
||||
last_whole_key_str_.assign(key.data(), key.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -59,9 +62,10 @@ inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
|
||||
// bits builder to properly detect the duplicates by comparing with the last
|
||||
// item.
|
||||
Slice last_prefix = Slice(last_prefix_str_);
|
||||
if (last_prefix.compare(prefix) != 0) {
|
||||
if (!last_prefix_recorded_ || last_prefix.compare(prefix) != 0) {
|
||||
AddKey(prefix);
|
||||
last_prefix_str_ = prefix.ToString();
|
||||
last_prefix_recorded_ = true;
|
||||
last_prefix_str_.assign(prefix.data(), prefix.size());
|
||||
}
|
||||
} else {
|
||||
AddKey(prefix);
|
||||
|
@ -59,7 +59,9 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
|
||||
// should NOT dereference them.
|
||||
const SliceTransform* prefix_extractor_;
|
||||
bool whole_key_filtering_;
|
||||
bool last_whole_key_recorded_;
|
||||
std::string last_whole_key_str_;
|
||||
bool last_prefix_recorded_;
|
||||
std::string last_prefix_str_;
|
||||
|
||||
uint32_t num_added_;
|
||||
|
@ -162,6 +162,20 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) {
|
||||
}
|
||||
|
||||
TEST_F(FullFilterBlockTest, DuplicateEntries) {
|
||||
{ // empty prefixes
|
||||
std::unique_ptr<const SliceTransform> prefix_extractor(
|
||||
NewFixedPrefixTransform(0));
|
||||
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
|
||||
table_options_.filter_policy->GetFilterBitsBuilder());
|
||||
const bool WHOLE_KEY = true;
|
||||
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
|
||||
bits_builder);
|
||||
ASSERT_EQ(0, builder.NumAdded());
|
||||
builder.Add("key"); // test with empty prefix
|
||||
ASSERT_EQ(2, bits_builder->hash_entries_.size());
|
||||
}
|
||||
|
||||
// mix of empty and non-empty
|
||||
std::unique_ptr<const SliceTransform> prefix_extractor(
|
||||
NewFixedPrefixTransform(7));
|
||||
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
|
||||
@ -170,13 +184,14 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) {
|
||||
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
|
||||
bits_builder);
|
||||
ASSERT_EQ(0, builder.NumAdded());
|
||||
builder.Add(""); // test with empty key too
|
||||
builder.Add("prefix1key1");
|
||||
builder.Add("prefix1key1");
|
||||
builder.Add("prefix1key2");
|
||||
builder.Add("prefix1key3");
|
||||
builder.Add("prefix2key4");
|
||||
// two prefix adn 4 keys
|
||||
ASSERT_EQ(2 + 4, bits_builder->hash_entries_.size());
|
||||
ASSERT_EQ(1 + 2 + 4, bits_builder->hash_entries_.size());
|
||||
}
|
||||
|
||||
TEST_F(FullFilterBlockTest, SingleChunk) {
|
||||
|
Loading…
Reference in New Issue
Block a user