Clean up some filter tests and comments (#5960)
Summary: Some filtering tests were unfriendly to new implementations of FilterBitsBuilder because of dynamic_cast to FullFilterBitsBuilder. Most of those have now been cleaned up, worked around, or at least changed from crash on dynamic_cast failure to individual test failure. Also put some clarifying comments on filter-related APIs. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5960 Test Plan: make check Differential Revision: D18121223 Pulled By: pdillinger fbshipit-source-id: e83827d9d5d96315d96f8e25a99cd70f497d802c
This commit is contained in:
parent
2309fd63bf
commit
013babc685
@ -44,12 +44,13 @@ class FilterBitsBuilder {
|
||||
// The ownership of actual data is set to buf
|
||||
virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
|
||||
|
||||
// Calculate num of entries fit into a space.
|
||||
// Calculate num of keys that can be added and generate a filter
|
||||
// <= the specified number of bytes.
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4702) // unreachable code
|
||||
#endif
|
||||
virtual int CalculateNumEntry(const uint32_t /*space*/) {
|
||||
virtual int CalculateNumEntry(const uint32_t /*bytes*/) {
|
||||
#ifndef ROCKSDB_LITE
|
||||
throw std::runtime_error("CalculateNumEntry not Implemented");
|
||||
#else
|
||||
@ -119,13 +120,13 @@ class FilterPolicy {
|
||||
// list, but it should aim to return false with a high probability.
|
||||
virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
|
||||
|
||||
// Get the FilterBitsBuilder, which is ONLY used for full filter block
|
||||
// It contains interface to take individual key, then generate filter
|
||||
// Return a new FilterBitsBuilder for full or partitioned filter blocks, or
|
||||
// nullptr if using block-based filter.
|
||||
virtual FilterBitsBuilder* GetFilterBitsBuilder() const { return nullptr; }
|
||||
|
||||
// Get the FilterBitsReader, which is ONLY used for full filter block
|
||||
// It contains interface to tell if key can be in filter
|
||||
// The input slice should NOT be deleted by FilterPolicy
|
||||
// Return a new FilterBitsReader for full or partitioned filter blocks, or
|
||||
// nullptr if using block-based filter.
|
||||
// As here, the input slice should NOT be deleted by FilterPolicy.
|
||||
virtual FilterBitsReader* GetFilterBitsReader(
|
||||
const Slice& /*contents*/) const {
|
||||
return nullptr;
|
||||
|
@ -103,16 +103,16 @@ char* FullFilterBitsBuilder::ReserveSpace(const int num_entry,
|
||||
return data;
|
||||
}
|
||||
|
||||
int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t space) {
|
||||
int FullFilterBitsBuilder::CalculateNumEntry(const uint32_t bytes) {
|
||||
assert(bits_per_key_);
|
||||
assert(space > 0);
|
||||
assert(bytes > 0);
|
||||
uint32_t dont_care1, dont_care2;
|
||||
int high = static_cast<int>(space * 8 / bits_per_key_ + 1);
|
||||
int high = static_cast<int>(bytes * 8 / bits_per_key_ + 1);
|
||||
int low = 1;
|
||||
int n = high;
|
||||
for (; n >= low; n--) {
|
||||
uint32_t sz = CalculateSpace(n, &dont_care1, &dont_care2);
|
||||
if (sz <= space) {
|
||||
if (sz <= bytes) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {
|
||||
|
||||
~FullFilterBitsBuilder();
|
||||
|
||||
virtual void AddKey(const Slice& key) override;
|
||||
void AddKey(const Slice& key) override;
|
||||
|
||||
// Create a filter that for hashes [0, n-1], the filter is allocated here
|
||||
// When creating filter, it is ensured that
|
||||
@ -44,12 +44,13 @@ class FullFilterBitsBuilder : public FilterBitsBuilder {
|
||||
// +----------------------------------------------------------------+
|
||||
// | ... | num_probes : 1 byte | num_lines : 4 bytes |
|
||||
// +----------------------------------------------------------------+
|
||||
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override;
|
||||
Slice Finish(std::unique_ptr<const char[]>* buf) override;
|
||||
|
||||
// Calculate num of entries fit into a space.
|
||||
virtual int CalculateNumEntry(const uint32_t space) override;
|
||||
int CalculateNumEntry(const uint32_t bytes) override;
|
||||
|
||||
// Calculate space for new filter. This is reverse of CalculateNumEntry.
|
||||
// Calculate number of bytes needed for a new filter, including
|
||||
// metadata. Passing the result to CalculateNumEntry should
|
||||
// return >= the num_entry passed in.
|
||||
uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits,
|
||||
uint32_t* num_lines);
|
||||
|
||||
|
@ -3,6 +3,8 @@
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#include <set>
|
||||
|
||||
#include "table/block_based/full_filter_block.h"
|
||||
#include "rocksdb/filter_policy.h"
|
||||
#include "table/block_based/block_based_table_reader.h"
|
||||
@ -205,24 +207,62 @@ TEST_F(FullFilterBlockTest, EmptyBuilder) {
|
||||
/*lookup_context=*/nullptr));
|
||||
}
|
||||
|
||||
class CountUniqueFilterBitsBuilderWrapper : public FilterBitsBuilder {
|
||||
std::unique_ptr<FilterBitsBuilder> b_;
|
||||
std::set<std::string> uniq_;
|
||||
|
||||
public:
|
||||
explicit CountUniqueFilterBitsBuilderWrapper(FilterBitsBuilder* b) : b_(b) {}
|
||||
|
||||
~CountUniqueFilterBitsBuilderWrapper() override {}
|
||||
|
||||
void AddKey(const Slice& key) override {
|
||||
b_->AddKey(key);
|
||||
uniq_.insert(key.ToString());
|
||||
}
|
||||
|
||||
Slice Finish(std::unique_ptr<const char[]>* buf) override {
|
||||
Slice rv = b_->Finish(buf);
|
||||
uniq_.clear();
|
||||
return rv;
|
||||
}
|
||||
|
||||
int CalculateNumEntry(const uint32_t bytes) override {
|
||||
return b_->CalculateNumEntry(bytes);
|
||||
}
|
||||
|
||||
size_t CountUnique() { return uniq_.size(); }
|
||||
};
|
||||
|
||||
TEST_F(FullFilterBlockTest, DuplicateEntries) {
|
||||
{ // empty prefixes
|
||||
std::unique_ptr<const SliceTransform> prefix_extractor(
|
||||
NewFixedPrefixTransform(0));
|
||||
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
|
||||
auto bits_builder = new CountUniqueFilterBitsBuilderWrapper(
|
||||
table_options_.filter_policy->GetFilterBitsBuilder());
|
||||
const bool WHOLE_KEY = true;
|
||||
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
|
||||
bits_builder);
|
||||
ASSERT_EQ(0, builder.NumAdded());
|
||||
builder.Add("key"); // test with empty prefix
|
||||
ASSERT_EQ(2, bits_builder->hash_entries_.size());
|
||||
ASSERT_EQ(0, bits_builder->CountUnique());
|
||||
// adds key and empty prefix; both abstractions count them
|
||||
builder.Add("key1");
|
||||
ASSERT_EQ(2, builder.NumAdded());
|
||||
ASSERT_EQ(2, bits_builder->CountUnique());
|
||||
// Add different key (unique) and also empty prefix (not unique).
|
||||
// From here in this test, it's immaterial whether the block builder
|
||||
// can count unique keys.
|
||||
builder.Add("key2");
|
||||
ASSERT_EQ(3, bits_builder->CountUnique());
|
||||
// Empty key -> nothing unique
|
||||
builder.Add("");
|
||||
ASSERT_EQ(3, bits_builder->CountUnique());
|
||||
}
|
||||
|
||||
// mix of empty and non-empty
|
||||
std::unique_ptr<const SliceTransform> prefix_extractor(
|
||||
NewFixedPrefixTransform(7));
|
||||
auto bits_builder = dynamic_cast<FullFilterBitsBuilder*>(
|
||||
auto bits_builder = new CountUniqueFilterBitsBuilderWrapper(
|
||||
table_options_.filter_policy->GetFilterBitsBuilder());
|
||||
const bool WHOLE_KEY = true;
|
||||
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
|
||||
@ -234,8 +274,8 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) {
|
||||
builder.Add("prefix1key2");
|
||||
builder.Add("prefix1key3");
|
||||
builder.Add("prefix2key4");
|
||||
// two prefix adn 4 keys
|
||||
ASSERT_EQ(1 + 2 + 4, bits_builder->hash_entries_.size());
|
||||
// 1 empty, 2 non-empty prefixes, and 4 non-empty keys
|
||||
ASSERT_EQ(1 + 2 + 4, bits_builder->CountUnique());
|
||||
}
|
||||
|
||||
TEST_F(FullFilterBlockTest, SingleChunk) {
|
||||
|
@ -65,12 +65,15 @@ class PartitionedFilterBlockTest
|
||||
InternalKeyComparator icomp_;
|
||||
std::unique_ptr<BlockBasedTable> table_;
|
||||
std::shared_ptr<Cache> cache_;
|
||||
int bits_per_key_;
|
||||
|
||||
PartitionedFilterBlockTest()
|
||||
: ioptions_(options_),
|
||||
env_options_(options_),
|
||||
icomp_(options_.comparator) {
|
||||
table_options_.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
||||
icomp_(options_.comparator),
|
||||
bits_per_key_(10) {
|
||||
table_options_.filter_policy.reset(
|
||||
NewBloomFilterPolicy(bits_per_key_, false));
|
||||
table_options_.format_version = GetParam();
|
||||
table_options_.index_block_restart_interval = 3;
|
||||
}
|
||||
@ -91,16 +94,9 @@ class PartitionedFilterBlockTest
|
||||
}
|
||||
|
||||
uint64_t MaxFilterSize() {
|
||||
uint32_t dont_care1, dont_care2;
|
||||
int num_keys = sizeof(keys) / sizeof(*keys);
|
||||
auto filter_bits_reader = dynamic_cast<rocksdb::FullFilterBitsBuilder*>(
|
||||
table_options_.filter_policy->GetFilterBitsBuilder());
|
||||
assert(filter_bits_reader);
|
||||
auto partition_size =
|
||||
filter_bits_reader->CalculateSpace(num_keys, &dont_care1, &dont_care2);
|
||||
delete filter_bits_reader;
|
||||
return partition_size +
|
||||
partition_size * table_options_.block_size_deviation / 100;
|
||||
// General, rough over-approximation
|
||||
return num_keys * bits_per_key_ + (CACHE_LINE_SIZE * 8 + /*metadata*/ 5);
|
||||
}
|
||||
|
||||
uint64_t last_offset = 10;
|
||||
|
@ -324,6 +324,7 @@ class FullBloomTest : public testing::Test {
|
||||
TEST_F(FullBloomTest, FilterSize) {
|
||||
uint32_t dont_care1, dont_care2;
|
||||
auto full_bits_builder = GetFullFilterBitsBuilder();
|
||||
ASSERT_TRUE(full_bits_builder != nullptr);
|
||||
for (int n = 1; n < 100; n++) {
|
||||
auto space = full_bits_builder->CalculateSpace(n, &dont_care1, &dont_care2);
|
||||
auto n2 = full_bits_builder->CalculateNumEntry(space);
|
||||
|
Loading…
Reference in New Issue
Block a user