rocksdb/table/block_based/full_filter_block_test.cc
Peter Dillinger 003e72b201 Use size_t for filter APIs, protect against overflow (#7726)
Summary:
Deprecate CalculateNumEntry and replace with
ApproximateNumEntries (better name) using size_t instead of int and
uint32_t, to minimize confusing casts and bad overflow behavior
(possible though probably not realistic). Bloom sizes are now explicitly
capped at max size supported by implementations: just under 4GiB for
fv=5 Bloom, and just under 512MiB for fv<5 Legacy Bloom. This
hardening could help to set up for fuzzing.

Also, since RocksDB only uses this information as an approximation
for trying to hit certain sizes for partitioned filters, it's more important
that the function be reasonably fast than for it to be completely
accurate. It's hard enough to be 100% accurate for Ribbon (currently
reversing CalculateSpace) that adding optimize_filters_for_memory
into the mix is just not worth trying to be 100% accurate for num
entries for bytes.

Also:
- Cleaned up filter_policy.h to remove MSVC warning handling and
potentially unsafe use of exception for "not implemented"
- Correct the number of entries limit beyond which current Ribbon
implementation falls back on Bloom instead.
- Consistently use "num_entries" rather than "num_entry"
- Remove LegacyBloomBitsBuilder::CalculateNumEntry as it's essentially
obsolete from general implementation
BuiltinFilterBitsBuilder::CalculateNumEntries.
- Fix filter_bench to skip some tests that don't make sense when only
one or a small number of filters has been generated.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7726

Test Plan:
expanded existing unit tests for CalculateSpace /
ApproximateNumEntries. Also manually used filter_bench to verify Legacy and
fv=5 Bloom size caps work (much too expensive for unit test). Note that
the actual bits per key is below requested due to space cap.

    $ ./filter_bench -impl=0 -bits_per_key=20 -average_keys_per_filter=256000000 -vary_key_count_ratio=0 -m_keys_total_max=256 -allow_bad_fp_rate
    ...
    Total size (MB): 511.992
    Bits/key stored: 16.777
    ...
    $ ./filter_bench -impl=2 -bits_per_key=20 -average_keys_per_filter=2000000000 -vary_key_count_ratio=0 -m_keys_total_max=2000
    ...
    Total size (MB): 4096
    Bits/key stored: 17.1799
    ...
    $

Reviewed By: jay-zhuang

Differential Revision: D25239800

Pulled By: pdillinger

fbshipit-source-id: f94e6d065efd31e05ec630ae1a82e6400d8390c4
2020-12-11 22:18:12 -08:00

334 lines
13 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <set>
#include "table/block_based/full_filter_block.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/mock_block_based_table.h"
#include "table/block_based/filter_policy_internal.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/coding.h"
#include "util/hash.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
class TestFilterBitsBuilder : public FilterBitsBuilder {
public:
explicit TestFilterBitsBuilder() {}
// Add Key to filter
void AddKey(const Slice& key) override {
hash_entries_.push_back(Hash(key.data(), key.size(), 1));
}
// Generate the filter using the keys that are added
Slice Finish(std::unique_ptr<const char[]>* buf) override {
uint32_t len = static_cast<uint32_t>(hash_entries_.size()) * 4;
char* data = new char[len];
for (size_t i = 0; i < hash_entries_.size(); i++) {
EncodeFixed32(data + i * 4, hash_entries_[i]);
}
const char* const_data = data;
buf->reset(const_data);
return Slice(data, len);
}
private:
std::vector<uint32_t> hash_entries_;
};
class MockBlockBasedTable : public BlockBasedTable {
public:
explicit MockBlockBasedTable(Rep* rep)
: BlockBasedTable(rep, nullptr /* block_cache_tracer */) {}
};
class TestFilterBitsReader : public FilterBitsReader {
public:
explicit TestFilterBitsReader(const Slice& contents)
: data_(contents.data()), len_(static_cast<uint32_t>(contents.size())) {}
// Silence compiler warning about overloaded virtual
using FilterBitsReader::MayMatch;
bool MayMatch(const Slice& entry) override {
uint32_t h = Hash(entry.data(), entry.size(), 1);
for (size_t i = 0; i + 4 <= len_; i += 4) {
if (h == DecodeFixed32(data_ + i)) {
return true;
}
}
return false;
}
private:
const char* data_;
uint32_t len_;
};
class TestHashFilter : public FilterPolicy {
public:
const char* Name() const override { return "TestHashFilter"; }
void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
for (int i = 0; i < n; i++) {
uint32_t h = Hash(keys[i].data(), keys[i].size(), 1);
PutFixed32(dst, h);
}
}
bool KeyMayMatch(const Slice& key, const Slice& filter) const override {
uint32_t h = Hash(key.data(), key.size(), 1);
for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) {
if (h == DecodeFixed32(filter.data() + i)) {
return true;
}
}
return false;
}
FilterBitsBuilder* GetFilterBitsBuilder() const override {
return new TestFilterBitsBuilder();
}
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override {
return new TestFilterBitsReader(contents);
}
};
class PluginFullFilterBlockTest : public mock::MockBlockBasedTableTester,
public testing::Test {
public:
PluginFullFilterBlockTest()
: mock::MockBlockBasedTableTester(new TestHashFilter) {}
};
TEST_F(PluginFullFilterBlockTest, PluginEmptyBuilder) {
FullFilterBlockBuilder builder(nullptr, true, GetBuilder());
Slice slice = builder.Finish();
ASSERT_EQ("", EscapeString(slice));
CachableEntry<ParsedFullFilterBlock> block(
new ParsedFullFilterBlock(table_options_.filter_policy.get(),
BlockContents(slice)),
nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */);
FullFilterBlockReader reader(table_.get(), std::move(block));
// Remain same symantic with blockbased filter
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
}
TEST_F(PluginFullFilterBlockTest, PluginSingleChunk) {
FullFilterBlockBuilder builder(nullptr, true, GetBuilder());
builder.Add("foo");
builder.Add("bar");
builder.Add("box");
builder.Add("box");
builder.Add("hello");
Slice slice = builder.Finish();
CachableEntry<ParsedFullFilterBlock> block(
new ParsedFullFilterBlock(table_options_.filter_policy.get(),
BlockContents(slice)),
nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */);
FullFilterBlockReader reader(table_.get(), std::move(block));
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("bar", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("hello", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
}
class FullFilterBlockTest : public mock::MockBlockBasedTableTester,
public testing::Test {
public:
FullFilterBlockTest()
: mock::MockBlockBasedTableTester(NewBloomFilterPolicy(10, false)) {}
};
TEST_F(FullFilterBlockTest, EmptyBuilder) {
FullFilterBlockBuilder builder(nullptr, true, GetBuilder());
Slice slice = builder.Finish();
ASSERT_EQ("", EscapeString(slice));
CachableEntry<ParsedFullFilterBlock> block(
new ParsedFullFilterBlock(table_options_.filter_policy.get(),
BlockContents(slice)),
nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */);
FullFilterBlockReader reader(table_.get(), std::move(block));
// Remain same symantic with blockbased filter
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
}
class CountUniqueFilterBitsBuilderWrapper : public FilterBitsBuilder {
std::unique_ptr<FilterBitsBuilder> b_;
std::set<std::string> uniq_;
public:
explicit CountUniqueFilterBitsBuilderWrapper(FilterBitsBuilder* b) : b_(b) {}
~CountUniqueFilterBitsBuilderWrapper() override {}
void AddKey(const Slice& key) override {
b_->AddKey(key);
uniq_.insert(key.ToString());
}
Slice Finish(std::unique_ptr<const char[]>* buf) override {
Slice rv = b_->Finish(buf);
uniq_.clear();
return rv;
}
size_t ApproximateNumEntries(size_t bytes) override {
return b_->ApproximateNumEntries(bytes);
}
size_t CountUnique() { return uniq_.size(); }
};
TEST_F(FullFilterBlockTest, DuplicateEntries) {
{ // empty prefixes
std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(0));
auto bits_builder = new CountUniqueFilterBitsBuilderWrapper(GetBuilder());
const bool WHOLE_KEY = true;
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
bits_builder);
ASSERT_EQ(0, builder.NumAdded());
ASSERT_EQ(0, bits_builder->CountUnique());
// adds key and empty prefix; both abstractions count them
builder.Add("key1");
ASSERT_EQ(2, builder.NumAdded());
ASSERT_EQ(2, bits_builder->CountUnique());
// Add different key (unique) and also empty prefix (not unique).
// From here in this test, it's immaterial whether the block builder
// can count unique keys.
builder.Add("key2");
ASSERT_EQ(3, bits_builder->CountUnique());
// Empty key -> nothing unique
builder.Add("");
ASSERT_EQ(3, bits_builder->CountUnique());
}
// mix of empty and non-empty
std::unique_ptr<const SliceTransform> prefix_extractor(
NewFixedPrefixTransform(7));
auto bits_builder = new CountUniqueFilterBitsBuilderWrapper(GetBuilder());
const bool WHOLE_KEY = true;
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
bits_builder);
ASSERT_EQ(0, builder.NumAdded());
builder.Add(""); // test with empty key too
builder.Add("prefix1key1");
builder.Add("prefix1key1");
builder.Add("prefix1key2");
builder.Add("prefix1key3");
builder.Add("prefix2key4");
// 1 empty, 2 non-empty prefixes, and 4 non-empty keys
ASSERT_EQ(1 + 2 + 4, bits_builder->CountUnique());
}
TEST_F(FullFilterBlockTest, SingleChunk) {
FullFilterBlockBuilder builder(nullptr, true, GetBuilder());
ASSERT_EQ(0, builder.NumAdded());
builder.Add("foo");
builder.Add("bar");
builder.Add("box");
builder.Add("box");
builder.Add("hello");
ASSERT_EQ(5, builder.NumAdded());
Slice slice = builder.Finish();
CachableEntry<ParsedFullFilterBlock> block(
new ParsedFullFilterBlock(table_options_.filter_policy.get(),
BlockContents(slice)),
nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */);
FullFilterBlockReader reader(table_.get(), std::move(block));
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("bar", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("hello", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr,
/*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr,
/*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"missing", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
ASSERT_TRUE(!reader.KeyMayMatch(
"other", /*prefix_extractor=*/nullptr, /*block_offset=*/kNotValid,
/*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr,
/*lookup_context=*/nullptr));
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}