Add regression test for serialized Bloom filters (#5778)
Summary: Check that we don't accidentally change the on-disk format of existing Bloom filter implementations, including for various CACHE_LINE_SIZE values (verified by temporarily changing CACHE_LINE_SIZE). Pull Request resolved: https://github.com/facebook/rocksdb/pull/5778 Test Plan: this is the test Differential Revision: D17269630 Pulled By: pdillinger fbshipit-source-id: c77017662f010a77603b7d475892b1f0d5563d8b
This commit is contained in:
parent
fbab9913e2
commit
108c619acb
@ -770,8 +770,19 @@ TEST_P(PlainTableDBTest, BloomSchema) {
|
||||
for (unsigned i = 0; i < 32; ++i) {
|
||||
// Known pattern of Bloom filter false positives can detect schema change
|
||||
// with high probability. Known FPs stuffed into bits:
|
||||
bool expect_fp = (bloom_locality ? 2421694657UL : 1785868347UL)
|
||||
& (1UL << i);
|
||||
uint32_t pattern;
|
||||
if (!bloom_locality) {
|
||||
pattern = 1785868347UL;
|
||||
} else if (CACHE_LINE_SIZE == 64) {
|
||||
pattern = 2421694657UL;
|
||||
} else if (CACHE_LINE_SIZE == 128) {
|
||||
pattern = 788710956UL;
|
||||
} else {
|
||||
ASSERT_EQ(CACHE_LINE_SIZE, 256);
|
||||
pattern = 163905UL;
|
||||
}
|
||||
bool expect_fp = pattern & (1UL << i);
|
||||
//fprintf(stderr, "expect_fp@%u: %d\n", i, (int)expect_fp);
|
||||
expect_bloom_not_match = !expect_fp;
|
||||
ASSERT_EQ("NOT_FOUND", Get(NthKey(i, 'n')));
|
||||
}
|
||||
|
@ -23,6 +23,7 @@ int main() {
|
||||
#include "table/full_filter_bits_builder.h"
|
||||
#include "test_util/testharness.h"
|
||||
#include "test_util/testutil.h"
|
||||
#include "util/hash.h"
|
||||
#include "util/gflags_compat.h"
|
||||
|
||||
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
|
||||
@ -55,7 +56,7 @@ static int NextLength(int length) {
|
||||
|
||||
class BloomTest : public testing::Test {
|
||||
private:
|
||||
const FilterPolicy* policy_;
|
||||
std::unique_ptr<const FilterPolicy> policy_;
|
||||
std::string filter_;
|
||||
std::vector<std::string> keys_;
|
||||
|
||||
@ -63,13 +64,20 @@ class BloomTest : public testing::Test {
|
||||
// Every test starts with a policy created from FLAGS_bits_per_key.
BloomTest()
    : policy_(NewBloomFilterPolicy(FLAGS_bits_per_key)) {}
|
||||
|
||||
~BloomTest() override { delete policy_; }
|
||||
|
||||
// Drops the current filter bytes and every key added so far.
// The policy itself is left untouched.
void Reset() {
  filter_.clear();
  keys_.clear();
}
|
||||
|
||||
void ResetPolicy(const FilterPolicy* policy = nullptr) {
|
||||
if (policy == nullptr) {
|
||||
policy_.reset(NewBloomFilterPolicy(FLAGS_bits_per_key));
|
||||
} else {
|
||||
policy_.reset(policy);
|
||||
}
|
||||
Reset();
|
||||
}
|
||||
|
||||
// Appends a std::string copy of `s` to keys_. Nothing else is modified here.
void Add(const Slice& s) {
  keys_.emplace_back(s.ToString());
}
|
||||
@ -90,6 +98,10 @@ class BloomTest : public testing::Test {
|
||||
return filter_.size();
|
||||
}
|
||||
|
||||
// Returns a Slice constructed from filter_, so that the filter contents can
// be passed to Slice-based helpers such as BloomHash().
Slice FilterData() const {
  const Slice data(filter_);
  return data;
}
|
||||
|
||||
void DumpFilter() {
|
||||
fprintf(stderr, "F(");
|
||||
for (size_t i = 0; i+1 < filter_.size(); i++) {
|
||||
@ -173,11 +185,62 @@ TEST_F(BloomTest, VaryingLengths) {
|
||||
ASSERT_LE(mediocre_filters, good_filters/5);
|
||||
}
|
||||
|
||||
// Ensure the implementation doesn't accidentally change in an
|
||||
// incompatible way
|
||||
TEST_F(BloomTest, Schema) {
|
||||
char buffer[sizeof(int)];
|
||||
|
||||
ResetPolicy(NewBloomFilterPolicy(8)); // num_probes = 5
|
||||
for (int key = 0; key < 87; key++) {
|
||||
Add(Key(key, buffer));
|
||||
}
|
||||
Build();
|
||||
ASSERT_EQ(BloomHash(FilterData()), 3589896109U);
|
||||
|
||||
ResetPolicy(NewBloomFilterPolicy(9)); // num_probes = 6
|
||||
for (int key = 0; key < 87; key++) {
|
||||
Add(Key(key, buffer));
|
||||
}
|
||||
Build();
|
||||
ASSERT_EQ(BloomHash(FilterData()), 969445585);
|
||||
|
||||
ResetPolicy(NewBloomFilterPolicy(11)); // num_probes = 7
|
||||
for (int key = 0; key < 87; key++) {
|
||||
Add(Key(key, buffer));
|
||||
}
|
||||
Build();
|
||||
ASSERT_EQ(BloomHash(FilterData()), 1694458207);
|
||||
|
||||
ResetPolicy(NewBloomFilterPolicy(10)); // num_probes = 6
|
||||
for (int key = 0; key < 87; key++) {
|
||||
Add(Key(key, buffer));
|
||||
}
|
||||
Build();
|
||||
ASSERT_EQ(BloomHash(FilterData()), 2373646410U);
|
||||
|
||||
ResetPolicy(NewBloomFilterPolicy(10));
|
||||
for (int key = 1; key < 87; key++) {
|
||||
Add(Key(key, buffer));
|
||||
}
|
||||
Build();
|
||||
ASSERT_EQ(BloomHash(FilterData()), 1908442116);
|
||||
|
||||
ResetPolicy(NewBloomFilterPolicy(10));
|
||||
for (int key = 1; key < 88; key++) {
|
||||
Add(Key(key, buffer));
|
||||
}
|
||||
Build();
|
||||
ASSERT_EQ(BloomHash(FilterData()), 3057004015U);
|
||||
|
||||
ResetPolicy();
|
||||
}
|
||||
|
||||
|
||||
// Different bits-per-byte
|
||||
|
||||
class FullBloomTest : public testing::Test {
|
||||
private:
|
||||
const FilterPolicy* policy_;
|
||||
std::unique_ptr<const FilterPolicy> policy_;
|
||||
std::unique_ptr<FilterBitsBuilder> bits_builder_;
|
||||
std::unique_ptr<FilterBitsReader> bits_reader_;
|
||||
std::unique_ptr<const char[]> buf_;
|
||||
@ -190,8 +253,6 @@ class FullBloomTest : public testing::Test {
|
||||
Reset();
|
||||
}
|
||||
|
||||
~FullBloomTest() override { delete policy_; }
|
||||
|
||||
// Returns the current bits builder as a FullFilterBitsBuilder. Because the
// conversion is a dynamic_cast, the result is nullptr when bits_builder_
// holds a different builder type (or nothing).
FullFilterBitsBuilder* GetFullFilterBitsBuilder() {
  FilterBitsBuilder* const base = bits_builder_.get();
  return dynamic_cast<FullFilterBitsBuilder*>(base);
}
|
||||
@ -203,6 +264,15 @@ class FullBloomTest : public testing::Test {
|
||||
filter_size_ = 0;
|
||||
}
|
||||
|
||||
void ResetPolicy(const FilterPolicy* policy = nullptr) {
|
||||
if (policy == nullptr) {
|
||||
policy_.reset(NewBloomFilterPolicy(FLAGS_bits_per_key, false));
|
||||
} else {
|
||||
policy_.reset(policy);
|
||||
}
|
||||
Reset();
|
||||
}
|
||||
|
||||
void Add(const Slice& s) {
|
||||
bits_builder_->AddKey(s);
|
||||
}
|
||||
@ -217,6 +287,10 @@ class FullBloomTest : public testing::Test {
|
||||
return filter_size_;
|
||||
}
|
||||
|
||||
// Returns a Slice covering the first filter_size_ bytes of buf_.
Slice FilterData() {
  const char* const bytes = buf_.get();
  return Slice(bytes, filter_size_);
}
|
||||
|
||||
bool Matches(const Slice& s) {
|
||||
if (bits_reader_ == nullptr) {
|
||||
Build();
|
||||
@ -305,6 +379,84 @@ TEST_F(FullBloomTest, FullVaryingLengths) {
|
||||
ASSERT_LE(mediocre_filters, good_filters/5);
|
||||
}
|
||||
|
||||
namespace {
|
||||
inline uint32_t SelectByCacheLineSize(uint32_t for64,
|
||||
uint32_t for128,
|
||||
uint32_t for256) {
|
||||
(void)for64;
|
||||
(void)for128;
|
||||
(void)for256;
|
||||
#if CACHE_LINE_SIZE == 64
|
||||
return for64;
|
||||
#elif CACHE_LINE_SIZE == 128
|
||||
return for128;
|
||||
#elif CACHE_LINE_SIZE == 256
|
||||
return for256;
|
||||
#else
|
||||
#error "CACHE_LINE_SIZE unknown or unrecognized"
|
||||
#endif
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Ensure the implementation doesn't accidentally change in an
|
||||
// incompatible way
|
||||
TEST_F(FullBloomTest, Schema) {
|
||||
char buffer[sizeof(int)];
|
||||
|
||||
// Use enough keys so that changing bits / key by 1 is guaranteed to
|
||||
// change number of allocated cache lines. So keys > max cache line bits.
|
||||
|
||||
ResetPolicy(NewBloomFilterPolicy(8)); // num_probes = 5
|
||||
for (int key = 0; key < 2087; key++) {
|
||||
Add(Key(key, buffer));
|
||||
}
|
||||
Build();
|
||||
ASSERT_EQ(BloomHash(FilterData()),
|
||||
SelectByCacheLineSize(1302145999, 2811644657U, 756553699));
|
||||
|
||||
ResetPolicy(NewBloomFilterPolicy(9)); // num_probes = 6
|
||||
for (int key = 0; key < 2087; key++) {
|
||||
Add(Key(key, buffer));
|
||||
}
|
||||
Build();
|
||||
ASSERT_EQ(BloomHash(FilterData()),
|
||||
SelectByCacheLineSize(2092755149, 661139132, 1182970461));
|
||||
|
||||
ResetPolicy(NewBloomFilterPolicy(11)); // num_probes = 7
|
||||
for (int key = 0; key < 2087; key++) {
|
||||
Add(Key(key, buffer));
|
||||
}
|
||||
Build();
|
||||
ASSERT_EQ(BloomHash(FilterData()),
|
||||
SelectByCacheLineSize(3755609649U, 1812694762, 1449142939));
|
||||
|
||||
ResetPolicy(NewBloomFilterPolicy(10)); // num_probes = 6
|
||||
for (int key = 0; key < 2087; key++) {
|
||||
Add(Key(key, buffer));
|
||||
}
|
||||
Build();
|
||||
ASSERT_EQ(BloomHash(FilterData()),
|
||||
SelectByCacheLineSize(1478976371, 2910591341U, 1182970461));
|
||||
|
||||
ResetPolicy(NewBloomFilterPolicy(10));
|
||||
for (int key = 1; key < 2087; key++) {
|
||||
Add(Key(key, buffer));
|
||||
}
|
||||
Build();
|
||||
ASSERT_EQ(BloomHash(FilterData()),
|
||||
SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U));
|
||||
|
||||
ResetPolicy(NewBloomFilterPolicy(10));
|
||||
for (int key = 1; key < 2088; key++) {
|
||||
Add(Key(key, buffer));
|
||||
}
|
||||
Build();
|
||||
ASSERT_EQ(BloomHash(FilterData()),
|
||||
SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
|
||||
|
||||
ResetPolicy();
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user