Add regression test for serialized Bloom filters (#5778)
Summary: Check that we don't accidentally change the on-disk format of existing Bloom filter implementations, including for the various supported CACHE_LINE_SIZE values (verified by temporarily changing CACHE_LINE_SIZE). Pull Request resolved: https://github.com/facebook/rocksdb/pull/5778 Test Plan: this is the test. Differential Revision: D17269630 Pulled By: pdillinger fbshipit-source-id: c77017662f010a77603b7d475892b1f0d5563d8b
This commit is contained in:
parent
fbab9913e2
commit
108c619acb
@ -770,8 +770,19 @@ TEST_P(PlainTableDBTest, BloomSchema) {
|
|||||||
for (unsigned i = 0; i < 32; ++i) {
|
for (unsigned i = 0; i < 32; ++i) {
|
||||||
// Known pattern of Bloom filter false positives can detect schema change
|
// Known pattern of Bloom filter false positives can detect schema change
|
||||||
// with high probability. Known FPs stuffed into bits:
|
// with high probability. Known FPs stuffed into bits:
|
||||||
bool expect_fp = (bloom_locality ? 2421694657UL : 1785868347UL)
|
uint32_t pattern;
|
||||||
& (1UL << i);
|
if (!bloom_locality) {
|
||||||
|
pattern = 1785868347UL;
|
||||||
|
} else if (CACHE_LINE_SIZE == 64) {
|
||||||
|
pattern = 2421694657UL;
|
||||||
|
} else if (CACHE_LINE_SIZE == 128) {
|
||||||
|
pattern = 788710956UL;
|
||||||
|
} else {
|
||||||
|
ASSERT_EQ(CACHE_LINE_SIZE, 256);
|
||||||
|
pattern = 163905UL;
|
||||||
|
}
|
||||||
|
bool expect_fp = pattern & (1UL << i);
|
||||||
|
//fprintf(stderr, "expect_fp@%u: %d\n", i, (int)expect_fp);
|
||||||
expect_bloom_not_match = !expect_fp;
|
expect_bloom_not_match = !expect_fp;
|
||||||
ASSERT_EQ("NOT_FOUND", Get(NthKey(i, 'n')));
|
ASSERT_EQ("NOT_FOUND", Get(NthKey(i, 'n')));
|
||||||
}
|
}
|
||||||
|
@ -23,6 +23,7 @@ int main() {
|
|||||||
#include "table/full_filter_bits_builder.h"
|
#include "table/full_filter_bits_builder.h"
|
||||||
#include "test_util/testharness.h"
|
#include "test_util/testharness.h"
|
||||||
#include "test_util/testutil.h"
|
#include "test_util/testutil.h"
|
||||||
|
#include "util/hash.h"
|
||||||
#include "util/gflags_compat.h"
|
#include "util/gflags_compat.h"
|
||||||
|
|
||||||
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
|
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
|
||||||
@ -55,7 +56,7 @@ static int NextLength(int length) {
|
|||||||
|
|
||||||
class BloomTest : public testing::Test {
|
class BloomTest : public testing::Test {
|
||||||
private:
|
private:
|
||||||
const FilterPolicy* policy_;
|
std::unique_ptr<const FilterPolicy> policy_;
|
||||||
std::string filter_;
|
std::string filter_;
|
||||||
std::vector<std::string> keys_;
|
std::vector<std::string> keys_;
|
||||||
|
|
||||||
@ -63,13 +64,20 @@ class BloomTest : public testing::Test {
|
|||||||
BloomTest() : policy_(
|
BloomTest() : policy_(
|
||||||
NewBloomFilterPolicy(FLAGS_bits_per_key)) {}
|
NewBloomFilterPolicy(FLAGS_bits_per_key)) {}
|
||||||
|
|
||||||
~BloomTest() override { delete policy_; }
|
|
||||||
|
|
||||||
void Reset() {
|
void Reset() {
|
||||||
keys_.clear();
|
keys_.clear();
|
||||||
filter_.clear();
|
filter_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ResetPolicy(const FilterPolicy* policy = nullptr) {
|
||||||
|
if (policy == nullptr) {
|
||||||
|
policy_.reset(NewBloomFilterPolicy(FLAGS_bits_per_key));
|
||||||
|
} else {
|
||||||
|
policy_.reset(policy);
|
||||||
|
}
|
||||||
|
Reset();
|
||||||
|
}
|
||||||
|
|
||||||
void Add(const Slice& s) {
|
void Add(const Slice& s) {
|
||||||
keys_.push_back(s.ToString());
|
keys_.push_back(s.ToString());
|
||||||
}
|
}
|
||||||
@ -90,6 +98,10 @@ class BloomTest : public testing::Test {
|
|||||||
return filter_.size();
|
return filter_.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Slice FilterData() const {
|
||||||
|
return Slice(filter_);
|
||||||
|
}
|
||||||
|
|
||||||
void DumpFilter() {
|
void DumpFilter() {
|
||||||
fprintf(stderr, "F(");
|
fprintf(stderr, "F(");
|
||||||
for (size_t i = 0; i+1 < filter_.size(); i++) {
|
for (size_t i = 0; i+1 < filter_.size(); i++) {
|
||||||
@ -173,11 +185,62 @@ TEST_F(BloomTest, VaryingLengths) {
|
|||||||
ASSERT_LE(mediocre_filters, good_filters/5);
|
ASSERT_LE(mediocre_filters, good_filters/5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ensure the implementation doesn't accidentally change in an
|
||||||
|
// incompatible way
|
||||||
|
TEST_F(BloomTest, Schema) {
|
||||||
|
char buffer[sizeof(int)];
|
||||||
|
|
||||||
|
ResetPolicy(NewBloomFilterPolicy(8)); // num_probes = 5
|
||||||
|
for (int key = 0; key < 87; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
ASSERT_EQ(BloomHash(FilterData()), 3589896109U);
|
||||||
|
|
||||||
|
ResetPolicy(NewBloomFilterPolicy(9)); // num_probes = 6
|
||||||
|
for (int key = 0; key < 87; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
ASSERT_EQ(BloomHash(FilterData()), 969445585);
|
||||||
|
|
||||||
|
ResetPolicy(NewBloomFilterPolicy(11)); // num_probes = 7
|
||||||
|
for (int key = 0; key < 87; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
ASSERT_EQ(BloomHash(FilterData()), 1694458207);
|
||||||
|
|
||||||
|
ResetPolicy(NewBloomFilterPolicy(10)); // num_probes = 6
|
||||||
|
for (int key = 0; key < 87; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
ASSERT_EQ(BloomHash(FilterData()), 2373646410U);
|
||||||
|
|
||||||
|
ResetPolicy(NewBloomFilterPolicy(10));
|
||||||
|
for (int key = 1; key < 87; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
ASSERT_EQ(BloomHash(FilterData()), 1908442116);
|
||||||
|
|
||||||
|
ResetPolicy(NewBloomFilterPolicy(10));
|
||||||
|
for (int key = 1; key < 88; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
ASSERT_EQ(BloomHash(FilterData()), 3057004015U);
|
||||||
|
|
||||||
|
ResetPolicy();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Different bits-per-byte
|
// Different bits-per-byte
|
||||||
|
|
||||||
class FullBloomTest : public testing::Test {
|
class FullBloomTest : public testing::Test {
|
||||||
private:
|
private:
|
||||||
const FilterPolicy* policy_;
|
std::unique_ptr<const FilterPolicy> policy_;
|
||||||
std::unique_ptr<FilterBitsBuilder> bits_builder_;
|
std::unique_ptr<FilterBitsBuilder> bits_builder_;
|
||||||
std::unique_ptr<FilterBitsReader> bits_reader_;
|
std::unique_ptr<FilterBitsReader> bits_reader_;
|
||||||
std::unique_ptr<const char[]> buf_;
|
std::unique_ptr<const char[]> buf_;
|
||||||
@ -190,8 +253,6 @@ class FullBloomTest : public testing::Test {
|
|||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
~FullBloomTest() override { delete policy_; }
|
|
||||||
|
|
||||||
FullFilterBitsBuilder* GetFullFilterBitsBuilder() {
|
FullFilterBitsBuilder* GetFullFilterBitsBuilder() {
|
||||||
return dynamic_cast<FullFilterBitsBuilder*>(bits_builder_.get());
|
return dynamic_cast<FullFilterBitsBuilder*>(bits_builder_.get());
|
||||||
}
|
}
|
||||||
@ -203,6 +264,15 @@ class FullBloomTest : public testing::Test {
|
|||||||
filter_size_ = 0;
|
filter_size_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ResetPolicy(const FilterPolicy* policy = nullptr) {
|
||||||
|
if (policy == nullptr) {
|
||||||
|
policy_.reset(NewBloomFilterPolicy(FLAGS_bits_per_key, false));
|
||||||
|
} else {
|
||||||
|
policy_.reset(policy);
|
||||||
|
}
|
||||||
|
Reset();
|
||||||
|
}
|
||||||
|
|
||||||
void Add(const Slice& s) {
|
void Add(const Slice& s) {
|
||||||
bits_builder_->AddKey(s);
|
bits_builder_->AddKey(s);
|
||||||
}
|
}
|
||||||
@ -217,6 +287,10 @@ class FullBloomTest : public testing::Test {
|
|||||||
return filter_size_;
|
return filter_size_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Slice FilterData() {
|
||||||
|
return Slice(buf_.get(), filter_size_);
|
||||||
|
}
|
||||||
|
|
||||||
bool Matches(const Slice& s) {
|
bool Matches(const Slice& s) {
|
||||||
if (bits_reader_ == nullptr) {
|
if (bits_reader_ == nullptr) {
|
||||||
Build();
|
Build();
|
||||||
@ -305,6 +379,84 @@ TEST_F(FullBloomTest, FullVaryingLengths) {
|
|||||||
ASSERT_LE(mediocre_filters, good_filters/5);
|
ASSERT_LE(mediocre_filters, good_filters/5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
inline uint32_t SelectByCacheLineSize(uint32_t for64,
|
||||||
|
uint32_t for128,
|
||||||
|
uint32_t for256) {
|
||||||
|
(void)for64;
|
||||||
|
(void)for128;
|
||||||
|
(void)for256;
|
||||||
|
#if CACHE_LINE_SIZE == 64
|
||||||
|
return for64;
|
||||||
|
#elif CACHE_LINE_SIZE == 128
|
||||||
|
return for128;
|
||||||
|
#elif CACHE_LINE_SIZE == 256
|
||||||
|
return for256;
|
||||||
|
#else
|
||||||
|
#error "CACHE_LINE_SIZE unknown or unrecognized"
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Ensure the implementation doesn't accidentally change in an
|
||||||
|
// incompatible way
|
||||||
|
TEST_F(FullBloomTest, Schema) {
|
||||||
|
char buffer[sizeof(int)];
|
||||||
|
|
||||||
|
// Use enough keys so that changing bits / key by 1 is guaranteed to
|
||||||
|
// change number of allocated cache lines. So keys > max cache line bits.
|
||||||
|
|
||||||
|
ResetPolicy(NewBloomFilterPolicy(8)); // num_probes = 5
|
||||||
|
for (int key = 0; key < 2087; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
ASSERT_EQ(BloomHash(FilterData()),
|
||||||
|
SelectByCacheLineSize(1302145999, 2811644657U, 756553699));
|
||||||
|
|
||||||
|
ResetPolicy(NewBloomFilterPolicy(9)); // num_probes = 6
|
||||||
|
for (int key = 0; key < 2087; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
ASSERT_EQ(BloomHash(FilterData()),
|
||||||
|
SelectByCacheLineSize(2092755149, 661139132, 1182970461));
|
||||||
|
|
||||||
|
ResetPolicy(NewBloomFilterPolicy(11)); // num_probes = 7
|
||||||
|
for (int key = 0; key < 2087; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
ASSERT_EQ(BloomHash(FilterData()),
|
||||||
|
SelectByCacheLineSize(3755609649U, 1812694762, 1449142939));
|
||||||
|
|
||||||
|
ResetPolicy(NewBloomFilterPolicy(10)); // num_probes = 6
|
||||||
|
for (int key = 0; key < 2087; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
ASSERT_EQ(BloomHash(FilterData()),
|
||||||
|
SelectByCacheLineSize(1478976371, 2910591341U, 1182970461));
|
||||||
|
|
||||||
|
ResetPolicy(NewBloomFilterPolicy(10));
|
||||||
|
for (int key = 1; key < 2087; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
ASSERT_EQ(BloomHash(FilterData()),
|
||||||
|
SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U));
|
||||||
|
|
||||||
|
ResetPolicy(NewBloomFilterPolicy(10));
|
||||||
|
for (int key = 1; key < 2088; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
ASSERT_EQ(BloomHash(FilterData()),
|
||||||
|
SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
|
||||||
|
|
||||||
|
ResetPolicy();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user