2016-02-09 15:12:00 -08:00
|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
2017-07-15 16:03:42 -07:00
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
2013-10-16 14:59:46 -07:00
|
|
|
//
|
2012-04-17 08:36:46 -07:00
|
|
|
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
2014-05-09 08:34:18 -07:00
|
|
|
#ifndef GFLAGS
|
|
|
|
#include <cstdio>
|
|
|
|
int main() {
|
2015-09-15 18:10:36 -07:00
|
|
|
fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
|
|
|
|
return 0;
|
2014-05-09 08:34:18 -07:00
|
|
|
}
|
|
|
|
#else
|
|
|
|
|
2014-09-08 10:37:05 -07:00
|
|
|
#include <vector>
|
2013-11-27 14:27:02 -08:00
|
|
|
|
2019-05-31 17:19:43 -07:00
|
|
|
#include "logging/logging.h"
|
2019-05-30 17:39:43 -07:00
|
|
|
#include "memory/arena.h"
|
2013-08-23 08:38:13 -07:00
|
|
|
#include "rocksdb/filter_policy.h"
|
2017-07-02 10:36:10 -07:00
|
|
|
#include "table/full_filter_bits_builder.h"
|
2019-05-30 11:21:38 -07:00
|
|
|
#include "test_util/testharness.h"
|
|
|
|
#include "test_util/testutil.h"
|
2019-05-30 17:39:43 -07:00
|
|
|
#include "util/gflags_compat.h"
|
2019-09-20 12:00:55 -07:00
|
|
|
#include "util/hash.h"
|
2012-04-17 08:36:46 -07:00
|
|
|
|
2017-12-01 10:40:45 -08:00
|
|
|
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
|
2014-05-09 08:34:18 -07:00
|
|
|
|
2013-11-27 14:27:02 -08:00
|
|
|
DEFINE_int32(bits_per_key, 10, "");
|
|
|
|
|
2013-10-03 21:49:15 -07:00
|
|
|
namespace rocksdb {
|
2012-04-17 08:36:46 -07:00
|
|
|
|
|
|
|
static const int kVerbose = 1;
|
|
|
|
|
|
|
|
static Slice Key(int i, char* buffer) {
|
2017-04-21 20:41:37 -07:00
|
|
|
std::string s;
|
|
|
|
PutFixed32(&s, static_cast<uint32_t>(i));
|
|
|
|
memcpy(buffer, s.c_str(), sizeof(i));
|
2012-04-17 08:36:46 -07:00
|
|
|
return Slice(buffer, sizeof(i));
|
|
|
|
}
|
|
|
|
|
2014-09-08 10:37:05 -07:00
|
|
|
static int NextLength(int length) {
|
|
|
|
if (length < 10) {
|
|
|
|
length += 1;
|
|
|
|
} else if (length < 100) {
|
|
|
|
length += 10;
|
|
|
|
} else if (length < 1000) {
|
|
|
|
length += 100;
|
|
|
|
} else {
|
|
|
|
length += 1000;
|
|
|
|
}
|
|
|
|
return length;
|
|
|
|
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
class BloomTest : public testing::Test {
|
2012-04-17 08:36:46 -07:00
|
|
|
private:
|
2019-09-09 14:49:39 -07:00
|
|
|
std::unique_ptr<const FilterPolicy> policy_;
|
2012-04-17 08:36:46 -07:00
|
|
|
std::string filter_;
|
|
|
|
std::vector<std::string> keys_;
|
|
|
|
|
|
|
|
public:
|
2014-09-08 10:37:05 -07:00
|
|
|
BloomTest() : policy_(
|
|
|
|
NewBloomFilterPolicy(FLAGS_bits_per_key)) {}
|
2012-04-17 08:36:46 -07:00
|
|
|
|
|
|
|
void Reset() {
|
|
|
|
keys_.clear();
|
|
|
|
filter_.clear();
|
|
|
|
}
|
|
|
|
|
2019-09-09 14:49:39 -07:00
|
|
|
void ResetPolicy(const FilterPolicy* policy = nullptr) {
|
|
|
|
if (policy == nullptr) {
|
|
|
|
policy_.reset(NewBloomFilterPolicy(FLAGS_bits_per_key));
|
|
|
|
} else {
|
|
|
|
policy_.reset(policy);
|
|
|
|
}
|
|
|
|
Reset();
|
|
|
|
}
|
|
|
|
|
2012-04-17 08:36:46 -07:00
|
|
|
void Add(const Slice& s) {
|
|
|
|
keys_.push_back(s.ToString());
|
|
|
|
}
|
|
|
|
|
|
|
|
void Build() {
|
|
|
|
std::vector<Slice> key_slices;
|
|
|
|
for (size_t i = 0; i < keys_.size(); i++) {
|
|
|
|
key_slices.push_back(Slice(keys_[i]));
|
|
|
|
}
|
|
|
|
filter_.clear();
|
2014-11-11 16:47:22 -05:00
|
|
|
policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
|
|
|
|
&filter_);
|
2012-04-17 08:36:46 -07:00
|
|
|
keys_.clear();
|
|
|
|
if (kVerbose >= 2) DumpFilter();
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t FilterSize() const {
|
|
|
|
return filter_.size();
|
|
|
|
}
|
|
|
|
|
2019-09-20 12:00:55 -07:00
|
|
|
Slice FilterData() const { return Slice(filter_); }
|
2019-09-09 14:49:39 -07:00
|
|
|
|
2012-04-17 08:36:46 -07:00
|
|
|
void DumpFilter() {
|
|
|
|
fprintf(stderr, "F(");
|
|
|
|
for (size_t i = 0; i+1 < filter_.size(); i++) {
|
|
|
|
const unsigned int c = static_cast<unsigned int>(filter_[i]);
|
|
|
|
for (int j = 0; j < 8; j++) {
|
|
|
|
fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fprintf(stderr, ")\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Matches(const Slice& s) {
|
|
|
|
if (!keys_.empty()) {
|
|
|
|
Build();
|
|
|
|
}
|
|
|
|
return policy_->KeyMayMatch(s, filter_);
|
|
|
|
}
|
|
|
|
|
|
|
|
double FalsePositiveRate() {
|
|
|
|
char buffer[sizeof(int)];
|
|
|
|
int result = 0;
|
|
|
|
for (int i = 0; i < 10000; i++) {
|
|
|
|
if (Matches(Key(i + 1000000000, buffer))) {
|
|
|
|
result++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result / 10000.0;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
TEST_F(BloomTest, EmptyFilter) {
|
2012-04-17 08:36:46 -07:00
|
|
|
ASSERT_TRUE(! Matches("hello"));
|
|
|
|
ASSERT_TRUE(! Matches("world"));
|
|
|
|
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
TEST_F(BloomTest, Small) {
|
2012-04-17 08:36:46 -07:00
|
|
|
Add("hello");
|
|
|
|
Add("world");
|
|
|
|
ASSERT_TRUE(Matches("hello"));
|
|
|
|
ASSERT_TRUE(Matches("world"));
|
|
|
|
ASSERT_TRUE(! Matches("x"));
|
|
|
|
ASSERT_TRUE(! Matches("foo"));
|
|
|
|
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
TEST_F(BloomTest, VaryingLengths) {
|
2012-04-17 08:36:46 -07:00
|
|
|
char buffer[sizeof(int)];
|
|
|
|
|
|
|
|
// Count number of filters that significantly exceed the false positive rate
|
|
|
|
int mediocre_filters = 0;
|
|
|
|
int good_filters = 0;
|
|
|
|
|
|
|
|
for (int length = 1; length <= 10000; length = NextLength(length)) {
|
|
|
|
Reset();
|
|
|
|
for (int i = 0; i < length; i++) {
|
|
|
|
Add(Key(i, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
|
2012-11-06 12:02:18 -08:00
|
|
|
ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 40)) << length;
|
2012-04-17 08:36:46 -07:00
|
|
|
|
|
|
|
// All added keys must match
|
|
|
|
for (int i = 0; i < length; i++) {
|
|
|
|
ASSERT_TRUE(Matches(Key(i, buffer)))
|
|
|
|
<< "Length " << length << "; key " << i;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check false positive rate
|
|
|
|
double rate = FalsePositiveRate();
|
|
|
|
if (kVerbose >= 1) {
|
|
|
|
fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
|
|
|
|
rate*100.0, length, static_cast<int>(FilterSize()));
|
|
|
|
}
|
|
|
|
ASSERT_LE(rate, 0.02); // Must not be over 2%
|
|
|
|
if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often
|
|
|
|
else good_filters++;
|
|
|
|
}
|
|
|
|
if (kVerbose >= 1) {
|
|
|
|
fprintf(stderr, "Filters: %d good, %d mediocre\n",
|
|
|
|
good_filters, mediocre_filters);
|
|
|
|
}
|
|
|
|
ASSERT_LE(mediocre_filters, good_filters/5);
|
|
|
|
}
|
|
|
|
|
2019-09-09 14:49:39 -07:00
|
|
|
// Ensure the implementation doesn't accidentally change in an
|
|
|
|
// incompatible way
|
|
|
|
TEST_F(BloomTest, Schema) {
|
|
|
|
char buffer[sizeof(int)];
|
|
|
|
|
2019-09-20 12:00:55 -07:00
|
|
|
ResetPolicy(NewBloomFilterPolicy(8)); // num_probes = 5
|
2019-09-09 14:49:39 -07:00
|
|
|
for (int key = 0; key < 87; key++) {
|
|
|
|
Add(Key(key, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
ASSERT_EQ(BloomHash(FilterData()), 3589896109U);
|
|
|
|
|
2019-09-20 12:00:55 -07:00
|
|
|
ResetPolicy(NewBloomFilterPolicy(9)); // num_probes = 6
|
2019-09-09 14:49:39 -07:00
|
|
|
for (int key = 0; key < 87; key++) {
|
|
|
|
Add(Key(key, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
ASSERT_EQ(BloomHash(FilterData()), 969445585);
|
|
|
|
|
2019-09-20 12:00:55 -07:00
|
|
|
ResetPolicy(NewBloomFilterPolicy(11)); // num_probes = 7
|
2019-09-09 14:49:39 -07:00
|
|
|
for (int key = 0; key < 87; key++) {
|
|
|
|
Add(Key(key, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
ASSERT_EQ(BloomHash(FilterData()), 1694458207);
|
|
|
|
|
2019-09-20 12:00:55 -07:00
|
|
|
ResetPolicy(NewBloomFilterPolicy(10)); // num_probes = 6
|
2019-09-09 14:49:39 -07:00
|
|
|
for (int key = 0; key < 87; key++) {
|
|
|
|
Add(Key(key, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
ASSERT_EQ(BloomHash(FilterData()), 2373646410U);
|
|
|
|
|
|
|
|
ResetPolicy(NewBloomFilterPolicy(10));
|
|
|
|
for (int key = 1; key < 87; key++) {
|
|
|
|
Add(Key(key, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
ASSERT_EQ(BloomHash(FilterData()), 1908442116);
|
|
|
|
|
|
|
|
ResetPolicy(NewBloomFilterPolicy(10));
|
|
|
|
for (int key = 1; key < 88; key++) {
|
|
|
|
Add(Key(key, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
ASSERT_EQ(BloomHash(FilterData()), 3057004015U);
|
|
|
|
|
|
|
|
ResetPolicy();
|
|
|
|
}
|
|
|
|
|
2012-04-17 08:36:46 -07:00
|
|
|
// Different bits-per-byte
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
class FullBloomTest : public testing::Test {
|
2014-09-08 10:37:05 -07:00
|
|
|
private:
|
2019-09-09 14:49:39 -07:00
|
|
|
std::unique_ptr<const FilterPolicy> policy_;
|
2014-09-08 10:37:05 -07:00
|
|
|
std::unique_ptr<FilterBitsBuilder> bits_builder_;
|
|
|
|
std::unique_ptr<FilterBitsReader> bits_reader_;
|
|
|
|
std::unique_ptr<const char[]> buf_;
|
|
|
|
size_t filter_size_;
|
|
|
|
|
|
|
|
public:
|
|
|
|
FullBloomTest() :
|
|
|
|
policy_(NewBloomFilterPolicy(FLAGS_bits_per_key, false)),
|
|
|
|
filter_size_(0) {
|
|
|
|
Reset();
|
|
|
|
}
|
|
|
|
|
2017-07-02 10:36:10 -07:00
|
|
|
FullFilterBitsBuilder* GetFullFilterBitsBuilder() {
|
|
|
|
return dynamic_cast<FullFilterBitsBuilder*>(bits_builder_.get());
|
|
|
|
}
|
|
|
|
|
2014-09-08 10:37:05 -07:00
|
|
|
void Reset() {
|
|
|
|
bits_builder_.reset(policy_->GetFilterBitsBuilder());
|
|
|
|
bits_reader_.reset(nullptr);
|
|
|
|
buf_.reset(nullptr);
|
|
|
|
filter_size_ = 0;
|
|
|
|
}
|
|
|
|
|
2019-09-09 14:49:39 -07:00
|
|
|
void ResetPolicy(const FilterPolicy* policy = nullptr) {
|
|
|
|
if (policy == nullptr) {
|
|
|
|
policy_.reset(NewBloomFilterPolicy(FLAGS_bits_per_key, false));
|
|
|
|
} else {
|
|
|
|
policy_.reset(policy);
|
|
|
|
}
|
|
|
|
Reset();
|
|
|
|
}
|
|
|
|
|
2014-09-08 10:37:05 -07:00
|
|
|
void Add(const Slice& s) {
|
|
|
|
bits_builder_->AddKey(s);
|
|
|
|
}
|
|
|
|
|
2019-10-02 15:31:54 -07:00
|
|
|
void OpenRaw(const Slice& s) {
|
|
|
|
bits_reader_.reset(policy_->GetFilterBitsReader(s));
|
|
|
|
}
|
|
|
|
|
2014-09-08 10:37:05 -07:00
|
|
|
void Build() {
|
|
|
|
Slice filter = bits_builder_->Finish(&buf_);
|
|
|
|
bits_reader_.reset(policy_->GetFilterBitsReader(filter));
|
|
|
|
filter_size_ = filter.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t FilterSize() const {
|
|
|
|
return filter_size_;
|
|
|
|
}
|
|
|
|
|
2019-09-20 12:00:55 -07:00
|
|
|
Slice FilterData() { return Slice(buf_.get(), filter_size_); }
|
2019-09-09 14:49:39 -07:00
|
|
|
|
2014-09-08 10:37:05 -07:00
|
|
|
bool Matches(const Slice& s) {
|
|
|
|
if (bits_reader_ == nullptr) {
|
|
|
|
Build();
|
|
|
|
}
|
|
|
|
return bits_reader_->MayMatch(s);
|
|
|
|
}
|
|
|
|
|
2019-10-02 15:31:54 -07:00
|
|
|
uint64_t PackedMatches() {
|
|
|
|
char buffer[sizeof(int)];
|
|
|
|
uint64_t result = 0;
|
|
|
|
for (int i = 0; i < 64; i++) {
|
|
|
|
if (Matches(Key(i + 12345, buffer))) {
|
2019-10-03 13:17:44 -07:00
|
|
|
result |= uint64_t{1} << i;
|
2019-10-02 15:31:54 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2014-09-08 10:37:05 -07:00
|
|
|
double FalsePositiveRate() {
|
|
|
|
char buffer[sizeof(int)];
|
|
|
|
int result = 0;
|
|
|
|
for (int i = 0; i < 10000; i++) {
|
|
|
|
if (Matches(Key(i + 1000000000, buffer))) {
|
|
|
|
result++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result / 10000.0;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2017-07-02 10:36:10 -07:00
|
|
|
TEST_F(FullBloomTest, FilterSize) {
|
|
|
|
uint32_t dont_care1, dont_care2;
|
|
|
|
auto full_bits_builder = GetFullFilterBitsBuilder();
|
|
|
|
for (int n = 1; n < 100; n++) {
|
|
|
|
auto space = full_bits_builder->CalculateSpace(n, &dont_care1, &dont_care2);
|
|
|
|
auto n2 = full_bits_builder->CalculateNumEntry(space);
|
|
|
|
ASSERT_GE(n2, n);
|
|
|
|
auto space2 =
|
|
|
|
full_bits_builder->CalculateSpace(n2, &dont_care1, &dont_care2);
|
|
|
|
ASSERT_EQ(space, space2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
TEST_F(FullBloomTest, FullEmptyFilter) {
|
2014-09-08 10:37:05 -07:00
|
|
|
// Empty filter is not match, at this level
|
|
|
|
ASSERT_TRUE(!Matches("hello"));
|
|
|
|
ASSERT_TRUE(!Matches("world"));
|
|
|
|
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
TEST_F(FullBloomTest, FullSmall) {
|
2014-09-08 10:37:05 -07:00
|
|
|
Add("hello");
|
|
|
|
Add("world");
|
|
|
|
ASSERT_TRUE(Matches("hello"));
|
|
|
|
ASSERT_TRUE(Matches("world"));
|
|
|
|
ASSERT_TRUE(!Matches("x"));
|
|
|
|
ASSERT_TRUE(!Matches("foo"));
|
|
|
|
}
|
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
TEST_F(FullBloomTest, FullVaryingLengths) {
|
2014-09-08 10:37:05 -07:00
|
|
|
char buffer[sizeof(int)];
|
|
|
|
|
|
|
|
// Count number of filters that significantly exceed the false positive rate
|
|
|
|
int mediocre_filters = 0;
|
|
|
|
int good_filters = 0;
|
|
|
|
|
|
|
|
for (int length = 1; length <= 10000; length = NextLength(length)) {
|
|
|
|
Reset();
|
|
|
|
for (int i = 0; i < length; i++) {
|
|
|
|
Add(Key(i, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
|
2019-09-16 16:15:18 -07:00
|
|
|
ASSERT_LE(FilterSize(),
|
|
|
|
(size_t)((length * 10 / 8) + CACHE_LINE_SIZE * 2 + 5));
|
2014-09-08 10:37:05 -07:00
|
|
|
|
|
|
|
// All added keys must match
|
|
|
|
for (int i = 0; i < length; i++) {
|
|
|
|
ASSERT_TRUE(Matches(Key(i, buffer)))
|
|
|
|
<< "Length " << length << "; key " << i;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check false positive rate
|
|
|
|
double rate = FalsePositiveRate();
|
|
|
|
if (kVerbose >= 1) {
|
|
|
|
fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
|
|
|
|
rate*100.0, length, static_cast<int>(FilterSize()));
|
|
|
|
}
|
|
|
|
ASSERT_LE(rate, 0.02); // Must not be over 2%
|
|
|
|
if (rate > 0.0125)
|
|
|
|
mediocre_filters++; // Allowed, but not too often
|
|
|
|
else
|
|
|
|
good_filters++;
|
|
|
|
}
|
|
|
|
if (kVerbose >= 1) {
|
|
|
|
fprintf(stderr, "Filters: %d good, %d mediocre\n",
|
|
|
|
good_filters, mediocre_filters);
|
|
|
|
}
|
|
|
|
ASSERT_LE(mediocre_filters, good_filters/5);
|
|
|
|
}
|
|
|
|
|
2019-09-09 14:49:39 -07:00
|
|
|
namespace {
|
2019-09-20 12:00:55 -07:00
|
|
|
inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
|
|
|
|
uint32_t for256) {
|
2019-09-09 14:49:39 -07:00
|
|
|
(void)for64;
|
|
|
|
(void)for128;
|
|
|
|
(void)for256;
|
|
|
|
#if CACHE_LINE_SIZE == 64
|
|
|
|
return for64;
|
|
|
|
#elif CACHE_LINE_SIZE == 128
|
|
|
|
return for128;
|
|
|
|
#elif CACHE_LINE_SIZE == 256
|
|
|
|
return for256;
|
|
|
|
#else
|
2019-09-20 12:00:55 -07:00
|
|
|
#error "CACHE_LINE_SIZE unknown or unrecognized"
|
2019-09-09 14:49:39 -07:00
|
|
|
#endif
|
|
|
|
}
|
2019-09-20 12:00:55 -07:00
|
|
|
} // namespace
|
2019-09-09 14:49:39 -07:00
|
|
|
|
|
|
|
// Ensure the implementation doesn't accidentally change in an
|
|
|
|
// incompatible way
|
|
|
|
TEST_F(FullBloomTest, Schema) {
|
|
|
|
char buffer[sizeof(int)];
|
|
|
|
|
|
|
|
// Use enough keys so that changing bits / key by 1 is guaranteed to
|
|
|
|
// change number of allocated cache lines. So keys > max cache line bits.
|
|
|
|
|
2019-09-20 12:00:55 -07:00
|
|
|
ResetPolicy(NewBloomFilterPolicy(8)); // num_probes = 5
|
2019-09-09 14:49:39 -07:00
|
|
|
for (int key = 0; key < 2087; key++) {
|
|
|
|
Add(Key(key, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
ASSERT_EQ(BloomHash(FilterData()),
|
|
|
|
SelectByCacheLineSize(1302145999, 2811644657U, 756553699));
|
|
|
|
|
2019-09-20 12:00:55 -07:00
|
|
|
ResetPolicy(NewBloomFilterPolicy(9)); // num_probes = 6
|
2019-09-09 14:49:39 -07:00
|
|
|
for (int key = 0; key < 2087; key++) {
|
|
|
|
Add(Key(key, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
ASSERT_EQ(BloomHash(FilterData()),
|
|
|
|
SelectByCacheLineSize(2092755149, 661139132, 1182970461));
|
|
|
|
|
2019-09-20 12:00:55 -07:00
|
|
|
ResetPolicy(NewBloomFilterPolicy(11)); // num_probes = 7
|
2019-09-09 14:49:39 -07:00
|
|
|
for (int key = 0; key < 2087; key++) {
|
|
|
|
Add(Key(key, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
ASSERT_EQ(BloomHash(FilterData()),
|
|
|
|
SelectByCacheLineSize(3755609649U, 1812694762, 1449142939));
|
|
|
|
|
2019-09-20 12:00:55 -07:00
|
|
|
ResetPolicy(NewBloomFilterPolicy(10)); // num_probes = 6
|
2019-09-09 14:49:39 -07:00
|
|
|
for (int key = 0; key < 2087; key++) {
|
|
|
|
Add(Key(key, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
ASSERT_EQ(BloomHash(FilterData()),
|
|
|
|
SelectByCacheLineSize(1478976371, 2910591341U, 1182970461));
|
|
|
|
|
|
|
|
ResetPolicy(NewBloomFilterPolicy(10));
|
|
|
|
for (int key = 1; key < 2087; key++) {
|
|
|
|
Add(Key(key, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
ASSERT_EQ(BloomHash(FilterData()),
|
|
|
|
SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U));
|
|
|
|
|
|
|
|
ResetPolicy(NewBloomFilterPolicy(10));
|
|
|
|
for (int key = 1; key < 2088; key++) {
|
|
|
|
Add(Key(key, buffer));
|
|
|
|
}
|
|
|
|
Build();
|
|
|
|
ASSERT_EQ(BloomHash(FilterData()),
|
|
|
|
SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
|
|
|
|
|
|
|
|
ResetPolicy();
|
|
|
|
}
|
|
|
|
|
2019-10-02 15:31:54 -07:00
|
|
|
// A helper class for testing custom or corrupt filter bits as read by
|
|
|
|
// FullFilterBitsReader.
|
|
|
|
struct RawFilterTester {
|
|
|
|
// Buffer, from which we always return a tail Slice, so the
|
|
|
|
// last five bytes are always the metadata bytes.
|
|
|
|
std::array<char, 3000> data_;
|
|
|
|
// Points five bytes from the end
|
|
|
|
char* metadata_ptr_;
|
|
|
|
|
|
|
|
RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {}
|
|
|
|
|
|
|
|
Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines,
|
|
|
|
uint32_t num_probes) {
|
|
|
|
metadata_ptr_[0] = static_cast<char>(num_probes);
|
|
|
|
EncodeFixed32(metadata_ptr_ + 1, num_lines);
|
|
|
|
uint32_t len = len_without_metadata + /*metadata*/ 5;
|
|
|
|
assert(len <= data_.size());
|
|
|
|
return Slice(metadata_ptr_ - len_without_metadata, len);
|
|
|
|
}
|
|
|
|
|
|
|
|
Slice Reset(uint32_t len_without_metadata, uint32_t num_lines,
|
|
|
|
uint32_t num_probes, bool fill_ones) {
|
|
|
|
data_.fill(fill_ones ? 0xff : 0);
|
|
|
|
return ResetNoFill(len_without_metadata, num_lines, num_probes);
|
|
|
|
}
|
|
|
|
|
|
|
|
Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines,
|
|
|
|
uint32_t num_probes) {
|
|
|
|
for (uint32_t i = 0; i < data_.size(); ++i) {
|
|
|
|
data_[i] = static_cast<char>(0x7b7b >> (i % 7));
|
|
|
|
}
|
|
|
|
return ResetNoFill(len_without_metadata, num_lines, num_probes);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
TEST_F(FullBloomTest, RawSchema) {
|
|
|
|
RawFilterTester cft;
|
|
|
|
// Two probes, about 3/4 bits set: ~50% "FP" rate
|
|
|
|
// One 256-byte cache line.
|
|
|
|
OpenRaw(cft.ResetWeirdFill(256, 1, 2));
|
2019-10-03 13:17:44 -07:00
|
|
|
ASSERT_EQ(uint64_t{11384799501900898790U}, PackedMatches());
|
2019-10-02 15:31:54 -07:00
|
|
|
|
|
|
|
// Two 128-byte cache lines.
|
|
|
|
OpenRaw(cft.ResetWeirdFill(256, 2, 2));
|
2019-10-03 13:17:44 -07:00
|
|
|
ASSERT_EQ(uint64_t{10157853359773492589U}, PackedMatches());
|
2019-10-02 15:31:54 -07:00
|
|
|
|
|
|
|
// Four 64-byte cache lines.
|
|
|
|
OpenRaw(cft.ResetWeirdFill(256, 4, 2));
|
2019-10-03 13:17:44 -07:00
|
|
|
ASSERT_EQ(uint64_t{7123594913907464682U}, PackedMatches());
|
2019-10-02 15:31:54 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
TEST_F(FullBloomTest, CorruptFilters) {
|
|
|
|
RawFilterTester cft;
|
|
|
|
|
|
|
|
for (bool fill : {false, true}) {
|
|
|
|
// Good filter bits - returns same as fill
|
|
|
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
|
|
|
|
ASSERT_EQ(fill, Matches("hello"));
|
|
|
|
ASSERT_EQ(fill, Matches("world"));
|
|
|
|
|
|
|
|
// Good filter bits - returns same as fill
|
|
|
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill));
|
|
|
|
ASSERT_EQ(fill, Matches("hello"));
|
|
|
|
ASSERT_EQ(fill, Matches("world"));
|
|
|
|
|
|
|
|
// Good filter bits - returns same as fill
|
|
|
|
// 256 is unusual but legal cache line size
|
|
|
|
OpenRaw(cft.Reset(256 * 3, 3, 6, fill));
|
|
|
|
ASSERT_EQ(fill, Matches("hello"));
|
|
|
|
ASSERT_EQ(fill, Matches("world"));
|
|
|
|
|
|
|
|
// Good filter bits - returns same as fill
|
|
|
|
// 30 should be max num_probes
|
|
|
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill));
|
|
|
|
ASSERT_EQ(fill, Matches("hello"));
|
|
|
|
ASSERT_EQ(fill, Matches("world"));
|
|
|
|
|
|
|
|
// Good filter bits - returns same as fill
|
|
|
|
// 1 should be min num_probes
|
|
|
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill));
|
|
|
|
ASSERT_EQ(fill, Matches("hello"));
|
|
|
|
ASSERT_EQ(fill, Matches("world"));
|
|
|
|
|
|
|
|
// Type 1 trivial filter bits - returns true as if FP by zero probes
|
|
|
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill));
|
|
|
|
ASSERT_TRUE(Matches("hello"));
|
|
|
|
ASSERT_TRUE(Matches("world"));
|
|
|
|
|
|
|
|
// Type 2 trivial filter bits - returns false as if built from zero keys
|
|
|
|
OpenRaw(cft.Reset(0, 0, 6, fill));
|
|
|
|
ASSERT_FALSE(Matches("hello"));
|
|
|
|
ASSERT_FALSE(Matches("world"));
|
|
|
|
|
|
|
|
// Type 2 trivial filter bits - returns false as if built from zero keys
|
|
|
|
OpenRaw(cft.Reset(0, 37, 6, fill));
|
|
|
|
ASSERT_FALSE(Matches("hello"));
|
|
|
|
ASSERT_FALSE(Matches("world"));
|
|
|
|
|
|
|
|
// Type 2 trivial filter bits - returns false as 0 size trumps 0 probes
|
|
|
|
OpenRaw(cft.Reset(0, 0, 0, fill));
|
|
|
|
ASSERT_FALSE(Matches("hello"));
|
|
|
|
ASSERT_FALSE(Matches("world"));
|
|
|
|
|
|
|
|
// Bad filter bits - returns true for safety
|
|
|
|
// No solution to 0 * x == CACHE_LINE_SIZE
|
|
|
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill));
|
|
|
|
ASSERT_TRUE(Matches("hello"));
|
|
|
|
ASSERT_TRUE(Matches("world"));
|
|
|
|
|
|
|
|
// Bad filter bits - returns true for safety
|
|
|
|
// Can't have 3 * x == 4 for integer x
|
|
|
|
OpenRaw(cft.Reset(4, 3, 6, fill));
|
|
|
|
ASSERT_TRUE(Matches("hello"));
|
|
|
|
ASSERT_TRUE(Matches("world"));
|
|
|
|
|
|
|
|
// Bad filter bits - returns true for safety
|
|
|
|
// 97 bytes is not a power of two, so not a legal cache line size
|
|
|
|
OpenRaw(cft.Reset(97 * 3, 3, 6, fill));
|
|
|
|
ASSERT_TRUE(Matches("hello"));
|
|
|
|
ASSERT_TRUE(Matches("world"));
|
|
|
|
|
|
|
|
// Bad filter bits
|
|
|
|
// 65 bytes is not a power of two, so not a legal cache line size
|
|
|
|
OpenRaw(cft.Reset(65 * 3, 3, 6, fill));
|
|
|
|
// ASSERT_TRUE(Matches("hello"));
|
|
|
|
// ASSERT_TRUE(Matches("world"));
|
|
|
|
// NB: NOT PROPERLY CHECKED in implementation
|
|
|
|
ASSERT_EQ(fill, Matches("hello"));
|
|
|
|
ASSERT_EQ(fill, Matches("world"));
|
|
|
|
|
|
|
|
// Bad filter bits - returns false as if built from zero keys
|
|
|
|
// < 5 bytes overall means missing even metadata
|
|
|
|
OpenRaw(cft.Reset(-1, 3, 6, fill));
|
|
|
|
ASSERT_FALSE(Matches("hello"));
|
|
|
|
ASSERT_FALSE(Matches("world"));
|
|
|
|
|
|
|
|
OpenRaw(cft.Reset(-5, 3, 6, fill));
|
|
|
|
ASSERT_FALSE(Matches("hello"));
|
|
|
|
ASSERT_FALSE(Matches("world"));
|
|
|
|
|
|
|
|
// Dubious filter bits - returns same as fill (for now)
|
|
|
|
// 31 is not a useful num_probes, nor generated by RocksDB unless directly
|
|
|
|
// using filter bits API without BloomFilterPolicy.
|
|
|
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill));
|
|
|
|
ASSERT_EQ(fill, Matches("hello"));
|
|
|
|
ASSERT_EQ(fill, Matches("world"));
|
|
|
|
|
|
|
|
// Dubious filter bits - returns same as fill (for now)
|
|
|
|
// Similar, with 127, largest positive char
|
|
|
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill));
|
|
|
|
ASSERT_EQ(fill, Matches("hello"));
|
|
|
|
ASSERT_EQ(fill, Matches("world"));
|
|
|
|
|
|
|
|
// Dubious filter bits - returns true (for now)
|
|
|
|
// num_probes set to 128 / -128, lowest negative char
|
|
|
|
// NB: Bug in implementation interprets this as negative and has same
|
|
|
|
// effect as zero probes, but effectively reserves negative char values
|
|
|
|
// for future use.
|
|
|
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill));
|
|
|
|
ASSERT_TRUE(Matches("hello"));
|
|
|
|
ASSERT_TRUE(Matches("world"));
|
|
|
|
|
|
|
|
// Dubious filter bits - returns true (for now)
|
|
|
|
// Similar, with 255 / -1
|
|
|
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 255, fill));
|
|
|
|
ASSERT_TRUE(Matches("hello"));
|
|
|
|
ASSERT_TRUE(Matches("world"));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-10-03 21:49:15 -07:00
|
|
|
} // namespace rocksdb
|
2012-04-17 08:36:46 -07:00
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
2015-03-17 14:08:00 -07:00
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
2014-05-09 08:34:18 -07:00
|
|
|
ParseCommandLineFlags(&argc, &argv, true);
|
2013-11-27 14:27:02 -08:00
|
|
|
|
2015-03-17 14:08:00 -07:00
|
|
|
return RUN_ALL_TESTS();
|
2012-04-17 08:36:46 -07:00
|
|
|
}
|
2014-05-09 08:34:18 -07:00
|
|
|
|
|
|
|
#endif // GFLAGS
|