Add Ribbon schema test to bloom_test (#7696)
Summary: These new unit tests should ensure that we don't accidentally change the interpretation of bits for what I call Standard128Ribbon filter internally, available publicly as NewExperimentalRibbonFilterPolicy. There is very little intuitive reason for the values we check against in these tests; I just plug in the right expected values upon watching the test fail initially. Most (but not all) of the tests are essentially "whitebox" "round-trip." We create a filter from fixed keys, and first compare the checksum of those filter bytes against a saved value. We also run queries against other fixed keys, comparing which return false positives against a saved set. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7696 Test Plan: test addition and refactoring only Reviewed By: jay-zhuang Differential Revision: D25082289 Pulled By: pdillinger fbshipit-source-id: b5ca646fdcb5a1c2ad2085eda4a1fd44c4287f67
This commit is contained in:
parent
1a5fc4f577
commit
0baa5055f1
@ -322,6 +322,13 @@ class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int GetRibbonSeedFromFilterData() {
|
||||||
|
assert(filter_size_ >= 5);
|
||||||
|
// Check for ribbon marker
|
||||||
|
assert(-2 == static_cast<int8_t>(buf_.get()[filter_size_ - 5]));
|
||||||
|
return static_cast<uint8_t>(buf_.get()[filter_size_ - 4]);
|
||||||
|
}
|
||||||
|
|
||||||
bool Matches(const Slice& s) {
|
bool Matches(const Slice& s) {
|
||||||
if (bits_reader_ == nullptr) {
|
if (bits_reader_ == nullptr) {
|
||||||
Build();
|
Build();
|
||||||
@ -372,23 +379,6 @@ class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> {
|
|||||||
}
|
}
|
||||||
return result / 10000.0;
|
return result / 10000.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t SelectByImpl(uint32_t for_legacy_bloom,
|
|
||||||
uint32_t for_fast_local_bloom) {
|
|
||||||
switch (GetParam()) {
|
|
||||||
case BloomFilterPolicy::kLegacyBloom:
|
|
||||||
return for_legacy_bloom;
|
|
||||||
case BloomFilterPolicy::kFastLocalBloom:
|
|
||||||
return for_fast_local_bloom;
|
|
||||||
case BloomFilterPolicy::kDeprecatedBlock:
|
|
||||||
case BloomFilterPolicy::kAutoBloom:
|
|
||||||
case BloomFilterPolicy::kStandard128Ribbon:
|
|
||||||
/* N/A */;
|
|
||||||
}
|
|
||||||
// otherwise
|
|
||||||
assert(false);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_P(FullBloomTest, FilterSize) {
|
TEST_P(FullBloomTest, FilterSize) {
|
||||||
@ -601,98 +591,160 @@ inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
|
|||||||
// ability to read filters generated using other cache line sizes.
|
// ability to read filters generated using other cache line sizes.
|
||||||
// See RawSchema.
|
// See RawSchema.
|
||||||
TEST_P(FullBloomTest, Schema) {
|
TEST_P(FullBloomTest, Schema) {
|
||||||
if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) {
|
#define EXPECT_EQ_Bloom(a, b) \
|
||||||
// TODO ASAP to ensure schema stability
|
{ \
|
||||||
return;
|
if (GetParam() != BloomFilterPolicy::kStandard128Ribbon) { \
|
||||||
|
EXPECT_EQ(a, b); \
|
||||||
|
} \
|
||||||
}
|
}
|
||||||
|
#define EXPECT_EQ_Ribbon(a, b) \
|
||||||
|
{ \
|
||||||
|
if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) { \
|
||||||
|
EXPECT_EQ(a, b); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
#define EXPECT_EQ_FastBloom(a, b) \
|
||||||
|
{ \
|
||||||
|
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) { \
|
||||||
|
EXPECT_EQ(a, b); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
#define EXPECT_EQ_LegacyBloom(a, b) \
|
||||||
|
{ \
|
||||||
|
if (GetParam() == BloomFilterPolicy::kLegacyBloom) { \
|
||||||
|
EXPECT_EQ(a, b); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
#define EXPECT_EQ_NotLegacy(a, b) \
|
||||||
|
{ \
|
||||||
|
if (GetParam() != BloomFilterPolicy::kLegacyBloom) { \
|
||||||
|
EXPECT_EQ(a, b); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
char buffer[sizeof(int)];
|
char buffer[sizeof(int)];
|
||||||
|
|
||||||
// Use enough keys so that changing bits / key by 1 is guaranteed to
|
// First do a small number of keys, where Ribbon config will fall back on
|
||||||
|
// fast Bloom filter and generate the same data
|
||||||
|
ResetPolicy(5); // num_probes = 3
|
||||||
|
for (int key = 0; key < 87; key++) {
|
||||||
|
Add(Key(key, buffer));
|
||||||
|
}
|
||||||
|
Build();
|
||||||
|
EXPECT_EQ(GetNumProbesFromFilterData(), 3);
|
||||||
|
|
||||||
|
EXPECT_EQ_NotLegacy(BloomHash(FilterData()), 4130687756U);
|
||||||
|
|
||||||
|
EXPECT_EQ_NotLegacy("31,38,40,43,61,83,86,112,125,131", FirstFPs(10));
|
||||||
|
|
||||||
|
// Now use enough keys so that changing bits / key by 1 is guaranteed to
|
||||||
// change number of allocated cache lines. So keys > max cache line bits.
|
// change number of allocated cache lines. So keys > max cache line bits.
|
||||||
|
|
||||||
|
// Note that the first attempted Ribbon seed is determined by the hash
|
||||||
|
// of the first key added (for pseudorandomness in practice, determinism in
|
||||||
|
// testing)
|
||||||
|
|
||||||
ResetPolicy(2); // num_probes = 1
|
ResetPolicy(2); // num_probes = 1
|
||||||
for (int key = 0; key < 2087; key++) {
|
for (int key = 0; key < 2087; key++) {
|
||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(GetNumProbesFromFilterData(), 1);
|
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 1);
|
||||||
EXPECT_EQ(
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
|
||||||
|
|
||||||
|
EXPECT_EQ_LegacyBloom(
|
||||||
BloomHash(FilterData()),
|
BloomHash(FilterData()),
|
||||||
SelectByImpl(SelectByCacheLineSize(1567096579, 1964771444, 2659542661U),
|
SelectByCacheLineSize(1567096579, 1964771444, 2659542661U));
|
||||||
3817481309U));
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3817481309U);
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1705851228);
|
||||||
EXPECT_EQ("11,13,17,25,29,30,35,37,45,53", FirstFPs(10));
|
|
||||||
}
|
EXPECT_EQ_FastBloom("11,13,17,25,29,30,35,37,45,53", FirstFPs(10));
|
||||||
|
EXPECT_EQ_Ribbon("3,8,10,17,19,20,23,28,31,32", FirstFPs(10));
|
||||||
|
|
||||||
ResetPolicy(3); // num_probes = 2
|
ResetPolicy(3); // num_probes = 2
|
||||||
for (int key = 0; key < 2087; key++) {
|
for (int key = 0; key < 2087; key++) {
|
||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(GetNumProbesFromFilterData(), 2);
|
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 2);
|
||||||
EXPECT_EQ(
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
|
||||||
|
|
||||||
|
EXPECT_EQ_LegacyBloom(
|
||||||
BloomHash(FilterData()),
|
BloomHash(FilterData()),
|
||||||
SelectByImpl(SelectByCacheLineSize(2707206547U, 2571983456U, 218344685),
|
SelectByCacheLineSize(2707206547U, 2571983456U, 218344685));
|
||||||
2807269961U));
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2807269961U);
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1095342358);
|
||||||
EXPECT_EQ("4,15,17,24,27,28,29,53,63,70", FirstFPs(10));
|
|
||||||
}
|
EXPECT_EQ_FastBloom("4,15,17,24,27,28,29,53,63,70", FirstFPs(10));
|
||||||
|
EXPECT_EQ_Ribbon("3,17,20,28,32,33,36,43,49,54", FirstFPs(10));
|
||||||
|
|
||||||
ResetPolicy(5); // num_probes = 3
|
ResetPolicy(5); // num_probes = 3
|
||||||
for (int key = 0; key < 2087; key++) {
|
for (int key = 0; key < 2087; key++) {
|
||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(GetNumProbesFromFilterData(), 3);
|
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 3);
|
||||||
EXPECT_EQ(
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
|
||||||
|
|
||||||
|
EXPECT_EQ_LegacyBloom(
|
||||||
BloomHash(FilterData()),
|
BloomHash(FilterData()),
|
||||||
SelectByImpl(SelectByCacheLineSize(515748486, 94611728, 2436112214U),
|
SelectByCacheLineSize(515748486, 94611728, 2436112214U));
|
||||||
204628445));
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 204628445);
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3971337699U);
|
||||||
EXPECT_EQ("15,24,29,39,53,87,89,100,103,104", FirstFPs(10));
|
|
||||||
}
|
EXPECT_EQ_FastBloom("15,24,29,39,53,87,89,100,103,104", FirstFPs(10));
|
||||||
|
EXPECT_EQ_Ribbon("3,33,36,43,67,70,76,78,84,102", FirstFPs(10));
|
||||||
|
|
||||||
ResetPolicy(8); // num_probes = 5
|
ResetPolicy(8); // num_probes = 5
|
||||||
for (int key = 0; key < 2087; key++) {
|
for (int key = 0; key < 2087; key++) {
|
||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(GetNumProbesFromFilterData(), 5);
|
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 5);
|
||||||
EXPECT_EQ(
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
|
||||||
|
|
||||||
|
EXPECT_EQ_LegacyBloom(
|
||||||
BloomHash(FilterData()),
|
BloomHash(FilterData()),
|
||||||
SelectByImpl(SelectByCacheLineSize(1302145999, 2811644657U, 756553699),
|
SelectByCacheLineSize(1302145999, 2811644657U, 756553699));
|
||||||
355564975));
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 355564975);
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3651449053U);
|
||||||
EXPECT_EQ("16,60,66,126,220,238,244,256,265,287", FirstFPs(10));
|
|
||||||
}
|
EXPECT_EQ_FastBloom("16,60,66,126,220,238,244,256,265,287", FirstFPs(10));
|
||||||
|
EXPECT_EQ_Ribbon("33,187,203,296,300,322,411,419,547,582", FirstFPs(10));
|
||||||
|
|
||||||
ResetPolicy(9); // num_probes = 6
|
ResetPolicy(9); // num_probes = 6
|
||||||
for (int key = 0; key < 2087; key++) {
|
for (int key = 0; key < 2087; key++) {
|
||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(GetNumProbesFromFilterData(), 6);
|
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
|
||||||
EXPECT_EQ(
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
|
||||||
|
|
||||||
|
EXPECT_EQ_LegacyBloom(
|
||||||
BloomHash(FilterData()),
|
BloomHash(FilterData()),
|
||||||
SelectByImpl(SelectByCacheLineSize(2092755149, 661139132, 1182970461),
|
SelectByCacheLineSize(2092755149, 661139132, 1182970461));
|
||||||
2137566013U));
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2137566013U);
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1005676675);
|
||||||
EXPECT_EQ("156,367,791,872,945,1015,1139,1159,1265,1435", FirstFPs(10));
|
|
||||||
}
|
EXPECT_EQ_FastBloom("156,367,791,872,945,1015,1139,1159,1265", FirstFPs(9));
|
||||||
|
EXPECT_EQ_Ribbon("33,187,203,296,411,419,604,612,615,619", FirstFPs(10));
|
||||||
|
|
||||||
ResetPolicy(11); // num_probes = 7
|
ResetPolicy(11); // num_probes = 7
|
||||||
for (int key = 0; key < 2087; key++) {
|
for (int key = 0; key < 2087; key++) {
|
||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(GetNumProbesFromFilterData(), 7);
|
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 7);
|
||||||
EXPECT_EQ(
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
|
||||||
|
|
||||||
|
EXPECT_EQ_LegacyBloom(
|
||||||
BloomHash(FilterData()),
|
BloomHash(FilterData()),
|
||||||
SelectByImpl(SelectByCacheLineSize(3755609649U, 1812694762, 1449142939),
|
SelectByCacheLineSize(3755609649U, 1812694762, 1449142939));
|
||||||
2561502687U));
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2561502687U);
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3129900846U);
|
||||||
EXPECT_EQ("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10));
|
|
||||||
}
|
EXPECT_EQ_FastBloom("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10));
|
||||||
|
EXPECT_EQ_Ribbon("411,419,623,665,727,794,955,1052,1323,1330", FirstFPs(10));
|
||||||
|
|
||||||
// This used to be 9 probes, but 8 is a better choice for speed,
|
// This used to be 9 probes, but 8 is a better choice for speed,
|
||||||
// especially with SIMD groups of 8 probes, with essentially no
|
// especially with SIMD groups of 8 probes, with essentially no
|
||||||
@ -705,15 +757,18 @@ TEST_P(FullBloomTest, Schema) {
|
|||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(static_cast<uint32_t>(GetNumProbesFromFilterData()),
|
EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 9);
|
||||||
SelectByImpl(9, 8));
|
EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 8);
|
||||||
EXPECT_EQ(
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
|
||||||
|
|
||||||
|
EXPECT_EQ_LegacyBloom(
|
||||||
BloomHash(FilterData()),
|
BloomHash(FilterData()),
|
||||||
SelectByImpl(SelectByCacheLineSize(178861123, 379087593, 2574136516U),
|
SelectByCacheLineSize(178861123, 379087593, 2574136516U));
|
||||||
3709876890U));
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3709876890U);
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1855638875);
|
||||||
EXPECT_EQ("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9));
|
|
||||||
}
|
EXPECT_EQ_FastBloom("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9));
|
||||||
|
EXPECT_EQ_Ribbon("665,727,1323,1755,3866,4232,4442,4492,4736", FirstFPs(9));
|
||||||
|
|
||||||
// This used to be 11 probes, but 9 is a better choice for speed
|
// This used to be 11 probes, but 9 is a better choice for speed
|
||||||
// AND accuracy.
|
// AND accuracy.
|
||||||
@ -725,57 +780,69 @@ TEST_P(FullBloomTest, Schema) {
|
|||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(static_cast<uint32_t>(GetNumProbesFromFilterData()),
|
EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 11);
|
||||||
SelectByImpl(11, 9));
|
EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 9);
|
||||||
EXPECT_EQ(
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
|
||||||
|
|
||||||
|
EXPECT_EQ_LegacyBloom(
|
||||||
BloomHash(FilterData()),
|
BloomHash(FilterData()),
|
||||||
SelectByImpl(SelectByCacheLineSize(1129406313, 3049154394U, 1727750964),
|
SelectByCacheLineSize(1129406313, 3049154394U, 1727750964));
|
||||||
1087138490));
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 1087138490);
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 459379967);
|
||||||
EXPECT_EQ("3299,3611,3916,6620,7822,8079,8482,8942,10167", FirstFPs(9));
|
|
||||||
}
|
EXPECT_EQ_FastBloom("3299,3611,3916,6620,7822,8079,8482,8942", FirstFPs(8));
|
||||||
|
EXPECT_EQ_Ribbon("727,1323,1755,4442,4736,5386,6974,7154,8222", FirstFPs(9));
|
||||||
|
|
||||||
ResetPolicy(10); // num_probes = 6, but different memory ratio vs. 9
|
ResetPolicy(10); // num_probes = 6, but different memory ratio vs. 9
|
||||||
for (int key = 0; key < 2087; key++) {
|
for (int key = 0; key < 2087; key++) {
|
||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(GetNumProbesFromFilterData(), 6);
|
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
|
||||||
EXPECT_EQ(
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
|
||||||
|
|
||||||
|
EXPECT_EQ_LegacyBloom(
|
||||||
BloomHash(FilterData()),
|
BloomHash(FilterData()),
|
||||||
SelectByImpl(SelectByCacheLineSize(1478976371, 2910591341U, 1182970461),
|
SelectByCacheLineSize(1478976371, 2910591341U, 1182970461));
|
||||||
2498541272U));
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2498541272U);
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1273231667);
|
||||||
EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
|
|
||||||
}
|
EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
|
||||||
|
EXPECT_EQ_Ribbon("296,411,419,612,619,623,630,665,686,727", FirstFPs(10));
|
||||||
|
|
||||||
ResetPolicy(10);
|
ResetPolicy(10);
|
||||||
for (int key = /*CHANGED*/ 1; key < 2087; key++) {
|
for (int key = /*CHANGED*/ 1; key < 2087; key++) {
|
||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(GetNumProbesFromFilterData(), 6);
|
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
|
||||||
EXPECT_EQ(
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), /*CHANGED*/ 184);
|
||||||
|
|
||||||
|
EXPECT_EQ_LegacyBloom(
|
||||||
BloomHash(FilterData()),
|
BloomHash(FilterData()),
|
||||||
SelectByImpl(SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U),
|
SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U));
|
||||||
2058382345U));
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2058382345U);
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3007790572U);
|
||||||
EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
|
|
||||||
}
|
EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
|
||||||
|
EXPECT_EQ_Ribbon("33,152,383,497,589,633,737,781,911,990", FirstFPs(10));
|
||||||
|
|
||||||
ResetPolicy(10);
|
ResetPolicy(10);
|
||||||
for (int key = 1; key < /*CHANGED*/ 2088; key++) {
|
for (int key = 1; key < /*CHANGED*/ 2088; key++) {
|
||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(GetNumProbesFromFilterData(), 6);
|
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
|
||||||
EXPECT_EQ(
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
|
||||||
|
|
||||||
|
EXPECT_EQ_LegacyBloom(
|
||||||
BloomHash(FilterData()),
|
BloomHash(FilterData()),
|
||||||
SelectByImpl(SelectByCacheLineSize(2885052954U, 769447944, 4175124908U),
|
SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
|
||||||
23699164));
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 23699164);
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1942323379);
|
||||||
EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
|
|
||||||
}
|
EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
|
||||||
|
EXPECT_EQ_Ribbon("33,95,360,589,737,911,990,1048,1081,1414", FirstFPs(10));
|
||||||
|
|
||||||
// With new fractional bits_per_key, check that we are rounding to
|
// With new fractional bits_per_key, check that we are rounding to
|
||||||
// whole bits per key for old Bloom filters but fractional for
|
// whole bits per key for old Bloom filters but fractional for
|
||||||
@ -785,31 +852,35 @@ TEST_P(FullBloomTest, Schema) {
|
|||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(GetNumProbesFromFilterData(), 6);
|
EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
|
||||||
EXPECT_EQ(BloomHash(FilterData()),
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
|
||||||
SelectByImpl(/*SAME*/ SelectByCacheLineSize(2885052954U, 769447944,
|
|
||||||
4175124908U),
|
EXPECT_EQ_LegacyBloom(
|
||||||
/*CHANGED*/ 3166884174U));
|
BloomHash(FilterData()),
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
/*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
|
||||||
EXPECT_EQ(/*CHANGED*/ "126,156,367,444,458,791,813,976,1015,1035",
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3166884174U);
|
||||||
FirstFPs(10));
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1148258663);
|
||||||
}
|
|
||||||
|
EXPECT_EQ_FastBloom("126,156,367,444,458,791,813,976,1015", FirstFPs(9));
|
||||||
|
EXPECT_EQ_Ribbon("33,54,95,360,589,693,737,911,990,1048", FirstFPs(10));
|
||||||
|
|
||||||
ResetPolicy(10.499);
|
ResetPolicy(10.499);
|
||||||
for (int key = 1; key < 2088; key++) {
|
for (int key = 1; key < 2088; key++) {
|
||||||
Add(Key(key, buffer));
|
Add(Key(key, buffer));
|
||||||
}
|
}
|
||||||
Build();
|
Build();
|
||||||
EXPECT_EQ(static_cast<uint32_t>(GetNumProbesFromFilterData()),
|
EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 6);
|
||||||
SelectByImpl(6, 7));
|
EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 7);
|
||||||
EXPECT_EQ(BloomHash(FilterData()),
|
EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
|
||||||
SelectByImpl(/*SAME*/ SelectByCacheLineSize(2885052954U, 769447944,
|
|
||||||
4175124908U),
|
EXPECT_EQ_LegacyBloom(
|
||||||
/*CHANGED*/ 4098502778U));
|
BloomHash(FilterData()),
|
||||||
if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
|
/*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
|
||||||
EXPECT_EQ(/*CHANGED*/ "16,236,240,472,1015,1045,1111,1409,1465,1612",
|
EXPECT_EQ_FastBloom(BloomHash(FilterData()), 4098502778U);
|
||||||
FirstFPs(10));
|
EXPECT_EQ_Ribbon(BloomHash(FilterData()), 792138188);
|
||||||
}
|
|
||||||
|
EXPECT_EQ_FastBloom("16,236,240,472,1015,1045,1111,1409,1465", FirstFPs(9));
|
||||||
|
EXPECT_EQ_Ribbon("33,95,360,589,737,990,1048,1081,1414,1643", FirstFPs(10));
|
||||||
|
|
||||||
ResetPolicy();
|
ResetPolicy();
|
||||||
}
|
}
|
||||||
@ -851,6 +922,7 @@ struct RawFilterTester {
|
|||||||
|
|
||||||
TEST_P(FullBloomTest, RawSchema) {
|
TEST_P(FullBloomTest, RawSchema) {
|
||||||
RawFilterTester cft;
|
RawFilterTester cft;
|
||||||
|
// Legacy Bloom configurations
|
||||||
// Two probes, about 3/4 bits set: ~50% "FP" rate
|
// Two probes, about 3/4 bits set: ~50% "FP" rate
|
||||||
// One 256-byte cache line.
|
// One 256-byte cache line.
|
||||||
OpenRaw(cft.ResetWeirdFill(256, 1, 2));
|
OpenRaw(cft.ResetWeirdFill(256, 1, 2));
|
||||||
@ -863,12 +935,37 @@ TEST_P(FullBloomTest, RawSchema) {
|
|||||||
// Four 64-byte cache lines.
|
// Four 64-byte cache lines.
|
||||||
OpenRaw(cft.ResetWeirdFill(256, 4, 2));
|
OpenRaw(cft.ResetWeirdFill(256, 4, 2));
|
||||||
EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches());
|
EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches());
|
||||||
|
|
||||||
|
// Fast local Bloom configurations (marker 255 -> -1)
|
||||||
|
// Two probes, about 3/4 bits set: ~50% "FP" rate
|
||||||
|
// Four 64-byte cache lines.
|
||||||
|
OpenRaw(cft.ResetWeirdFill(256, 2U << 8, 255));
|
||||||
|
EXPECT_EQ(uint64_t{9957045189927952471U}, PackedMatches());
|
||||||
|
|
||||||
|
// Ribbon configurations (marker 254 -> -2)
|
||||||
|
|
||||||
|
// Even though the builder never builds configurations this
|
||||||
|
// small (preferring Bloom), we can test that the configuration
|
||||||
|
// can be read, for possible future-proofing.
|
||||||
|
|
||||||
|
// 256 slots, one result column = 32 bytes (2 blocks, seed 0)
|
||||||
|
// ~50% FP rate:
|
||||||
|
// 0b0101010111110101010000110000011011011111100100001110010011101010
|
||||||
|
OpenRaw(cft.ResetWeirdFill(32, 2U << 8, 254));
|
||||||
|
EXPECT_EQ(uint64_t{6193930559317665002U}, PackedMatches());
|
||||||
|
|
||||||
|
// 256 slots, three-to-four result columns = 112 bytes
|
||||||
|
// ~ 1 in 10 FP rate:
|
||||||
|
// 0b0000000000100000000000000000000001000001000000010000101000000000
|
||||||
|
OpenRaw(cft.ResetWeirdFill(112, 2U << 8, 254));
|
||||||
|
EXPECT_EQ(uint64_t{9007200345328128U}, PackedMatches());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(FullBloomTest, CorruptFilters) {
|
TEST_P(FullBloomTest, CorruptFilters) {
|
||||||
RawFilterTester cft;
|
RawFilterTester cft;
|
||||||
|
|
||||||
for (bool fill : {false, true}) {
|
for (bool fill : {false, true}) {
|
||||||
|
// Legacy Bloom configurations
|
||||||
// Good filter bits - returns same as fill
|
// Good filter bits - returns same as fill
|
||||||
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
|
||||||
ASSERT_EQ(fill, Matches("hello"));
|
ASSERT_EQ(fill, Matches("hello"));
|
||||||
@ -974,11 +1071,90 @@ TEST_P(FullBloomTest, CorruptFilters) {
|
|||||||
ASSERT_TRUE(Matches("world"));
|
ASSERT_TRUE(Matches("world"));
|
||||||
|
|
||||||
// Dubious filter bits - returns true (for now)
|
// Dubious filter bits - returns true (for now)
|
||||||
// Similar, with 255 / -1
|
// Similar, with 253 / -3
|
||||||
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 255, fill));
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 253, fill));
|
||||||
|
ASSERT_TRUE(Matches("hello"));
|
||||||
|
ASSERT_TRUE(Matches("world"));
|
||||||
|
|
||||||
|
// #########################################################
|
||||||
|
// Fast local Bloom configurations (marker 255 -> -1)
|
||||||
|
// Good config with six probes
|
||||||
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 6U << 8, 255, fill));
|
||||||
|
ASSERT_EQ(fill, Matches("hello"));
|
||||||
|
ASSERT_EQ(fill, Matches("world"));
|
||||||
|
|
||||||
|
// Becomes bad/reserved config (always true) if any other byte set
|
||||||
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | 1U, 255, fill));
|
||||||
|
ASSERT_TRUE(Matches("hello"));
|
||||||
|
ASSERT_TRUE(Matches("world"));
|
||||||
|
|
||||||
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 16), 255, fill));
|
||||||
|
ASSERT_TRUE(Matches("hello"));
|
||||||
|
ASSERT_TRUE(Matches("world"));
|
||||||
|
|
||||||
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 24), 255, fill));
|
||||||
|
ASSERT_TRUE(Matches("hello"));
|
||||||
|
ASSERT_TRUE(Matches("world"));
|
||||||
|
|
||||||
|
// Good config, max 30 probes
|
||||||
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 30U << 8, 255, fill));
|
||||||
|
ASSERT_EQ(fill, Matches("hello"));
|
||||||
|
ASSERT_EQ(fill, Matches("world"));
|
||||||
|
|
||||||
|
// Bad/reserved config (always true) if more than 30
|
||||||
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 31U << 8, 255, fill));
|
||||||
|
ASSERT_TRUE(Matches("hello"));
|
||||||
|
ASSERT_TRUE(Matches("world"));
|
||||||
|
|
||||||
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 33U << 8, 255, fill));
|
||||||
|
ASSERT_TRUE(Matches("hello"));
|
||||||
|
ASSERT_TRUE(Matches("world"));
|
||||||
|
|
||||||
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 66U << 8, 255, fill));
|
||||||
|
ASSERT_TRUE(Matches("hello"));
|
||||||
|
ASSERT_TRUE(Matches("world"));
|
||||||
|
|
||||||
|
OpenRaw(cft.Reset(CACHE_LINE_SIZE, 130U << 8, 255, fill));
|
||||||
ASSERT_TRUE(Matches("hello"));
|
ASSERT_TRUE(Matches("hello"));
|
||||||
ASSERT_TRUE(Matches("world"));
|
ASSERT_TRUE(Matches("world"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// #########################################################
|
||||||
|
// Ribbon configurations (marker 254 -> -2)
|
||||||
|
// ("fill" doesn't work to detect good configurations, we just
|
||||||
|
// have to rely on TN probability)
|
||||||
|
|
||||||
|
// Good: 2 blocks * 16 bytes / segment * 4 columns = 128 bytes
|
||||||
|
// seed = 123
|
||||||
|
OpenRaw(cft.Reset(128, (2U << 8) + 123U, 254, false));
|
||||||
|
ASSERT_FALSE(Matches("hello"));
|
||||||
|
ASSERT_FALSE(Matches("world"));
|
||||||
|
|
||||||
|
// Good: 2 blocks * 16 bytes / segment * 8 columns = 256 bytes
|
||||||
|
OpenRaw(cft.Reset(256, (2U << 8) + 123U, 254, false));
|
||||||
|
ASSERT_FALSE(Matches("hello"));
|
||||||
|
ASSERT_FALSE(Matches("world"));
|
||||||
|
|
||||||
|
// Surprisingly OK: 5000 blocks (640,000 slots) in only 1024 bits
|
||||||
|
// -> average close to 0 columns
|
||||||
|
OpenRaw(cft.Reset(128, (5000U << 8) + 123U, 254, false));
|
||||||
|
// *Almost* all FPs
|
||||||
|
ASSERT_TRUE(Matches("hello"));
|
||||||
|
ASSERT_TRUE(Matches("world"));
|
||||||
|
// Need many queries to find a "true negative"
|
||||||
|
for (int i = 0; Matches(ToString(i)); ++i) {
|
||||||
|
ASSERT_LT(i, 1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bad: 1 block not allowed (for implementation detail reasons)
|
||||||
|
OpenRaw(cft.Reset(128, (1U << 8) + 123U, 254, false));
|
||||||
|
ASSERT_TRUE(Matches("hello"));
|
||||||
|
ASSERT_TRUE(Matches("world"));
|
||||||
|
|
||||||
|
// Bad: 0 blocks not allowed
|
||||||
|
OpenRaw(cft.Reset(128, (0U << 8) + 123U, 254, false));
|
||||||
|
ASSERT_TRUE(Matches("hello"));
|
||||||
|
ASSERT_TRUE(Matches("world"));
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
|
INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
|
||||||
|
Loading…
Reference in New Issue
Block a user