Improve bloom_test bits_per_key flag (#8093)

Summary:
Improved handling of --bits_per_key values other than 10. Note, however,
that at least the OptimizeForMemory test is simply not designed to handle
other settings in general. (ribbon_test does have a statistical framework
for this kind of testing, but it's not important to do the same for Bloom
right now.)

Closes https://github.com/facebook/rocksdb/issues/7019

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8093

Test Plan: for I in `seq 1 20`; do ./bloom_test --gtest_filter=-*OptimizeForMemory* --bits_per_key=$I &> /dev/null || echo FAILED; done

Reviewed By: mrambacher

Differential Revision: D27275875

Pulled By: pdillinger

fbshipit-source-id: 7362e8ac2c41ea11f639412e4f30c8b375f04388
This commit is contained in:
Peter Dillinger 2021-03-23 21:41:15 -07:00 committed by Facebook GitHub Bot
parent 41e554da2b
commit da6b90ab48

View File

@ -30,6 +30,9 @@ int main() {
using GFLAGS_NAMESPACE::ParseCommandLineFlags; using GFLAGS_NAMESPACE::ParseCommandLineFlags;
// The test is not fully designed for bits_per_key other than 10, but with
// this parameter you can easily explore the behavior of other bits_per_key.
// See also filter_bench.
DEFINE_int32(bits_per_key, 10, ""); DEFINE_int32(bits_per_key, 10, "");
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
@ -158,7 +161,8 @@ TEST_F(BlockBasedBloomTest, VaryingLengths) {
} }
Build(); Build();
ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 40)) << length; ASSERT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) + 40))
<< length;
// All added keys must match // All added keys must match
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
@ -172,11 +176,16 @@ TEST_F(BlockBasedBloomTest, VaryingLengths) {
fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n", fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
rate*100.0, length, static_cast<int>(FilterSize())); rate*100.0, length, static_cast<int>(FilterSize()));
} }
if (FLAGS_bits_per_key == 10) {
ASSERT_LE(rate, 0.02); // Must not be over 2% ASSERT_LE(rate, 0.02); // Must not be over 2%
if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often if (rate > 0.0125) {
else good_filters++; mediocre_filters++; // Allowed, but not too often
} else {
good_filters++;
} }
if (kVerbose >= 1) { }
}
if (FLAGS_bits_per_key == 10 && kVerbose >= 1) {
fprintf(stderr, "Filters: %d good, %d mediocre\n", fprintf(stderr, "Filters: %d good, %d mediocre\n",
good_filters, mediocre_filters); good_filters, mediocre_filters);
} }
@ -481,8 +490,8 @@ TEST_P(FullBloomTest, FullVaryingLengths) {
} }
Build(); Build();
EXPECT_LE(FilterSize(), EXPECT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) +
(size_t)((length * 10 / 8) + CACHE_LINE_SIZE * 2 + 5)); CACHE_LINE_SIZE * 2 + 5));
// All added keys must match // All added keys must match
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
@ -496,12 +505,15 @@ TEST_P(FullBloomTest, FullVaryingLengths) {
fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n", fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
rate*100.0, length, static_cast<int>(FilterSize())); rate*100.0, length, static_cast<int>(FilterSize()));
} }
if (FLAGS_bits_per_key == 10) {
EXPECT_LE(rate, 0.02); // Must not be over 2% EXPECT_LE(rate, 0.02); // Must not be over 2%
if (rate > 0.0125) if (rate > 0.0125) {
mediocre_filters++; // Allowed, but not too often mediocre_filters++; // Allowed, but not too often
else } else {
good_filters++; good_filters++;
} }
}
}
if (kVerbose >= 1) { if (kVerbose >= 1) {
fprintf(stderr, "Filters: %d good, %d mediocre\n", fprintf(stderr, "Filters: %d good, %d mediocre\n",
good_filters, mediocre_filters); good_filters, mediocre_filters);
@ -538,8 +550,10 @@ TEST_P(FullBloomTest, OptimizeForMemory) {
total_keys += nkeys; total_keys += nkeys;
total_fp_rate += FalsePositiveRate(); total_fp_rate += FalsePositiveRate();
} }
if (FLAGS_bits_per_key == 10) {
EXPECT_LE(total_fp_rate / double{nfilters}, 0.011); EXPECT_LE(total_fp_rate / double{nfilters}, 0.011);
EXPECT_GE(total_fp_rate / double{nfilters}, 0.008); EXPECT_GE(total_fp_rate / double{nfilters}, 0.008);
}
int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8; int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8;
if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) { if (GetParam() == BloomFilterPolicy::kStandard128Ribbon) {