Log warning for high bits/key in legacy Bloom filter (#6312)

Summary:
Help users who would benefit most from the new Bloom filter
implementation by logging a warning that recommends using
format_version >= 5.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6312

Test Plan:
$ (for BPK in 10 13 14 19 20 50; do ./filter_bench -quick -impl=0 -bits_per_key=$BPK -m_queries=1 2>&1; done) | grep 'its/key'
    Bits/key actual: 10.0647
    Bits/key actual: 13.0593
    [WARN] [/block_based/filter_policy.cc:546] Using legacy Bloom filter with high (14) bits/key. Significant filter space and/or accuracy improvement is available with format_version>=5.
    Bits/key actual: 14.0581
    [WARN] [/block_based/filter_policy.cc:546] Using legacy Bloom filter with high (19) bits/key. Significant filter space and/or accuracy improvement is available with format_version>=5.
    Bits/key actual: 19.0542
    [WARN] [/block_based/filter_policy.cc:546] Using legacy Bloom filter with high (20) bits/key. Dramatic filter space and/or accuracy improvement is available with format_version>=5.
    Bits/key actual: 20.0584
    [WARN] [/block_based/filter_policy.cc:546] Using legacy Bloom filter with high (50) bits/key. Dramatic filter space and/or accuracy improvement is available with format_version>=5.
    Bits/key actual: 50.0577

Differential Revision: D19457191

Pulled By: pdillinger

fbshipit-source-id: 073d94cde5c70e03a160f953e1100c15ea83eda4
This commit is contained in:
Peter Dillinger 2020-01-17 19:36:09 -08:00 committed by Facebook Github Bot
parent 931876e86e
commit 4b86fe1123
7 changed files with 33 additions and 1 deletions

BIN
docs/static/images/bloom_fp_vs_bpk.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

View File

@ -101,6 +101,9 @@ struct FilterBuildingContext {
// The table level at time of constructing the SST file, or -1 if unknown.
// (The table file could later be used at a different level.)
int level_at_creation = -1;
// An optional logger for reporting errors, warnings, etc.
Logger* info_log = nullptr;
};
// We add a new format of filter block called full filter block

View File

@ -425,6 +425,7 @@ struct BlockBasedTableBuilder::Rep {
context.column_family_name = column_family_name;
context.compaction_style = ioptions.compaction_style;
context.level_at_creation = level_at_creation;
context.info_log = ioptions.info_log;
filter_builder.reset(CreateFilterBlockBuilder(
ioptions, moptions, context, use_delta_encoding_for_index_values,
p_index_builder_));

View File

@ -419,7 +419,7 @@ const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllUserModes = {
};
BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode)
: mode_(mode) {
: mode_(mode), warned_(false) {
// Sanitize bits_per_key
if (bits_per_key < 1.0) {
bits_per_key = 1.0;
@ -527,6 +527,24 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext(
case kFastLocalBloom:
return new FastLocalBloomBitsBuilder(millibits_per_key_);
case kLegacyBloom:
if (whole_bits_per_key_ >= 14 && context.info_log &&
!warned_.load(std::memory_order_relaxed)) {
warned_ = true;
const char* adjective;
if (whole_bits_per_key_ >= 20) {
adjective = "Dramatic";
} else {
adjective = "Significant";
}
// For more details, see
// https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
ROCKS_LOG_WARN(
context.info_log,
"Using legacy Bloom filter with high (%d) bits/key. "
"%s filter space and/or accuracy improvement is available "
"with format_version>=5.",
whole_bits_per_key_, adjective);
}
return new LegacyBloomBitsBuilder(whole_bits_per_key_);
}
}

View File

@ -8,6 +8,7 @@
#pragma once
#include <atomic>
#include <memory>
#include <string>
#include <vector>
@ -125,6 +126,10 @@ class BloomFilterPolicy : public FilterPolicy {
// implementation) for building new SST filters.
Mode mode_;
// Whether relevant warnings have been logged already. (Remember so we
// only report once per BloomFilterPolicy instance, to keep the noise down.)
mutable std::atomic<bool> warned_;
// For newer Bloom filter implementation(s)
FilterBitsReader* GetBloomBitsReader(const Slice& contents) const;
};

View File

@ -47,6 +47,7 @@ class MockBlockBasedTableTester {
context.column_family_name = "mock_cf";
context.compaction_style = ioptions_.compaction_style;
context.level_at_creation = kMockLevel;
context.info_log = ioptions_.info_log;
return BloomFilterPolicy::GetBuilderFromContext(context);
}
};

View File

@ -26,6 +26,7 @@ int main() {
#include "util/gflags_compat.h"
#include "util/hash.h"
#include "util/random.h"
#include "util/stderr_logger.h"
#include "util/stop_watch.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
@ -112,6 +113,7 @@ using rocksdb::ParsedFullFilterBlock;
using rocksdb::PlainTableBloomV1;
using rocksdb::Random32;
using rocksdb::Slice;
using rocksdb::StderrLogger;
using rocksdb::mock::MockBlockBasedTableTester;
struct KeyMaker {
@ -243,6 +245,7 @@ struct FilterBench : public MockBlockBasedTableTester {
Random32 random_;
std::ostringstream fp_rate_report_;
Arena arena_;
StderrLogger stderr_logger_;
FilterBench()
: MockBlockBasedTableTester(new BloomFilterPolicy(
@ -252,6 +255,7 @@ struct FilterBench : public MockBlockBasedTableTester {
for (uint32_t i = 0; i < FLAGS_batch_size; ++i) {
kms_.emplace_back(FLAGS_key_size < 8 ? 8 : FLAGS_key_size);
}
ioptions_.info_log = &stderr_logger_;
}
void Go();