rocksdb/table/block_based/filter_policy.cc

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <array>
#include <deque>
#include "rocksdb/filter_policy.h"
#include "rocksdb/slice.h"
#include "table/block_based/block_based_filter_block.h"
#include "table/block_based/full_filter_block.h"
#include "table/block_based/filter_policy_internal.h"
#include "third-party/folly/folly/ConstexprMath.h"
#include "util/bloom_impl.h"
#include "util/coding.h"
#include "util/hash.h"
namespace ROCKSDB_NAMESPACE {
namespace {
// See description in FastLocalBloomImpl
class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder {
public:
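// millibits_per_key is bits per key scaled by 1000, so fractional
// bits/key settings are representable (e.g. 9.5 bits/key -> 9500).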
explicit FastLocalBloomBitsBuilder(const int millibits_per_key)
: millibits_per_key_(millibits_per_key),
num_probes_(FastLocalBloomImpl::ChooseNumProbes(millibits_per_key_)) {
assert(millibits_per_key >= 1000);
}
// No Copy allowed
FastLocalBloomBitsBuilder(const FastLocalBloomBitsBuilder&) = delete;
void operator=(const FastLocalBloomBitsBuilder&) = delete;
~FastLocalBloomBitsBuilder() override {}
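// Collects a 64-bit hash per added key. Consecutive duplicate hashes are
// skipped: re-adding the same hash would set exactly the same bits.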
virtual void AddKey(const Slice& key) override {
uint64_t hash = GetSliceHash64(key);
if (hash_entries_.empty() || hash != hash_entries_.back()) {
hash_entries_.push_back(hash);
}
}
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override {
uint32_t len_with_metadata =
CalculateSpace(static_cast<uint32_t>(hash_entries_.size()));
char* data = new char[len_with_metadata];
memset(data, 0, len_with_metadata);
assert(data);
assert(len_with_metadata >= 5);
uint32_t len = len_with_metadata - 5;
if (len > 0) {
AddAllEntries(data, len);
}
// See BloomFilterPolicy::GetBloomBitsReader re: metadata
// -1 = Marker for newer Bloom implementations
data[len] = static_cast<char>(-1);
// 0 = Marker for this sub-implementation
data[len + 1] = static_cast<char>(0);
// num_probes (and 0 in upper bits for 64-byte block size)
data[len + 2] = static_cast<char>(num_probes_);
// rest of metadata stays zero
const char* const_data = data;
buf->reset(const_data);
assert(hash_entries_.empty());
return Slice(data, len_with_metadata);
}
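// Number of keys that can be added for a target filter size in bytes:
// 8000 millibits per byte (excluding the 5 metadata bytes) divided by
// millibits_per_key_.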
int CalculateNumEntry(const uint32_t bytes) override {
uint32_t bytes_no_meta = bytes >= 5u ? bytes - 5u : 0;
return static_cast<int>(uint64_t{8000} * bytes_no_meta /
millibits_per_key_);
}
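// Space is a whole number of 64-byte cache lines (512 bits = 512000
// millibits each, hence the +511999 for ceiling division) plus 5 bytes
// of metadata.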
uint32_t CalculateSpace(const int num_entry) override {
uint32_t num_cache_lines = 0;
if (millibits_per_key_ > 0 && num_entry > 0) {
num_cache_lines = static_cast<uint32_t>(
(int64_t{num_entry} * millibits_per_key_ + 511999) / 512000);
}
return num_cache_lines * 64 + /*metadata*/ 5;
}
double EstimatedFpRate(size_t keys, size_t bytes) override {
return FastLocalBloomImpl::EstimatedFpRate(keys, bytes - /*metadata*/ 5,
num_probes_, /*hash bits*/ 64);
}
private:
void AddAllEntries(char* data, uint32_t len) {
// Simple version without prefetching:
//
// for (auto h : hash_entries_) {
// FastLocalBloomImpl::AddHash(Lower32of64(h), Upper32of64(h), len,
// num_probes_, data);
// }
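//
// Instead, pipeline through a small ring buffer: PrepareHash records the
// target cache line's byte offset (and prefetches that line), and the
// matching AddHashPrepared runs several iterations later, when the slot
// is reused, by which time the prefetched line should be in cache.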
const size_t num_entries = hash_entries_.size();
constexpr size_t kBufferMask = 7;
static_assert(((kBufferMask + 1) & kBufferMask) == 0,
"Must be power of 2 minus 1");
std::array<uint32_t, kBufferMask + 1> hashes;
std::array<uint32_t, kBufferMask + 1> byte_offsets;
// Prime the buffer
size_t i = 0;
for (; i <= kBufferMask && i < num_entries; ++i) {
uint64_t h = hash_entries_.front();
hash_entries_.pop_front();
FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data,
/*out*/ &byte_offsets[i]);
hashes[i] = Upper32of64(h);
}
// Process and buffer
for (; i < num_entries; ++i) {
uint32_t& hash_ref = hashes[i & kBufferMask];
uint32_t& byte_offset_ref = byte_offsets[i & kBufferMask];
// Process (add)
FastLocalBloomImpl::AddHashPrepared(hash_ref, num_probes_,
data + byte_offset_ref);
// And buffer
uint64_t h = hash_entries_.front();
hash_entries_.pop_front();
FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data,
/*out*/ &byte_offset_ref);
hash_ref = Upper32of64(h);
}
// Finish processing
for (i = 0; i <= kBufferMask && i < num_entries; ++i) {
FastLocalBloomImpl::AddHashPrepared(hashes[i], num_probes_,
data + byte_offsets[i]);
}
}
int millibits_per_key_;
int num_probes_;
// A deque avoids unnecessary copying of already-saved values
// and has near-minimal peak memory use.
std::deque<uint64_t> hash_entries_;
};
// See description in FastLocalBloomImpl
class FastLocalBloomBitsReader : public FilterBitsReader {
public:
FastLocalBloomBitsReader(const char* data, int num_probes, uint32_t len_bytes)
: data_(data), num_probes_(num_probes), len_bytes_(len_bytes) {}
// No Copy allowed
FastLocalBloomBitsReader(const FastLocalBloomBitsReader&) = delete;
void operator=(const FastLocalBloomBitsReader&) = delete;
~FastLocalBloomBitsReader() override {}
bool MayMatch(const Slice& key) override {
uint64_t h = GetSliceHash64(key);
uint32_t byte_offset;
FastLocalBloomImpl::PrepareHash(Lower32of64(h), len_bytes_, data_,
/*out*/ &byte_offset);
return FastLocalBloomImpl::HashMayMatchPrepared(Upper32of64(h), num_probes_,
data_ + byte_offset);
}
virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
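// Two passes over the batch: the first computes offsets and prefetches
// each key's cache line (PrepareHash), the second probes the bits once
// the lines are likely resident.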
std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
for (int i = 0; i < num_keys; ++i) {
uint64_t h = GetSliceHash64(*keys[i]);
FastLocalBloomImpl::PrepareHash(Lower32of64(h), len_bytes_, data_,
/*out*/ &byte_offsets[i]);
hashes[i] = Upper32of64(h);
}
for (int i = 0; i < num_keys; ++i) {
may_match[i] = FastLocalBloomImpl::HashMayMatchPrepared(
hashes[i], num_probes_, data_ + byte_offsets[i]);
}
}
private:
const char* data_;
const int num_probes_;
const uint32_t len_bytes_;
};
using LegacyBloomImpl = LegacyLocalityBloomImpl</*ExtraRotates*/ false>;
class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder {
public:
explicit LegacyBloomBitsBuilder(const int bits_per_key, Logger* info_log);
// No Copy allowed
LegacyBloomBitsBuilder(const LegacyBloomBitsBuilder&) = delete;
void operator=(const LegacyBloomBitsBuilder&) = delete;
~LegacyBloomBitsBuilder() override;
void AddKey(const Slice& key) override;
Slice Finish(std::unique_ptr<const char[]>* buf) override;
int CalculateNumEntry(const uint32_t bytes) override;
uint32_t CalculateSpace(const int num_entry) override {
uint32_t dont_care1;
uint32_t dont_care2;
return CalculateSpace(num_entry, &dont_care1, &dont_care2);
}
double EstimatedFpRate(size_t keys, size_t bytes) override {
return LegacyBloomImpl::EstimatedFpRate(keys, bytes - /*metadata*/ 5,
num_probes_);
}
private:
int bits_per_key_;
int num_probes_;
std::vector<uint32_t> hash_entries_;
Logger* info_log_;
// Get total bits, rounded up to be optimized for CPU cache lines
uint32_t GetTotalBitsForLocality(uint32_t total_bits);
// Reserve space for new filter
char* ReserveSpace(const int num_entry, uint32_t* total_bits,
uint32_t* num_lines);
// Implementation-specific variant of public CalculateSpace
uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits,
uint32_t* num_lines);
// Assumes single-threaded access to this function.
void AddHash(uint32_t h, char* data, uint32_t num_lines, uint32_t total_bits);
};
LegacyBloomBitsBuilder::LegacyBloomBitsBuilder(const int bits_per_key,
Logger* info_log)
: bits_per_key_(bits_per_key),
num_probes_(LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key_)),
info_log_(info_log) {
assert(bits_per_key_);
}
LegacyBloomBitsBuilder::~LegacyBloomBitsBuilder() {}
void LegacyBloomBitsBuilder::AddKey(const Slice& key) {
uint32_t hash = BloomHash(key);
if (hash_entries_.size() == 0 || hash != hash_entries_.back()) {
hash_entries_.push_back(hash);
}
}
Slice LegacyBloomBitsBuilder::Finish(std::unique_ptr<const char[]>* buf) {
uint32_t total_bits, num_lines;
size_t num_entries = hash_entries_.size();
char* data =
ReserveSpace(static_cast<int>(num_entries), &total_bits, &num_lines);
assert(data);
if (total_bits != 0 && num_lines != 0) {
for (auto h : hash_entries_) {
AddHash(h, data, num_lines, total_bits);
}
// Check for excessive entries for 32-bit hash function
if (num_entries >= /* minimum of 3 million */ 3000000U) {
// More specifically, we can detect that the 32-bit hash function
// is causing significant increase in FP rate by comparing current
// estimated FP rate to what we would get with a normal number of
// keys at same memory ratio.
double est_fp_rate = LegacyBloomImpl::EstimatedFpRate(
num_entries, total_bits / 8, num_probes_);
double vs_fp_rate = LegacyBloomImpl::EstimatedFpRate(
1U << 16, (1U << 16) * bits_per_key_ / 8, num_probes_);
if (est_fp_rate >= 1.50 * vs_fp_rate) {
// For more details, see
// https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
ROCKS_LOG_WARN(
info_log_,
"Using legacy SST/BBT Bloom filter with excessive key count "
"(%.1fM @ %dbpk), causing estimated %.1fx higher filter FP rate. "
"Consider using new Bloom with format_version>=5, smaller SST "
"file size, or partitioned filters.",
num_entries / 1000000.0, bits_per_key_, est_fp_rate / vs_fp_rate);
}
}
}
// See BloomFilterPolicy::GetFilterBitsReader for metadata
data[total_bits / 8] = static_cast<char>(num_probes_);
EncodeFixed32(data + total_bits / 8 + 1, static_cast<uint32_t>(num_lines));
const char* const_data = data;
buf->reset(const_data);
hash_entries_.clear();
return Slice(data, total_bits / 8 + 5);
}
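// Rounds total_bits up to a whole number of cache lines, then bumps the
// line count to an odd number. For example, with 64-byte (512-bit) cache
// lines, 10000 bits -> 20 lines -> 21 lines -> 10752 bits.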
uint32_t LegacyBloomBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) {
uint32_t num_lines =
(total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8);
// Make num_lines an odd number so that more hash bits influence which
// cache line (block) is selected.
if (num_lines % 2 == 0) {
num_lines++;
}
return num_lines * (CACHE_LINE_SIZE * 8);
}
uint32_t LegacyBloomBitsBuilder::CalculateSpace(const int num_entry,
uint32_t* total_bits,
uint32_t* num_lines) {
assert(bits_per_key_);
if (num_entry != 0) {
uint32_t total_bits_tmp = static_cast<uint32_t>(num_entry * bits_per_key_);
*total_bits = GetTotalBitsForLocality(total_bits_tmp);
*num_lines = *total_bits / (CACHE_LINE_SIZE * 8);
assert(*total_bits > 0 && *total_bits % 8 == 0);
} else {
// filter is empty, just leave space for metadata
*total_bits = 0;
*num_lines = 0;
}
// Reserve space for Filter
uint32_t sz = *total_bits / 8;
sz += 5; // 4 bytes for num_lines, 1 byte for num_probes
return sz;
}
char* LegacyBloomBitsBuilder::ReserveSpace(const int num_entry,
uint32_t* total_bits,
uint32_t* num_lines) {
uint32_t sz = CalculateSpace(num_entry, total_bits, num_lines);
char* data = new char[sz];
memset(data, 0, sz);
return data;
}
int LegacyBloomBitsBuilder::CalculateNumEntry(const uint32_t bytes) {
assert(bits_per_key_);
assert(bytes > 0);
int high = static_cast<int>(bytes * 8 / bits_per_key_ + 1);
int low = 1;
int n = high;
for (; n >= low; n--) {
if (CalculateSpace(n) <= bytes) {
break;
}
}
assert(n < high); // High should be an overestimation
return n;
}
inline void LegacyBloomBitsBuilder::AddHash(uint32_t h, char* data,
uint32_t num_lines,
uint32_t total_bits) {
#ifdef NDEBUG
static_cast<void>(total_bits);
#endif
assert(num_lines > 0 && total_bits > 0);
LegacyBloomImpl::AddHash(h, num_lines, num_probes_, data,
folly::constexpr_log2(CACHE_LINE_SIZE));
}
class LegacyBloomBitsReader : public FilterBitsReader {
public:
LegacyBloomBitsReader(const char* data, int num_probes, uint32_t num_lines,
uint32_t log2_cache_line_size)
: data_(data),
num_probes_(num_probes),
num_lines_(num_lines),
log2_cache_line_size_(log2_cache_line_size) {}
// No Copy allowed
LegacyBloomBitsReader(const LegacyBloomBitsReader&) = delete;
void operator=(const LegacyBloomBitsReader&) = delete;
~LegacyBloomBitsReader() override {}
// "contents" contains the data built by a preceding call to
// FilterBitsBuilder::Finish. MayMatch must return true if the key was
// passed to FilterBitsBuilder::AddKey. This method may return true or false
// if the key was not on the list, but it should aim to return false with a
// high probability.
bool MayMatch(const Slice& key) override {
uint32_t hash = BloomHash(key);
uint32_t byte_offset;
LegacyBloomImpl::PrepareHashMayMatch(
hash, num_lines_, data_, /*out*/ &byte_offset, log2_cache_line_size_);
return LegacyBloomImpl::HashMayMatchPrepared(
hash, num_probes_, data_ + byte_offset, log2_cache_line_size_);
}
virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
for (int i = 0; i < num_keys; ++i) {
hashes[i] = BloomHash(*keys[i]);
LegacyBloomImpl::PrepareHashMayMatch(hashes[i], num_lines_, data_,
/*out*/ &byte_offsets[i],
log2_cache_line_size_);
}
for (int i = 0; i < num_keys; ++i) {
may_match[i] = LegacyBloomImpl::HashMayMatchPrepared(
hashes[i], num_probes_, data_ + byte_offsets[i],
log2_cache_line_size_);
}
}
private:
const char* data_;
const int num_probes_;
const uint32_t num_lines_;
const uint32_t log2_cache_line_size_;
};
class AlwaysTrueFilter : public FilterBitsReader {
public:
bool MayMatch(const Slice&) override { return true; }
using FilterBitsReader::MayMatch; // inherit overload
};
class AlwaysFalseFilter : public FilterBitsReader {
public:
bool MayMatch(const Slice&) override { return false; }
using FilterBitsReader::MayMatch; // inherit overload
};
} // namespace
const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllFixedImpls = {
kLegacyBloom,
kDeprecatedBlock,
kFastLocalBloom,
};
const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllUserModes = {
kDeprecatedBlock,
kAuto,
};
BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode)
: mode_(mode), warned_(false) {
// Sanitize bits_per_key
if (bits_per_key < 1.0) {
bits_per_key = 1.0;
} else if (!(bits_per_key < 100.0)) { // including NaN
bits_per_key = 100.0;
}
// Includes a nudge toward rounding up, to ensure on all platforms
// that doubles specified with three decimal digits after the decimal
// point are interpreted accurately.
millibits_per_key_ = static_cast<int>(bits_per_key * 1000.0 + 0.500001);
// For better or worse, this is a rounding up of a nudged rounding up,
// e.g. 7.4999999999999 will round up to 8, but that provides more
// predictability against small arithmetic errors in floating point.
whole_bits_per_key_ = (millibits_per_key_ + 500) / 1000;
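// e.g. bits_per_key = 9.999 yields millibits_per_key_ = 9999 and
// whole_bits_per_key_ = 10.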
}
BloomFilterPolicy::~BloomFilterPolicy() {}
const char* BloomFilterPolicy::Name() const {
return "rocksdb.BuiltinBloomFilter";
}
void BloomFilterPolicy::CreateFilter(const Slice* keys, int n,
std::string* dst) const {
// We should ideally only be using this deprecated interface for
// appropriately constructed BloomFilterPolicy
assert(mode_ == kDeprecatedBlock);
// Compute bloom filter size (in both bits and bytes)
uint32_t bits = static_cast<uint32_t>(n * whole_bits_per_key_);
// For small n, we can see a very high false positive rate. Fix it
// by enforcing a minimum bloom filter length.
if (bits < 64) bits = 64;
uint32_t bytes = (bits + 7) / 8;
bits = bytes * 8;
int num_probes =
LegacyNoLocalityBloomImpl::ChooseNumProbes(whole_bits_per_key_);
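// Deprecated block-based filter layout: the raw filter bytes are appended
// to *dst, followed by a single trailing byte recording num_probes.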
const size_t init_size = dst->size();
dst->resize(init_size + bytes, 0);
dst->push_back(static_cast<char>(num_probes)); // Remember # of probes
char* array = &(*dst)[init_size];
for (int i = 0; i < n; i++) {
LegacyNoLocalityBloomImpl::AddHash(BloomHash(keys[i]), bits, num_probes,
array);
}
}
bool BloomFilterPolicy::KeyMayMatch(const Slice& key,
const Slice& bloom_filter) const {
const size_t len = bloom_filter.size();
if (len < 2 || len > 0xffffffffU) {
return false;
}
const char* array = bloom_filter.data();
const uint32_t bits = static_cast<uint32_t>(len - 1) * 8;
// Use the encoded k so that we can read filters generated by
// BloomFilterPolicy instances created with different parameters.
const int k = static_cast<uint8_t>(array[len - 1]);
if (k > 30) {
// Reserved for potentially new encodings for short bloom filters.
// Consider it a match.
return true;
}
// NB: use the stored k, not the num_probes implied by whole_bits_per_key_
return LegacyNoLocalityBloomImpl::HashMayMatch(BloomHash(key), bits, k,
array);
}
FilterBitsBuilder* BloomFilterPolicy::GetFilterBitsBuilder() const {
// This code path should no longer be used, for the built-in
// BloomFilterPolicy. Internal to RocksDB and outside
// BloomFilterPolicy, only get a FilterBitsBuilder with
// BloomFilterPolicy::GetBuilderFromContext(), which will call
// BloomFilterPolicy::GetBuilderWithContext(). RocksDB users have
// been warned (HISTORY.md) that they can no longer call this on
// the built-in BloomFilterPolicy (unlikely).
assert(false);
return GetBuilderWithContext(FilterBuildingContext(BlockBasedTableOptions()));
}
FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext(
const FilterBuildingContext& context) const {
Mode cur = mode_;
// Unusual code construction so that we can have just
// one exhaustive switch without (risky) recursion
for (int i = 0; i < 2; ++i) {
switch (cur) {
case kAuto:
if (context.table_options.format_version < 5) {
cur = kLegacyBloom;
} else {
cur = kFastLocalBloom;
}
break;
case kDeprecatedBlock:
return nullptr;
case kFastLocalBloom:
return new FastLocalBloomBitsBuilder(millibits_per_key_);
case kLegacyBloom:
if (whole_bits_per_key_ >= 14 && context.info_log &&
!warned_.load(std::memory_order_relaxed)) {
warned_ = true;
const char* adjective;
if (whole_bits_per_key_ >= 20) {
adjective = "Dramatic";
} else {
adjective = "Significant";
}
// For more details, see
// https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
ROCKS_LOG_WARN(
context.info_log,
"Using legacy Bloom filter with high (%d) bits/key. "
"%s filter space and/or accuracy improvement is available "
"with format_version>=5.",
whole_bits_per_key_, adjective);
}
return new LegacyBloomBitsBuilder(whole_bits_per_key_,
context.info_log);
}
}
assert(false);
return nullptr; // something legal
}
FilterBitsBuilder* BloomFilterPolicy::GetBuilderFromContext(
const FilterBuildingContext& context) {
if (context.table_options.filter_policy) {
return context.table_options.filter_policy->GetBuilderWithContext(context);
} else {
return nullptr;
}
}
// Read metadata to determine what kind of FilterBitsReader is needed
// and return a new one.
FilterBitsReader* BloomFilterPolicy::GetFilterBitsReader(
const Slice& contents) const {
uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
if (len_with_meta <= 5) {
// filter is empty or broken. Treat like zero keys added.
return new AlwaysFalseFilter();
}
// Legacy Bloom filter data:
// 0 +-----------------------------------+
// | Raw Bloom filter data |
// | ... |
// len +-----------------------------------+
// | byte for num_probes or |
// | marker for new implementations |
// len+1 +-----------------------------------+
// | four bytes for number of cache |
// | lines |
// len_with_meta +-----------------------------------+
int8_t raw_num_probes =
static_cast<int8_t>(contents.data()[len_with_meta - 5]);
// NB: num_probes values > 30 and < 128 probably have not been used, because of
// BloomFilterPolicy::initialize, unless directly calling
// LegacyBloomBitsBuilder as an API, but we are leaving those cases in
// limbo with LegacyBloomBitsReader for now.
if (raw_num_probes < 1) {
// Note: < 0 (or unsigned > 127) indicate special new implementations
// (or reserved for future use)
if (raw_num_probes == -1) {
// Marker for newer Bloom implementations
return GetBloomBitsReader(contents);
}
// otherwise
// Treat as zero probes (always FP) for now.
return new AlwaysTrueFilter();
}
// else attempt decode for LegacyBloomBitsReader
int num_probes = raw_num_probes;
assert(num_probes >= 1);
assert(num_probes <= 127);
uint32_t len = len_with_meta - 5;
assert(len > 0);
uint32_t num_lines = DecodeFixed32(contents.data() + len_with_meta - 4);
uint32_t log2_cache_line_size;
if (num_lines * CACHE_LINE_SIZE == len) {
// Common case
log2_cache_line_size = folly::constexpr_log2(CACHE_LINE_SIZE);
} else if (num_lines == 0 || len % num_lines != 0) {
// Invalid (no solution to num_lines * x == len)
// Treat as zero probes (always FP) for now.
return new AlwaysTrueFilter();
} else {
// Determine the non-native cache line size (from another system)
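// e.g. num_lines = 3 and len = 384 resolves to log2_cache_line_size = 7,
// i.e. a filter built on a system with 128-byte cache lines.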
log2_cache_line_size = 0;
while ((num_lines << log2_cache_line_size) < len) {
++log2_cache_line_size;
}
if ((num_lines << log2_cache_line_size) != len) {
// Invalid (block size not a power of two)
// Treat as zero probes (always FP) for now.
return new AlwaysTrueFilter();
}
}
// if not early return
return new LegacyBloomBitsReader(contents.data(), num_probes, num_lines,
log2_cache_line_size);
}
// For newer Bloom filter implementations
FilterBitsReader* BloomFilterPolicy::GetBloomBitsReader(
const Slice& contents) const {
uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
uint32_t len = len_with_meta - 5;
assert(len > 0); // precondition
// New Bloom filter data:
// 0 +-----------------------------------+
// | Raw Bloom filter data |
// | ... |
// len +-----------------------------------+
// | char{-1} byte -> new Bloom filter |
// len+1 +-----------------------------------+
// | byte for subimplementation |
// | 0: FastLocalBloom |
// | other: reserved |
// len+2 +-----------------------------------+
// | byte for block_and_probes |
// | 0 in top 3 bits -> 6 -> 64-byte |
// | reserved: |
// | 1 in top 3 bits -> 7 -> 128-byte|
// | 2 in top 3 bits -> 8 -> 256-byte|
// | ... |
// | num_probes in bottom 5 bits, |
// | except 0 and 31 reserved |
// len+3 +-----------------------------------+
// | two bytes reserved |
// | possibly for hash seed |
// len_with_meta +-----------------------------------+
// Read more metadata (see above)
char sub_impl_val = contents.data()[len_with_meta - 4];
char block_and_probes = contents.data()[len_with_meta - 3];
int log2_block_bytes = ((block_and_probes >> 5) & 7) + 6;
int num_probes = (block_and_probes & 31);
if (num_probes < 1 || num_probes > 30) {
// Reserved / future safe
return new AlwaysTrueFilter();
}
uint16_t rest = DecodeFixed16(contents.data() + len_with_meta - 2);
if (rest != 0) {
// Reserved, possibly for hash seed
// Future safe
return new AlwaysTrueFilter();
}
if (sub_impl_val == 0) { // FastLocalBloom
if (log2_block_bytes == 6) { // Only block size supported for now
return new FastLocalBloomBitsReader(contents.data(), num_probes, len);
}
}
// otherwise
// Reserved / future safe
return new AlwaysTrueFilter();
}
const FilterPolicy* NewBloomFilterPolicy(double bits_per_key,
bool use_block_based_builder) {
BloomFilterPolicy::Mode m;
if (use_block_based_builder) {
m = BloomFilterPolicy::kDeprecatedBlock;
} else {
m = BloomFilterPolicy::kAuto;
}
assert(std::find(BloomFilterPolicy::kAllUserModes.begin(),
BloomFilterPolicy::kAllUserModes.end(),
m) != BloomFilterPolicy::kAllUserModes.end());
return new BloomFilterPolicy(bits_per_key, m);
}
FilterBuildingContext::FilterBuildingContext(
const BlockBasedTableOptions& _table_options)
: table_options(_table_options) {}
FilterPolicy::~FilterPolicy() { }
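// Accepts "nullptr" or "rocksdb.BuiltinBloomFilter" to clear the policy, or
// strings of the form "bloomfilter:<bits_per_key>:<use_block_based_builder>",
// e.g. "bloomfilter:10:false" -> NewBloomFilterPolicy(10, false).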
Status FilterPolicy::CreateFromString(
const ConfigOptions& /*options*/, const std::string& value,
std::shared_ptr<const FilterPolicy>* policy) {
const std::string kBloomName = "bloomfilter:";
if (value == kNullptrString || value == "rocksdb.BuiltinBloomFilter") {
policy->reset();
#ifndef ROCKSDB_LITE
} else if (value.compare(0, kBloomName.size(), kBloomName) == 0) {
size_t pos = value.find(':', kBloomName.size());
if (pos == std::string::npos) {
return Status::InvalidArgument(
"Invalid filter policy config, missing bits_per_key");
} else {
double bits_per_key = ParseDouble(
trim(value.substr(kBloomName.size(), pos - kBloomName.size())));
bool use_block_based_builder =
ParseBoolean("use_block_based_builder", trim(value.substr(pos + 1)));
policy->reset(
NewBloomFilterPolicy(bits_per_key, use_block_based_builder));
}
} else {
return Status::InvalidArgument("Invalid filter policy name ", value);
#else
} else {
return Status::NotSupported("Cannot load filter policy in LITE mode ",
value);
#endif // ROCKSDB_LITE
}
return Status::OK();
}
} // namespace ROCKSDB_NAMESPACE