rocksdb/table/block_based/filter_policy.cc
Peter Dillinger a92bd0a183 Optimize memory and CPU for building new Bloom filter (#6175)
Summary:
The filter bits builder collects all the hashes to add in memory before adding them (because the number of keys is not known until we've walked over all the keys). Existing code uses a std::vector for this, which can mean up to 2x than necessary space allocated (and not freed) and up to ~2x write amplification in memory. Using std::deque uses close to minimal space (for large filters, the only time it matters), no write amplification, frees memory while building, and no need for large contiguous memory area. The only cost is more calls to allocator, which does not appear to matter, at least in benchmark test.

For now, this change only applies to the new (format_version=5) Bloom filter implementation, to ease before-and-after comparison downstream.

Temporary memory use during build is about the only way the new Bloom filter could regress vs. the old (because of upgrade to 64-bit hash) and that should only matter for full filters. This change should largely mitigate that potential regression.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6175

Test Plan:
Using filter_bench with -new_builder option and 6M keys per filter is like large full filter (improvement). 10k keys and no -new_builder is like partitioned filters (about the same). (Corresponding configurations run simultaneously on devserver.)

std::vector impl (before)

    $ /usr/bin/time -v ./filter_bench -impl=2 -quick -new_builder -working_mem_size_mb=1000 -
    average_keys_per_filter=6000000
    Build avg ns/key: 52.2027
    Maximum resident set size (kbytes): 1105016
    $ /usr/bin/time -v ./filter_bench -impl=2 -quick -working_mem_size_mb=1000 -
    average_keys_per_filter=10000
    Build avg ns/key: 30.5694
    Maximum resident set size (kbytes): 1208152

std::deque impl (after)

    $ /usr/bin/time -v ./filter_bench -impl=2 -quick -new_builder -working_mem_size_mb=1000 -
    average_keys_per_filter=6000000
    Build avg ns/key: 39.0697
    Maximum resident set size (kbytes): 1087196
    $ /usr/bin/time -v ./filter_bench -impl=2 -quick -working_mem_size_mb=1000 -
    average_keys_per_filter=10000
    Build avg ns/key: 30.9348
    Maximum resident set size (kbytes): 1207980

Differential Revision: D19053431

Pulled By: pdillinger

fbshipit-source-id: 2888e748723a19d9ea40403934f13cbb8483430c
2019-12-15 21:31:08 -08:00

703 lines
24 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <array>
#include <deque>
#include "rocksdb/filter_policy.h"
#include "rocksdb/slice.h"
#include "table/block_based/block_based_filter_block.h"
#include "table/block_based/full_filter_block.h"
#include "table/block_based/filter_policy_internal.h"
#include "third-party/folly/folly/ConstexprMath.h"
#include "util/bloom_impl.h"
#include "util/coding.h"
#include "util/hash.h"
namespace rocksdb {
namespace {
// See description in FastLocalBloomImpl
class FastLocalBloomBitsBuilder : public BuiltinFilterBitsBuilder {
public:
explicit FastLocalBloomBitsBuilder(const int millibits_per_key)
: millibits_per_key_(millibits_per_key),
num_probes_(FastLocalBloomImpl::ChooseNumProbes(millibits_per_key_)) {
assert(millibits_per_key >= 1000);
}
// No Copy allowed
FastLocalBloomBitsBuilder(const FastLocalBloomBitsBuilder&) = delete;
void operator=(const FastLocalBloomBitsBuilder&) = delete;
~FastLocalBloomBitsBuilder() override {}
virtual void AddKey(const Slice& key) override {
uint64_t hash = GetSliceHash64(key);
if (hash_entries_.empty() || hash != hash_entries_.back()) {
hash_entries_.push_back(hash);
}
}
virtual Slice Finish(std::unique_ptr<const char[]>* buf) override {
uint32_t len_with_metadata =
CalculateSpace(static_cast<uint32_t>(hash_entries_.size()));
char* data = new char[len_with_metadata];
memset(data, 0, len_with_metadata);
assert(data);
assert(len_with_metadata >= 5);
uint32_t len = len_with_metadata - 5;
if (len > 0) {
AddAllEntries(data, len);
}
// See BloomFilterPolicy::GetBloomBitsReader re: metadata
// -1 = Marker for newer Bloom implementations
data[len] = static_cast<char>(-1);
// 0 = Marker for this sub-implementation
data[len + 1] = static_cast<char>(0);
// num_probes (and 0 in upper bits for 64-byte block size)
data[len + 2] = static_cast<char>(num_probes_);
// rest of metadata stays zero
const char* const_data = data;
buf->reset(const_data);
assert(hash_entries_.empty());
return Slice(data, len_with_metadata);
}
int CalculateNumEntry(const uint32_t bytes) override {
uint32_t bytes_no_meta = bytes >= 5u ? bytes - 5u : 0;
return static_cast<int>(uint64_t{8000} * bytes_no_meta /
millibits_per_key_);
}
uint32_t CalculateSpace(const int num_entry) override {
uint32_t num_cache_lines = 0;
if (millibits_per_key_ > 0 && num_entry > 0) {
num_cache_lines = static_cast<uint32_t>(
(int64_t{num_entry} * millibits_per_key_ + 511999) / 512000);
}
return num_cache_lines * 64 + /*metadata*/ 5;
}
private:
void AddAllEntries(char* data, uint32_t len) {
// Simple version without prefetching:
//
// for (auto h : hash_entries_) {
// FastLocalBloomImpl::AddHash(Lower32of64(h), Upper32of64(h), len,
// num_probes_, data);
// }
const size_t num_entries = hash_entries_.size();
constexpr size_t kBufferMask = 7;
static_assert(((kBufferMask + 1) & kBufferMask) == 0,
"Must be power of 2 minus 1");
std::array<uint32_t, kBufferMask + 1> hashes;
std::array<uint32_t, kBufferMask + 1> byte_offsets;
// Prime the buffer
size_t i = 0;
for (; i <= kBufferMask && i < num_entries; ++i) {
uint64_t h = hash_entries_.front();
hash_entries_.pop_front();
FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data,
/*out*/ &byte_offsets[i]);
hashes[i] = Upper32of64(h);
}
// Process and buffer
for (; i < num_entries; ++i) {
uint32_t& hash_ref = hashes[i & kBufferMask];
uint32_t& byte_offset_ref = byte_offsets[i & kBufferMask];
// Process (add)
FastLocalBloomImpl::AddHashPrepared(hash_ref, num_probes_,
data + byte_offset_ref);
// And buffer
uint64_t h = hash_entries_.front();
hash_entries_.pop_front();
FastLocalBloomImpl::PrepareHash(Lower32of64(h), len, data,
/*out*/ &byte_offset_ref);
hash_ref = Upper32of64(h);
}
// Finish processing
for (i = 0; i <= kBufferMask && i < num_entries; ++i) {
FastLocalBloomImpl::AddHashPrepared(hashes[i], num_probes_,
data + byte_offsets[i]);
}
}
int millibits_per_key_;
int num_probes_;
// A deque avoids unnecessary copying of already-saved values
// and has near-minimal peak memory use.
std::deque<uint64_t> hash_entries_;
};
// See description in FastLocalBloomImpl
class FastLocalBloomBitsReader : public FilterBitsReader {
public:
FastLocalBloomBitsReader(const char* data, int num_probes, uint32_t len_bytes)
: data_(data), num_probes_(num_probes), len_bytes_(len_bytes) {}
// No Copy allowed
FastLocalBloomBitsReader(const FastLocalBloomBitsReader&) = delete;
void operator=(const FastLocalBloomBitsReader&) = delete;
~FastLocalBloomBitsReader() override {}
bool MayMatch(const Slice& key) override {
uint64_t h = GetSliceHash64(key);
uint32_t byte_offset;
FastLocalBloomImpl::PrepareHash(Lower32of64(h), len_bytes_, data_,
/*out*/ &byte_offset);
return FastLocalBloomImpl::HashMayMatchPrepared(Upper32of64(h), num_probes_,
data_ + byte_offset);
}
virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
for (int i = 0; i < num_keys; ++i) {
uint64_t h = GetSliceHash64(*keys[i]);
FastLocalBloomImpl::PrepareHash(Lower32of64(h), len_bytes_, data_,
/*out*/ &byte_offsets[i]);
hashes[i] = Upper32of64(h);
}
for (int i = 0; i < num_keys; ++i) {
may_match[i] = FastLocalBloomImpl::HashMayMatchPrepared(
hashes[i], num_probes_, data_ + byte_offsets[i]);
}
}
private:
const char* data_;
const int num_probes_;
const uint32_t len_bytes_;
};
using LegacyBloomImpl = LegacyLocalityBloomImpl</*ExtraRotates*/ false>;
class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder {
public:
explicit LegacyBloomBitsBuilder(const int bits_per_key);
// No Copy allowed
LegacyBloomBitsBuilder(const LegacyBloomBitsBuilder&) = delete;
void operator=(const LegacyBloomBitsBuilder&) = delete;
~LegacyBloomBitsBuilder() override;
void AddKey(const Slice& key) override;
Slice Finish(std::unique_ptr<const char[]>* buf) override;
int CalculateNumEntry(const uint32_t bytes) override;
uint32_t CalculateSpace(const int num_entry) override {
uint32_t dont_care1;
uint32_t dont_care2;
return CalculateSpace(num_entry, &dont_care1, &dont_care2);
}
private:
int bits_per_key_;
int num_probes_;
std::vector<uint32_t> hash_entries_;
// Get totalbits that optimized for cpu cache line
uint32_t GetTotalBitsForLocality(uint32_t total_bits);
// Reserve space for new filter
char* ReserveSpace(const int num_entry, uint32_t* total_bits,
uint32_t* num_lines);
// Implementation-specific variant of public CalculateSpace
uint32_t CalculateSpace(const int num_entry, uint32_t* total_bits,
uint32_t* num_lines);
// Assuming single threaded access to this function.
void AddHash(uint32_t h, char* data, uint32_t num_lines, uint32_t total_bits);
};
LegacyBloomBitsBuilder::LegacyBloomBitsBuilder(const int bits_per_key)
: bits_per_key_(bits_per_key),
num_probes_(LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key_)) {
assert(bits_per_key_);
}
LegacyBloomBitsBuilder::~LegacyBloomBitsBuilder() {}
void LegacyBloomBitsBuilder::AddKey(const Slice& key) {
uint32_t hash = BloomHash(key);
if (hash_entries_.size() == 0 || hash != hash_entries_.back()) {
hash_entries_.push_back(hash);
}
}
Slice LegacyBloomBitsBuilder::Finish(std::unique_ptr<const char[]>* buf) {
uint32_t total_bits, num_lines;
char* data = ReserveSpace(static_cast<int>(hash_entries_.size()), &total_bits,
&num_lines);
assert(data);
if (total_bits != 0 && num_lines != 0) {
for (auto h : hash_entries_) {
AddHash(h, data, num_lines, total_bits);
}
}
// See BloomFilterPolicy::GetFilterBitsReader for metadata
data[total_bits / 8] = static_cast<char>(num_probes_);
EncodeFixed32(data + total_bits / 8 + 1, static_cast<uint32_t>(num_lines));
const char* const_data = data;
buf->reset(const_data);
hash_entries_.clear();
return Slice(data, total_bits / 8 + 5);
}
uint32_t LegacyBloomBitsBuilder::GetTotalBitsForLocality(uint32_t total_bits) {
uint32_t num_lines =
(total_bits + CACHE_LINE_SIZE * 8 - 1) / (CACHE_LINE_SIZE * 8);
// Make num_lines an odd number to make sure more bits are involved
// when determining which block.
if (num_lines % 2 == 0) {
num_lines++;
}
return num_lines * (CACHE_LINE_SIZE * 8);
}
uint32_t LegacyBloomBitsBuilder::CalculateSpace(const int num_entry,
uint32_t* total_bits,
uint32_t* num_lines) {
assert(bits_per_key_);
if (num_entry != 0) {
uint32_t total_bits_tmp = static_cast<uint32_t>(num_entry * bits_per_key_);
*total_bits = GetTotalBitsForLocality(total_bits_tmp);
*num_lines = *total_bits / (CACHE_LINE_SIZE * 8);
assert(*total_bits > 0 && *total_bits % 8 == 0);
} else {
// filter is empty, just leave space for metadata
*total_bits = 0;
*num_lines = 0;
}
// Reserve space for Filter
uint32_t sz = *total_bits / 8;
sz += 5; // 4 bytes for num_lines, 1 byte for num_probes
return sz;
}
char* LegacyBloomBitsBuilder::ReserveSpace(const int num_entry,
uint32_t* total_bits,
uint32_t* num_lines) {
uint32_t sz = CalculateSpace(num_entry, total_bits, num_lines);
char* data = new char[sz];
memset(data, 0, sz);
return data;
}
int LegacyBloomBitsBuilder::CalculateNumEntry(const uint32_t bytes) {
assert(bits_per_key_);
assert(bytes > 0);
int high = static_cast<int>(bytes * 8 / bits_per_key_ + 1);
int low = 1;
int n = high;
for (; n >= low; n--) {
if (CalculateSpace(n) <= bytes) {
break;
}
}
assert(n < high); // High should be an overestimation
return n;
}
inline void LegacyBloomBitsBuilder::AddHash(uint32_t h, char* data,
uint32_t num_lines,
uint32_t total_bits) {
#ifdef NDEBUG
static_cast<void>(total_bits);
#endif
assert(num_lines > 0 && total_bits > 0);
LegacyBloomImpl::AddHash(h, num_lines, num_probes_, data,
folly::constexpr_log2(CACHE_LINE_SIZE));
}
class LegacyBloomBitsReader : public FilterBitsReader {
public:
LegacyBloomBitsReader(const char* data, int num_probes, uint32_t num_lines,
uint32_t log2_cache_line_size)
: data_(data),
num_probes_(num_probes),
num_lines_(num_lines),
log2_cache_line_size_(log2_cache_line_size) {}
// No Copy allowed
LegacyBloomBitsReader(const LegacyBloomBitsReader&) = delete;
void operator=(const LegacyBloomBitsReader&) = delete;
~LegacyBloomBitsReader() override {}
// "contents" contains the data built by a preceding call to
// FilterBitsBuilder::Finish. MayMatch must return true if the key was
// passed to FilterBitsBuilder::AddKey. This method may return true or false
// if the key was not on the list, but it should aim to return false with a
// high probability.
bool MayMatch(const Slice& key) override {
uint32_t hash = BloomHash(key);
uint32_t byte_offset;
LegacyBloomImpl::PrepareHashMayMatch(
hash, num_lines_, data_, /*out*/ &byte_offset, log2_cache_line_size_);
return LegacyBloomImpl::HashMayMatchPrepared(
hash, num_probes_, data_ + byte_offset, log2_cache_line_size_);
}
virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) override {
std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
for (int i = 0; i < num_keys; ++i) {
hashes[i] = BloomHash(*keys[i]);
LegacyBloomImpl::PrepareHashMayMatch(hashes[i], num_lines_, data_,
/*out*/ &byte_offsets[i],
log2_cache_line_size_);
}
for (int i = 0; i < num_keys; ++i) {
may_match[i] = LegacyBloomImpl::HashMayMatchPrepared(
hashes[i], num_probes_, data_ + byte_offsets[i],
log2_cache_line_size_);
}
}
private:
const char* data_;
const int num_probes_;
const uint32_t num_lines_;
const uint32_t log2_cache_line_size_;
};
class AlwaysTrueFilter : public FilterBitsReader {
public:
bool MayMatch(const Slice&) override { return true; }
using FilterBitsReader::MayMatch; // inherit overload
};
class AlwaysFalseFilter : public FilterBitsReader {
public:
bool MayMatch(const Slice&) override { return false; }
using FilterBitsReader::MayMatch; // inherit overload
};
} // namespace
const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllFixedImpls = {
kLegacyBloom,
kDeprecatedBlock,
kFastLocalBloom,
};
const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllUserModes = {
kDeprecatedBlock,
kAuto,
};
BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode)
: mode_(mode) {
// Sanitize bits_per_key
if (bits_per_key < 1.0) {
bits_per_key = 1.0;
} else if (!(bits_per_key < 100.0)) { // including NaN
bits_per_key = 100.0;
}
// Includes a nudge toward rounding up, to ensure on all platforms
// that doubles specified with three decimal digits after the decimal
// point are interpreted accurately.
millibits_per_key_ = static_cast<int>(bits_per_key * 1000.0 + 0.500001);
// For better or worse, this is a rounding up of a nudged rounding up,
// e.g. 7.4999999999999 will round up to 8, but that provides more
// predictability against small arithmetic errors in floating point.
whole_bits_per_key_ = (millibits_per_key_ + 500) / 1000;
}
BloomFilterPolicy::~BloomFilterPolicy() {}
const char* BloomFilterPolicy::Name() const {
return "rocksdb.BuiltinBloomFilter";
}
void BloomFilterPolicy::CreateFilter(const Slice* keys, int n,
std::string* dst) const {
// We should ideally only be using this deprecated interface for
// appropriately constructed BloomFilterPolicy
assert(mode_ == kDeprecatedBlock);
// Compute bloom filter size (in both bits and bytes)
uint32_t bits = static_cast<uint32_t>(n * whole_bits_per_key_);
// For small n, we can see a very high false positive rate. Fix it
// by enforcing a minimum bloom filter length.
if (bits < 64) bits = 64;
uint32_t bytes = (bits + 7) / 8;
bits = bytes * 8;
int num_probes =
LegacyNoLocalityBloomImpl::ChooseNumProbes(whole_bits_per_key_);
const size_t init_size = dst->size();
dst->resize(init_size + bytes, 0);
dst->push_back(static_cast<char>(num_probes)); // Remember # of probes
char* array = &(*dst)[init_size];
for (int i = 0; i < n; i++) {
LegacyNoLocalityBloomImpl::AddHash(BloomHash(keys[i]), bits, num_probes,
array);
}
}
bool BloomFilterPolicy::KeyMayMatch(const Slice& key,
const Slice& bloom_filter) const {
const size_t len = bloom_filter.size();
if (len < 2 || len > 0xffffffffU) {
return false;
}
const char* array = bloom_filter.data();
const uint32_t bits = static_cast<uint32_t>(len - 1) * 8;
// Use the encoded k so that we can read filters generated by
// bloom filters created using different parameters.
const int k = static_cast<uint8_t>(array[len - 1]);
if (k > 30) {
// Reserved for potentially new encodings for short bloom filters.
// Consider it a match.
return true;
}
// NB: using stored k not num_probes for whole_bits_per_key_
return LegacyNoLocalityBloomImpl::HashMayMatch(BloomHash(key), bits, k,
array);
}
FilterBitsBuilder* BloomFilterPolicy::GetFilterBitsBuilder() const {
// This code path should no longer be used, for the built-in
// BloomFilterPolicy. Internal to RocksDB and outside
// BloomFilterPolicy, only get a FilterBitsBuilder with
// BloomFilterPolicy::GetBuilderFromContext(), which will call
// BloomFilterPolicy::GetBuilderWithContext(). RocksDB users have
// been warned (HISTORY.md) that they can no longer call this on
// the built-in BloomFilterPolicy (unlikely).
assert(false);
return GetBuilderWithContext(FilterBuildingContext(BlockBasedTableOptions()));
}
FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext(
const FilterBuildingContext& context) const {
Mode cur = mode_;
// Unusual code construction so that we can have just
// one exhaustive switch without (risky) recursion
for (int i = 0; i < 2; ++i) {
switch (cur) {
case kAuto:
if (context.table_options.format_version < 5) {
cur = kLegacyBloom;
} else {
cur = kFastLocalBloom;
}
break;
case kDeprecatedBlock:
return nullptr;
case kFastLocalBloom:
return new FastLocalBloomBitsBuilder(millibits_per_key_);
case kLegacyBloom:
return new LegacyBloomBitsBuilder(whole_bits_per_key_);
}
}
assert(false);
return nullptr; // something legal
}
FilterBitsBuilder* BloomFilterPolicy::GetBuilderFromContext(
const FilterBuildingContext& context) {
if (context.table_options.filter_policy) {
return context.table_options.filter_policy->GetBuilderWithContext(context);
} else {
return nullptr;
}
}
// Read metadata to determine what kind of FilterBitsReader is needed
// and return a new one.
FilterBitsReader* BloomFilterPolicy::GetFilterBitsReader(
const Slice& contents) const {
uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
if (len_with_meta <= 5) {
// filter is empty or broken. Treat like zero keys added.
return new AlwaysFalseFilter();
}
// Legacy Bloom filter data:
// 0 +-----------------------------------+
// | Raw Bloom filter data |
// | ... |
// len +-----------------------------------+
// | byte for num_probes or |
// | marker for new implementations |
// len+1 +-----------------------------------+
// | four bytes for number of cache |
// | lines |
// len_with_meta +-----------------------------------+
int8_t raw_num_probes =
static_cast<int8_t>(contents.data()[len_with_meta - 5]);
// NB: *num_probes > 30 and < 128 probably have not been used, because of
// BloomFilterPolicy::initialize, unless directly calling
// LegacyBloomBitsBuilder as an API, but we are leaving those cases in
// limbo with LegacyBloomBitsReader for now.
if (raw_num_probes < 1) {
// Note: < 0 (or unsigned > 127) indicate special new implementations
// (or reserved for future use)
if (raw_num_probes == -1) {
// Marker for newer Bloom implementations
return GetBloomBitsReader(contents);
}
// otherwise
// Treat as zero probes (always FP) for now.
return new AlwaysTrueFilter();
}
// else attempt decode for LegacyBloomBitsReader
int num_probes = raw_num_probes;
assert(num_probes >= 1);
assert(num_probes <= 127);
uint32_t len = len_with_meta - 5;
assert(len > 0);
uint32_t num_lines = DecodeFixed32(contents.data() + len_with_meta - 4);
uint32_t log2_cache_line_size;
if (num_lines * CACHE_LINE_SIZE == len) {
// Common case
log2_cache_line_size = folly::constexpr_log2(CACHE_LINE_SIZE);
} else if (num_lines == 0 || len % num_lines != 0) {
// Invalid (no solution to num_lines * x == len)
// Treat as zero probes (always FP) for now.
return new AlwaysTrueFilter();
} else {
// Determine the non-native cache line size (from another system)
log2_cache_line_size = 0;
while ((num_lines << log2_cache_line_size) < len) {
++log2_cache_line_size;
}
if ((num_lines << log2_cache_line_size) != len) {
// Invalid (block size not a power of two)
// Treat as zero probes (always FP) for now.
return new AlwaysTrueFilter();
}
}
// if not early return
return new LegacyBloomBitsReader(contents.data(), num_probes, num_lines,
log2_cache_line_size);
}
// For newer Bloom filter implementations
FilterBitsReader* BloomFilterPolicy::GetBloomBitsReader(
const Slice& contents) const {
uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
uint32_t len = len_with_meta - 5;
assert(len > 0); // precondition
// New Bloom filter data:
// 0 +-----------------------------------+
// | Raw Bloom filter data |
// | ... |
// len +-----------------------------------+
// | char{-1} byte -> new Bloom filter |
// len+1 +-----------------------------------+
// | byte for subimplementation |
// | 0: FastLocalBloom |
// | other: reserved |
// len+2 +-----------------------------------+
// | byte for block_and_probes |
// | 0 in top 3 bits -> 6 -> 64-byte |
// | reserved: |
// | 1 in top 3 bits -> 7 -> 128-byte|
// | 2 in top 3 bits -> 8 -> 256-byte|
// | ... |
// | num_probes in bottom 5 bits, |
// | except 0 and 31 reserved |
// len+3 +-----------------------------------+
// | two bytes reserved |
// | possibly for hash seed |
// len_with_meta +-----------------------------------+
// Read more metadata (see above)
char sub_impl_val = contents.data()[len_with_meta - 4];
char block_and_probes = contents.data()[len_with_meta - 3];
int log2_block_bytes = ((block_and_probes >> 5) & 7) + 6;
int num_probes = (block_and_probes & 31);
if (num_probes < 1 || num_probes > 30) {
// Reserved / future safe
return new AlwaysTrueFilter();
}
uint16_t rest = DecodeFixed16(contents.data() + len_with_meta - 2);
if (rest != 0) {
// Reserved, possibly for hash seed
// Future safe
return new AlwaysTrueFilter();
}
if (sub_impl_val == 0) { // FastLocalBloom
if (log2_block_bytes == 6) { // Only block size supported for now
return new FastLocalBloomBitsReader(contents.data(), num_probes, len);
}
}
// otherwise
// Reserved / future safe
return new AlwaysTrueFilter();
}
const FilterPolicy* NewBloomFilterPolicy(double bits_per_key,
bool use_block_based_builder) {
BloomFilterPolicy::Mode m;
if (use_block_based_builder) {
m = BloomFilterPolicy::kDeprecatedBlock;
} else {
m = BloomFilterPolicy::kAuto;
}
assert(std::find(BloomFilterPolicy::kAllUserModes.begin(),
BloomFilterPolicy::kAllUserModes.end(),
m) != BloomFilterPolicy::kAllUserModes.end());
return new BloomFilterPolicy(bits_per_key, m);
}
FilterBuildingContext::FilterBuildingContext(
const BlockBasedTableOptions& _table_options)
: table_options(_table_options) {}
FilterPolicy::~FilterPolicy() { }
} // namespace rocksdb