rocksdb/table/block_based/full_filter_block.h
Levi Tamasi 29ccf2075c Store the filter bits reader alongside the filter block contents (#5936)
Summary:
Amongst other things, PR https://github.com/facebook/rocksdb/issues/5504 refactored the filter block readers so that
only the filter block contents are stored in the block cache (as opposed to the
earlier design where the cache stored the filter block reader itself, leading to
potentially dangling pointers and concurrency bugs). However, this change
introduced a performance hit since with the new code, the metadata fields are
re-parsed upon every access. This patch reunites the block contents with the
filter bits reader to eliminate this overhead; since this is still a self-contained
pure data object, it is safe to store it in the cache. (Note: this is similar to how
the zstd digest is handled.)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5936

Test Plan:
make asan_check

filter_bench results for the old code:

```
$ ./filter_bench -quick
WARNING: Assertions are enabled; benchmarks unnecessarily slow
Building...
Build avg ns/key: 26.7153
Number of filters: 16669
Total memory (MB): 200.009
Bits/key actual: 10.0647
----------------------------
Inside queries...
  Dry run (46b) ns/op: 33.4258
  Single filter ns/op: 42.5974
  Random filter ns/op: 217.861
----------------------------
Outside queries...
  Dry run (25d) ns/op: 32.4217
  Single filter ns/op: 50.9855
  Random filter ns/op: 219.167
    Average FP rate %: 1.13993
----------------------------
Done. (For more info, run with -legend or -help.)

$ ./filter_bench -quick -use_full_block_reader
WARNING: Assertions are enabled; benchmarks unnecessarily slow
Building...
Build avg ns/key: 26.5172
Number of filters: 16669
Total memory (MB): 200.009
Bits/key actual: 10.0647
----------------------------
Inside queries...
  Dry run (46b) ns/op: 32.3556
  Single filter ns/op: 83.2239
  Random filter ns/op: 370.676
----------------------------
Outside queries...
  Dry run (25d) ns/op: 32.2265
  Single filter ns/op: 93.5651
  Random filter ns/op: 408.393
    Average FP rate %: 1.13993
----------------------------
Done. (For more info, run with -legend or -help.)
```

With the new code:

```
$ ./filter_bench -quick
WARNING: Assertions are enabled; benchmarks unnecessarily slow
Building...
Build avg ns/key: 25.4285
Number of filters: 16669
Total memory (MB): 200.009
Bits/key actual: 10.0647
----------------------------
Inside queries...
  Dry run (46b) ns/op: 31.0594
  Single filter ns/op: 43.8974
  Random filter ns/op: 226.075
----------------------------
Outside queries...
  Dry run (25d) ns/op: 31.0295
  Single filter ns/op: 50.3824
  Random filter ns/op: 226.805
    Average FP rate %: 1.13993
----------------------------
Done. (For more info, run with -legend or -help.)

$ ./filter_bench -quick -use_full_block_reader
WARNING: Assertions are enabled; benchmarks unnecessarily slow
Building...
Build avg ns/key: 26.5308
Number of filters: 16669
Total memory (MB): 200.009
Bits/key actual: 10.0647
----------------------------
Inside queries...
  Dry run (46b) ns/op: 33.2968
  Single filter ns/op: 58.6163
  Random filter ns/op: 291.434
----------------------------
Outside queries...
  Dry run (25d) ns/op: 32.1839
  Single filter ns/op: 66.9039
  Random filter ns/op: 292.828
    Average FP rate %: 1.13993
----------------------------
Done. (For more info, run with -legend or -help.)
```

Differential Revision: D17991712

Pulled By: ltamasi

fbshipit-source-id: 7ea205550217bfaaa1d5158ebd658e5832e60f29
2019-10-18 19:32:59 -07:00

140 lines
5.5 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <string>
#include <vector>
#include "db/dbformat.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "table/block_based/filter_block_reader_common.h"
#include "table/block_based/parsed_full_filter_block.h"
#include "util/hash.h"
namespace rocksdb {
class FilterPolicy;
class FilterBitsBuilder;
class FilterBitsReader;
// A FullFilterBlockBuilder is used to construct a full filter for a
// particular Table. It generates a single string which is stored as
// a special block in the Table.
// The format of full filter block is:
// +----------------------------------------------------------------+
// | full filter for all keys in sst file |
// +----------------------------------------------------------------+
// The full filter can be very large. At the end of it, we put
// num_probes: how many hash functions are used in bloom filter
//
// Typical call sequence, as far as this header shows: Add() once per key,
// then Finish() to obtain the serialized filter. The bit-level work is
// delegated to the FilterBitsBuilder held in filter_bits_builder_.
// NOTE(review): the exact trailing metadata layout is decided by the
// FilterBitsBuilder implementation, which is not visible here - confirm
// against the filter policy before relying on the diagram above.
class FullFilterBlockBuilder : public FilterBlockBuilder {
public:
// prefix_extractor: non-owning pointer, exposed to subclasses through
//   prefix_extractor(); may be dangling by destruction time (see the
//   note on the private members).
// whole_key_filtering: presumably selects whether whole keys (as opposed
//   to only prefixes) are added to the filter - confirm in the .cc file.
// filter_bits_builder: builder created by the filter policy. This object
//   is responsible for deleting it; it is presumably adopted into
//   filter_bits_builder_ by the out-of-line constructor.
explicit FullFilterBlockBuilder(const SliceTransform* prefix_extractor,
bool whole_key_filtering,
FilterBitsBuilder* filter_bits_builder);
// No copying allowed
FullFilterBlockBuilder(const FullFilterBlockBuilder&) = delete;
void operator=(const FullFilterBlockBuilder&) = delete;
// bits_builder is created in filter_policy and should be passed in here
// directly; it is deleted here. The destructor body itself is empty:
// the unique_ptr members release what they own.
~FullFilterBlockBuilder() {}
// Always false: this builder does not produce a block-based filter.
virtual bool IsBlockBased() override { return false; }
// No-op in this class; the block offset is ignored.
virtual void StartBlock(uint64_t /*block_offset*/) override {}
// Feeds one key into the filter under construction (defined out of line).
virtual void Add(const Slice& key) override;
// Returns the current value of num_added_.
virtual size_t NumAdded() const override { return num_added_; }
// Produces the serialized filter block (defined out of line).
// NOTE(review): `tmp` looks unused by a full filter and `status` looks
// like an out-parameter for the result code - confirm in the .cc file.
virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
// Re-expose the base-class Finish overloads, which the override above
// would otherwise hide from callers using this type.
using FilterBlockBuilder::Finish;
protected:
// Virtual hook for adding a single entry; subclasses may override.
virtual void AddKey(const Slice& key);
// Owning handle to the bits builder; destroyed together with this object.
std::unique_ptr<FilterBitsBuilder> filter_bits_builder_;
// Virtual hook that resets builder state; subclasses may override.
// NOTE(review): exactly which fields are reset is defined in the .cc file.
virtual void Reset();
// Adds the prefix of `key` to the filter - presumably the prefix computed
// by prefix_extractor_; confirm in the .cc file.
void AddPrefix(const Slice& key);
// Accessor for the non-owning prefix extractor pointer.
const SliceTransform* prefix_extractor() { return prefix_extractor_; }
private:
// important: all of these might point to invalid addresses
// at the time of destruction of this filter block. destructor
// should NOT dereference them.
const SliceTransform* prefix_extractor_;
// Value of the constructor's whole_key_filtering argument (presumably).
bool whole_key_filtering_;
// The two flag/string pairs below presumably remember the most recently
// added whole key and prefix, e.g. to skip consecutive duplicates -
// TODO confirm against Add()/AddPrefix() in the .cc file.
bool last_whole_key_recorded_;
std::string last_whole_key_str_;
bool last_prefix_recorded_;
std::string last_prefix_str_;
// Counter reported by NumAdded().
uint32_t num_added_;
// Owned character buffer. Presumably backs the Slice returned by Finish()
// so that it stays valid after the call - confirm in the .cc file.
std::unique_ptr<const char[]> filter_data_;
};
// A FullFilterBlockReader is used to parse a full filter from an SST table.
// KeyMayMatch and PrefixMayMatch (and their batched counterparts
// KeysMayMatch and PrefixesMayMatch) trigger filter checking.
// The filter itself is accessed as a ParsedFullFilterBlock through the
// FilterBlockReaderCommon base class.
class FullFilterBlockReader
: public FilterBlockReaderCommon<ParsedFullFilterBlock> {
public:
// t: the table this reader belongs to (non-owning pointer).
// filter_block: cache entry for the parsed filter block, taken by rvalue
//   reference so the caller hands it over to the reader.
FullFilterBlockReader(const BlockBasedTable* t,
CachableEntry<ParsedFullFilterBlock>&& filter_block);
// Factory returning a reader behind the FilterBlockReader interface.
// NOTE(review): use_cache / prefetch / pin presumably control whether the
// filter block is loaded eagerly and kept pinned by the reader - confirm
// in the .cc file.
static std::unique_ptr<FilterBlockReader> Create(
const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
bool use_cache, bool prefetch, bool pin,
BlockCacheLookupContext* lookup_context);
// Always false: this reader does not serve a block-based filter.
bool IsBlockBased() override { return false; }
// Single-key query. As is usual for this kind of filter, a `false` result
// is presumably definitive while `true` may be a false positive.
// NOTE(review): block_offset is presumably irrelevant for a full filter,
// and no_io presumably forbids reading the filter from storage - confirm.
bool KeyMayMatch(const Slice& key, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr, GetContext* get_context,
BlockCacheLookupContext* lookup_context) override;
// Single-prefix query; same parameter conventions as KeyMayMatch.
bool PrefixMayMatch(const Slice& prefix,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
const Slice* const const_ikey_ptr,
GetContext* get_context,
BlockCacheLookupContext* lookup_context) override;
// Batched key query over a MultiGetRange. Returns void, so results are
// presumably recorded by updating *range - confirm in the .cc file.
void KeysMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
BlockCacheLookupContext* lookup_context) override;
// Batched prefix query over a MultiGetRange; see KeysMayMatch.
void PrefixesMayMatch(MultiGetRange* range,
const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
BlockCacheLookupContext* lookup_context) override;
// Approximate memory footprint of this reader (defined out of line).
size_t ApproximateMemoryUsage() const override;
// Range query used with an optional iterate_upper_bound.
// NOTE(review): *filter_checked presumably reports whether the filter was
// actually consulted - confirm in the .cc file.
bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key,
const SliceTransform* prefix_extractor,
const Comparator* comparator,
const Slice* const const_ikey_ptr, bool* filter_checked,
bool need_upper_bound_check,
BlockCacheLookupContext* lookup_context) override;
private:
// Single-entry helper - presumably shared by KeyMayMatch and
// PrefixMayMatch (confirm in the .cc file).
bool MayMatch(const Slice& entry, bool no_io, GetContext* get_context,
BlockCacheLookupContext* lookup_context) const;
// Batched helper - presumably shared by KeysMayMatch and
// PrefixesMayMatch (confirm in the .cc file).
void MayMatch(MultiGetRange* range, bool no_io,
const SliceTransform* prefix_extractor,
BlockCacheLookupContext* lookup_context) const;
// Helper taking the iterator upper bound, a prefix, and a comparator -
// presumably decides whether the prefix filter may be used for a bounded
// range scan (see RangeMayExist); confirm in the .cc file.
bool IsFilterCompatible(const Slice* iterate_upper_bound, const Slice& prefix,
const Comparator* comparator) const;
private:
// NOTE(review): these two fields look like cached properties of the
// table's prefix extractor (whether it reports a fixed full length, and
// that length), set by the out-of-line constructor - confirm in the .cc file.
bool full_length_enabled_;
size_t prefix_extractor_full_length_;
};
} // namespace rocksdb