Add table properties for number of entries added to filters (#8323)

Summary:
With Ribbon filter work and possible variance in actual bits
per key (or prefix; general term "entry") to achieve certain FP rates,
I've received a request to be able to track actual bits per key in
generated filters. This change adds a num_filter_entries table
property, which can be combined with filter_size to get bits per key
(entry).

This can vary from num_entries in at least these ways:
* Different versions of same key are only counted once in filters.
* With prefix filters, several user keys map to the same filter entry.
* A single filter can include both prefixes and user keys.

Note that FilterBlockBuilder::NumAdded() didn't do anything useful
except distinguish empty from non-empty.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8323

Test Plan: basic unit test included, others updated

Reviewed By: jay-zhuang

Differential Revision: D28596210

Pulled By: pdillinger

fbshipit-source-id: 529a111f3c84501e5a470bc84705e436ee68c376
This commit is contained in:
Peter Dillinger 2021-05-21 17:10:29 -07:00 committed by Facebook GitHub Bot
parent 6c86543590
commit 3469d60fcc
18 changed files with 123 additions and 29 deletions

View File

@ -20,6 +20,7 @@
* Added DB::Properties::kBlockCacheEntryStats for querying statistics on what percentage of block cache is used by various kinds of blocks, etc. using DB::GetProperty and DB::GetMapProperty. The same information is now dumped to info LOG periodically according to `stats_dump_period_sec`.
* Add an experimental Remote Compaction feature, which allows the user to run Compaction on a different host or process. The feature is still under development, currently only works on some basic use cases. The interface will be changed without backward/forward compatibility support.
* RocksDB now validates the total number of entries read during flush by comparing it with the counter of entries inserted into the memtable. If the counts differ and `flush_verify_memtable_count = true` (default), the flush fails. Otherwise, the mismatch is only logged to the info log.
* Add `TableProperties::num_filter_entries`, which can be used with `TableProperties::filter_size` to calculate the effective bits per filter entry (unique user key or prefix) for a table file.
### Performance Improvements
* BlockPrefetcher is used by iterators to prefetch data if they anticipate more data to be used in the future. It is enabled implicitly by RocksDB. Added a change to take the read pattern into account, so that prefetching is done only if reads are sequential. This disables prefetching for random reads in MultiGet and iterators, where the exponentially increasing readahead_size would otherwise lead to large prefetches.

View File

@ -15,6 +15,7 @@
#include "port/stack_trace.h"
#include "rocksdb/perf_context.h"
#include "table/block_based/filter_policy_internal.h"
#include "util/string_util.h"
namespace ROCKSDB_NAMESPACE {
@ -506,6 +507,21 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) {
ASSERT_LE(reads, 3 * N / 100);
}
#ifndef ROCKSDB_LITE
// Sanity check some table properties
std::map<std::string, std::string> props;
ASSERT_TRUE(db_->GetMapProperty(
handles_[1], DB::Properties::kAggregatedTableProperties, &props));
uint64_t nkeys = N + N / 100;
uint64_t filter_size = ParseUint64(props["filter_size"]);
EXPECT_LE(filter_size,
(partition_filters_ ? 12 : 11) * nkeys / /*bits / byte*/ 8);
EXPECT_GE(filter_size, 10 * nkeys / /*bits / byte*/ 8);
uint64_t num_filter_entries = ParseUint64(props["num_filter_entries"]);
EXPECT_EQ(num_filter_entries, nkeys);
#endif // ROCKSDB_LITE
env_->delay_sstable_sync_.store(false, std::memory_order_release);
Close();
} while (ChangeCompactOptions());

View File

@ -108,6 +108,7 @@ void EventHelpers::LogAndNotifyTableFileCreationFinished(
table_properties.num_entries)
<< "num_data_blocks" << table_properties.num_data_blocks
<< "num_entries" << table_properties.num_entries
<< "num_filter_entries" << table_properties.num_filter_entries
<< "num_deletions" << table_properties.num_deletions
<< "num_merge_operands" << table_properties.num_merge_operands
<< "num_range_deletions" << table_properties.num_range_deletions

View File

@ -42,11 +42,24 @@ class FilterBitsBuilder {
public:
virtual ~FilterBitsBuilder() {}
// Add Key to filter, you could use any way to store the key.
// Such as: storing hashes or original keys
// Keys are in sorted order and duplicated keys are possible.
// Add a key (or prefix) to the filter. Typically, a builder will keep
// a set of 64-bit key hashes and only build the filter in Finish
// when the final number of keys is known. Keys are added in sorted order
// and duplicated keys are possible, so typically, the builder will
// only add this key if its hash is different from the most recently
// added.
virtual void AddKey(const Slice& key) = 0;
// Reports roughly how many unique keys (and/or prefixes) have been added
// to this builder. RocksDB calls this before Finish to populate
// TableProperties::num_filter_entries, so the value does not have to be
// exact.
virtual size_t EstimateEntriesAdded() {
  // Default implementation for backward compatibility with builders that
  // do not override this method. 0 conspicuously stands for "unknown".
  constexpr size_t kUnknownEntryCount = 0;
  return kUnknownEntryCount;
}
// Generate the filter using the keys that are added
// The return value of this function would be the filter bits,
// The ownership of actual data is set to buf

View File

@ -44,6 +44,7 @@ struct TablePropertiesNames {
static const std::string kRawValueSize;
static const std::string kNumDataBlocks;
static const std::string kNumEntries;
static const std::string kNumFilterEntries;
static const std::string kDeletedKeys;
static const std::string kMergeOperands;
static const std::string kNumRangeDeletions;
@ -175,6 +176,8 @@ struct TableProperties {
uint64_t num_data_blocks = 0;
// the number of entries in this table
uint64_t num_entries = 0;
// the number of unique entries (keys or prefixes) added to filters
uint64_t num_filter_entries = 0;
// the number of deletions in the table
uint64_t num_deletions = 0;
// the number of merge operands in the table

View File

@ -68,7 +68,7 @@ BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
whole_key_filtering_(table_opt.whole_key_filtering),
prev_prefix_start_(0),
prev_prefix_size_(0),
num_added_(0) {
total_added_in_built_(0) {
assert(policy_);
}
@ -80,6 +80,10 @@ void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) {
}
}
// Returns the running entry count for this builder: the total already
// accumulated in total_added_in_built_ plus one for every offset currently
// held in start_.
size_t BlockBasedFilterBlockBuilder::EstimateEntriesAdded() {
  const size_t pending_entries = start_.size();
  // total_added_in_built_ is a uint64_t; spell out the conversion to the
  // size_t return type instead of leaving it implicit.
  return static_cast<size_t>(total_added_in_built_ + pending_entries);
}
void BlockBasedFilterBlockBuilder::Add(const Slice& key_without_ts) {
if (prefix_extractor_ && prefix_extractor_->InDomain(key_without_ts)) {
AddPrefix(key_without_ts);
@ -92,7 +96,6 @@ void BlockBasedFilterBlockBuilder::Add(const Slice& key_without_ts) {
// Add key to filter if needed
inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) {
num_added_++;
start_.push_back(entries_.size());
entries_.append(key.data(), key.size());
}
@ -118,6 +121,7 @@ Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
Status* status) {
// In this impl we ignore BlockHandle
*status = Status::OK();
if (!start_.empty()) {
GenerateFilter();
}
@ -140,6 +144,7 @@ void BlockBasedFilterBlockBuilder::GenerateFilter() {
filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
return;
}
total_added_in_built_ += num_entries;
// Make list of keys from flattened key structure
start_.push_back(entries_.size()); // Simplify length computation

View File

@ -45,7 +45,10 @@ class BlockBasedFilterBlockBuilder : public FilterBlockBuilder {
virtual bool IsBlockBased() override { return true; }
virtual void StartBlock(uint64_t block_offset) override;
virtual void Add(const Slice& key_without_ts) override;
virtual size_t NumAdded() const override { return num_added_; }
// Empty means nothing has been added: start_ holds no pending offsets and
// no filter offset has been recorded in filter_offsets_.
virtual bool IsEmpty() const override {
  const bool nothing_pending = start_.empty();
  return nothing_pending && filter_offsets_.empty();
}
virtual size_t EstimateEntriesAdded() override;
virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
using FilterBlockBuilder::Finish;
@ -70,7 +73,7 @@ class BlockBasedFilterBlockBuilder : public FilterBlockBuilder {
std::string result_; // Filter data computed so far
std::vector<Slice> tmp_entries_; // policy_->CreateFilter() argument
std::vector<uint32_t> filter_offsets_;
size_t num_added_; // Number of keys added
uint64_t total_added_in_built_; // Total keys added to filters built so far
};
// A FilterBlockReader is used to parse filter from SST table.

View File

@ -76,17 +76,26 @@ TEST_F(FilterBlockTest, EmptyBuilder) {
TEST_F(FilterBlockTest, SingleChunk) {
BlockBasedFilterBlockBuilder builder(nullptr, table_options_);
ASSERT_EQ(0, builder.NumAdded());
ASSERT_TRUE(builder.IsEmpty());
builder.StartBlock(100);
builder.Add("foo");
ASSERT_FALSE(builder.IsEmpty());
builder.Add("bar");
builder.Add("bar");
builder.Add("box");
builder.StartBlock(200);
builder.Add("box");
builder.StartBlock(300);
builder.Add("hello");
ASSERT_EQ(5, builder.NumAdded());
Slice slice(builder.Finish());
// XXX: "bar" should only count once but is counted twice. This actually
// indicates a serious space usage bug in old block-based filter. Good
// that it is deprecated.
// "box" counts twice, because it's in distinct blocks.
ASSERT_EQ(6, builder.EstimateEntriesAdded());
ASSERT_FALSE(builder.IsEmpty());
Status s;
Slice slice = builder.Finish(BlockHandle(), &s);
ASSERT_OK(s);
CachableEntry<BlockContents> block(
new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */,

View File

@ -1422,9 +1422,11 @@ Status BlockBasedTableBuilder::InsertBlockInCache(const Slice& block_contents,
void BlockBasedTableBuilder::WriteFilterBlock(
MetaIndexBuilder* meta_index_builder) {
BlockHandle filter_block_handle;
bool empty_filter_block = (rep_->filter_builder == nullptr ||
rep_->filter_builder->NumAdded() == 0);
bool empty_filter_block =
(rep_->filter_builder == nullptr || rep_->filter_builder->IsEmpty());
if (ok() && !empty_filter_block) {
rep_->props.num_filter_entries +=
rep_->filter_builder->EstimateEntriesAdded();
Status s = Status::Incomplete();
while (ok() && s.IsIncomplete()) {
Slice filter_content =

View File

@ -62,7 +62,9 @@ class FilterBlockBuilder {
virtual void StartBlock(uint64_t block_offset) = 0; // Start new block filter
virtual void Add(
const Slice& key_without_ts) = 0; // Add a key to current filter
virtual size_t NumAdded() const = 0; // Number of keys added
virtual bool IsEmpty() const = 0; // Empty == none added
// For reporting stats on how many entries the builder considered unique
virtual size_t EstimateEntriesAdded() = 0;
Slice Finish() { // Generate Filter
const BlockHandle empty_handle;
Status dont_care_status;

View File

@ -63,6 +63,10 @@ class XXH3pFilterBitsBuilder : public BuiltinFilterBitsBuilder {
}
}
// Reports the current number of elements in hash_entries_ as the estimate
// of entries added (presumably one stored hash per accepted key or prefix;
// confirm against AddKey).
virtual size_t EstimateEntriesAdded() override {
  const size_t entry_count = hash_entries_.size();
  return entry_count;
}
protected:
static constexpr uint32_t kMetadataLen = 5;
@ -763,6 +767,10 @@ class LegacyBloomBitsBuilder : public BuiltinFilterBitsBuilder {
void AddKey(const Slice& key) override;
// Reports the current number of elements in hash_entries_ as the estimate
// of entries added (presumably one stored hash per accepted key or prefix;
// confirm against AddKey).
virtual size_t EstimateEntriesAdded() override {
  const size_t entry_count = hash_entries_.size();
  return entry_count;
}
Slice Finish(std::unique_ptr<const char[]>* buf) override;
size_t CalculateSpace(size_t num_entries) override {

View File

@ -23,11 +23,15 @@ FullFilterBlockBuilder::FullFilterBlockBuilder(
last_whole_key_recorded_(false),
last_prefix_recorded_(false),
last_key_in_domain_(false),
num_added_(0) {
any_added_(false) {
assert(filter_bits_builder != nullptr);
filter_bits_builder_.reset(filter_bits_builder);
}
// A full filter block builder keeps no count of its own; the estimate is
// whatever the owned filter bits builder reports.
size_t FullFilterBlockBuilder::EstimateEntriesAdded() {
  auto& bits_builder = *filter_bits_builder_;
  return bits_builder.EstimateEntriesAdded();
}
void FullFilterBlockBuilder::Add(const Slice& key_without_ts) {
const bool add_prefix =
prefix_extractor_ && prefix_extractor_->InDomain(key_without_ts);
@ -69,7 +73,7 @@ void FullFilterBlockBuilder::Add(const Slice& key_without_ts) {
// Add key to filter if needed
inline void FullFilterBlockBuilder::AddKey(const Slice& key) {
filter_bits_builder_->AddKey(key);
num_added_++;
any_added_ = true;
}
// Add prefix to filter if needed
@ -102,8 +106,8 @@ Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
Reset();
// In this impl we ignore BlockHandle
*status = Status::OK();
if (num_added_ != 0) {
num_added_ = 0;
if (any_added_) {
any_added_ = false;
return filter_bits_builder_->Finish(&filter_data_);
}
return Slice();

View File

@ -51,7 +51,8 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
virtual bool IsBlockBased() override { return false; }
virtual void StartBlock(uint64_t /*block_offset*/) override {}
virtual void Add(const Slice& key_without_ts) override;
virtual size_t NumAdded() const override { return num_added_; }
virtual bool IsEmpty() const override { return !any_added_; }
virtual size_t EstimateEntriesAdded() override;
virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
using FilterBlockBuilder::Finish;
@ -78,8 +79,7 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
// filter partition will be added to the current partition if
// last_key_in_domain_ is true, regardless of the current key.
bool last_key_in_domain_;
uint32_t num_added_;
bool any_added_;
std::unique_ptr<const char[]> filter_data_;
};

View File

@ -3,13 +3,16 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "table/block_based/full_filter_block.h"
#include <set>
#include "table/block_based/full_filter_block.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/status.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/mock_block_based_table.h"
#include "table/block_based/filter_policy_internal.h"
#include "table/block_based/mock_block_based_table.h"
#include "table/format.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/coding.h"
@ -239,11 +242,9 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) {
const bool WHOLE_KEY = true;
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
bits_builder);
ASSERT_EQ(0, builder.NumAdded());
ASSERT_EQ(0, bits_builder->CountUnique());
// adds key and empty prefix; both abstractions count them
builder.Add("key1");
ASSERT_EQ(2, builder.NumAdded());
ASSERT_EQ(2, bits_builder->CountUnique());
// Add different key (unique) and also empty prefix (not unique).
// From here in this test, it's immaterial whether the block builder
@ -262,7 +263,6 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) {
const bool WHOLE_KEY = true;
FullFilterBlockBuilder builder(prefix_extractor.get(), WHOLE_KEY,
bits_builder);
ASSERT_EQ(0, builder.NumAdded());
builder.Add(""); // test with empty key too
builder.Add("prefix1key1");
builder.Add("prefix1key1");
@ -275,14 +275,19 @@ TEST_F(FullFilterBlockTest, DuplicateEntries) {
TEST_F(FullFilterBlockTest, SingleChunk) {
FullFilterBlockBuilder builder(nullptr, true, GetBuilder());
ASSERT_EQ(0, builder.NumAdded());
ASSERT_TRUE(builder.IsEmpty());
builder.Add("foo");
ASSERT_FALSE(builder.IsEmpty());
builder.Add("bar");
builder.Add("box");
builder.Add("box");
builder.Add("hello");
ASSERT_EQ(5, builder.NumAdded());
Slice slice = builder.Finish();
// "box" only counts once
ASSERT_EQ(4, builder.EstimateEntriesAdded());
ASSERT_FALSE(builder.IsEmpty());
Status s;
Slice slice = builder.Finish(BlockHandle(), &s);
ASSERT_OK(s);
CachableEntry<ParsedFullFilterBlock> block(
new ParsedFullFilterBlock(table_options_.filter_policy.get(),

View File

@ -33,7 +33,8 @@ PartitionedFilterBlockBuilder::PartitionedFilterBlockBuilder(
true /*use_delta_encoding*/,
use_value_delta_encoding),
p_index_builder_(p_index_builder),
keys_added_to_partition_(0) {
keys_added_to_partition_(0),
total_added_in_built_(0) {
keys_per_partition_ = static_cast<uint32_t>(
filter_bits_builder_->ApproximateNumEntries(partition_size));
if (keys_per_partition_ < 1) {
@ -85,6 +86,7 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock(
}
}
total_added_in_built_ += filter_bits_builder_->EstimateEntriesAdded();
Slice filter = filter_bits_builder_->Finish(&filter_gc.back());
std::string& index_key = p_index_builder_->GetPartitionKey();
filters.push_back({index_key, filter});
@ -102,6 +104,10 @@ void PartitionedFilterBlockBuilder::AddKey(const Slice& key) {
keys_added_to_partition_++;
}
// Combines the count accumulated in total_added_in_built_ with the bits
// builder's estimate for whatever it currently holds.
size_t PartitionedFilterBlockBuilder::EstimateEntriesAdded() {
  const size_t in_current_builder =
      filter_bits_builder_->EstimateEntriesAdded();
  // total_added_in_built_ is a uint64_t; spell out the conversion to the
  // size_t return type instead of leaving it implicit.
  return static_cast<size_t>(total_added_in_built_ + in_current_builder);
}
Slice PartitionedFilterBlockBuilder::Finish(
const BlockHandle& last_partition_block_handle, Status* status) {
if (finishing_filters == true) {
@ -131,6 +137,8 @@ Slice PartitionedFilterBlockBuilder::Finish(
if (UNLIKELY(filters.empty())) {
*status = Status::OK();
if (finishing_filters) {
// Simplest to just add them all at the end
total_added_in_built_ = 0;
if (p_index_builder_->seperator_is_key_plus_seq()) {
return index_on_filter_block_builder_.Finish();
} else {

View File

@ -8,6 +8,7 @@
#include <list>
#include <string>
#include <unordered_map>
#include "db/dbformat.h"
#include "index_builder.h"
#include "rocksdb/options.h"
@ -33,6 +34,7 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
void AddKey(const Slice& key) override;
void Add(const Slice& key) override;
size_t EstimateEntriesAdded() override;
virtual Slice Finish(const BlockHandle& last_partition_block_handle,
Status* status) override;
@ -62,6 +64,9 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
uint32_t keys_per_partition_;
// The number of keys added to the last partition so far
uint32_t keys_added_to_partition_;
// According to the bits builders, how many keys/prefixes added
// in all the filters we have fully built
uint64_t total_added_in_built_;
BlockHandle last_encoded_handle_;
};

View File

@ -83,6 +83,7 @@ void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) {
Add(TablePropertiesNames::kIndexValueIsDeltaEncoded,
props.index_value_is_delta_encoded);
Add(TablePropertiesNames::kNumEntries, props.num_entries);
Add(TablePropertiesNames::kNumFilterEntries, props.num_filter_entries);
Add(TablePropertiesNames::kDeletedKeys, props.num_deletions);
Add(TablePropertiesNames::kMergeOperands, props.num_merge_operands);
Add(TablePropertiesNames::kNumRangeDeletions, props.num_range_deletions);
@ -269,6 +270,8 @@ Status ReadProperties(const ReadOptions& read_options,
{TablePropertiesNames::kNumDataBlocks,
&new_table_properties->num_data_blocks},
{TablePropertiesNames::kNumEntries, &new_table_properties->num_entries},
{TablePropertiesNames::kNumFilterEntries,
&new_table_properties->num_filter_entries},
{TablePropertiesNames::kDeletedKeys,
&new_table_properties->num_deletions},
{TablePropertiesNames::kMergeOperands,

View File

@ -111,6 +111,8 @@ std::string TableProperties::ToString(
}
AppendProperty(result, "filter block size", filter_size, prop_delim,
kv_delim);
AppendProperty(result, "# entries for filter", num_filter_entries, prop_delim,
kv_delim);
AppendProperty(result, "(estimated) table size",
data_size + index_size + filter_size, prop_delim, kv_delim);
@ -193,6 +195,7 @@ void TableProperties::Add(const TableProperties& tp) {
raw_value_size += tp.raw_value_size;
num_data_blocks += tp.num_data_blocks;
num_entries += tp.num_entries;
num_filter_entries += tp.num_filter_entries;
num_deletions += tp.num_deletions;
num_merge_operands += tp.num_merge_operands;
num_range_deletions += tp.num_range_deletions;
@ -214,6 +217,7 @@ TableProperties::GetAggregatablePropertiesAsMap() const {
rv["raw_value_size"] = raw_value_size;
rv["num_data_blocks"] = num_data_blocks;
rv["num_entries"] = num_entries;
rv["num_filter_entries"] = num_filter_entries;
rv["num_deletions"] = num_deletions;
rv["num_merge_operands"] = num_merge_operands;
rv["num_range_deletions"] = num_range_deletions;
@ -251,6 +255,8 @@ const std::string TablePropertiesNames::kNumDataBlocks =
"rocksdb.num.data.blocks";
const std::string TablePropertiesNames::kNumEntries =
"rocksdb.num.entries";
const std::string TablePropertiesNames::kNumFilterEntries =
"rocksdb.num.filter_entries";
const std::string TablePropertiesNames::kDeletedKeys = "rocksdb.deleted.keys";
const std::string TablePropertiesNames::kMergeOperands =
"rocksdb.merge.operands";