Compare commits

...

4 Commits

Author SHA1 Message Date
Peter Dillinger
6904fdb1a7 Update HISTORY.md for C API 2021-08-20 16:05:55 -07:00
Peter Dillinger
46c4a5a5da Merge remote-tracking branch 'origin/master' into ribbon_bloom_hybrid 2021-08-20 16:00:50 -07:00
Peter Dillinger
0d3948532a C API update and fixes 2021-08-19 22:43:18 -07:00
Peter Dillinger
c4d22d517f Add Bloom/Ribbon hybrid API support
Summary: This is essentially a resurrection and fix of the level support
from #8198, which was reverted in #8212. TODO: finish summary

Test Plan: new + updated tests
2021-08-19 17:23:00 -07:00
13 changed files with 341 additions and 70 deletions

View File

@ -23,10 +23,12 @@
* Added a stat rocksdb.secondary.cache.hits * Added a stat rocksdb.secondary.cache.hits
* Added a PerfContext counter secondary_cache_hit_count * Added a PerfContext counter secondary_cache_hit_count
* The integrated BlobDB implementation now supports the tickers `BLOB_DB_BLOB_FILE_BYTES_READ`, `BLOB_DB_GC_NUM_KEYS_RELOCATED`, and `BLOB_DB_GC_BYTES_RELOCATED`, as well as the histograms `BLOB_DB_COMPRESSION_MICROS` and `BLOB_DB_DECOMPRESSION_MICROS`. * The integrated BlobDB implementation now supports the tickers `BLOB_DB_BLOB_FILE_BYTES_READ`, `BLOB_DB_GC_NUM_KEYS_RELOCATED`, and `BLOB_DB_GC_BYTES_RELOCATED`, as well as the histograms `BLOB_DB_COMPRESSION_MICROS` and `BLOB_DB_DECOMPRESSION_MICROS`.
* Added a hybrid configuration of Ribbon filter and Bloom filter where some LSM levels use Ribbon for memory space efficiency and some use Bloom for speed. See NewRibbonFilterPolicy. This also changes the default behavior of NewRibbonFilterPolicy to use Bloom for flushes under Leveled and Universal compaction and Ribbon otherwise. The C API function `rocksdb_filterpolicy_create_ribbon` is unchanged, and a new function `rocksdb_filterpolicy_create_ribbon_hybrid` has been added.
## Public API change ## Public API change
* Added APIs to decode and replay trace file via Replayer class. Added `DB::NewDefaultReplayer()` to create a default Replayer instance. Added `TraceReader::Reset()` to restart reading a trace file. Created trace_record.h, trace_record_result.h and utilities/replayer.h files to access the decoded Trace records, replay them, and query the actual operation results. * Added APIs to decode and replay trace file via Replayer class. Added `DB::NewDefaultReplayer()` to create a default Replayer instance. Added `TraceReader::Reset()` to restart reading a trace file. Created trace_record.h, trace_record_result.h and utilities/replayer.h files to access the decoded Trace records, replay them, and query the actual operation results.
* Added Configurable::GetOptionsMap to the public API for use in creating new Customizable classes. * Added Configurable::GetOptionsMap to the public API for use in creating new Customizable classes.
* Generalized bits_per_key parameters in C API from int to double for greater configurability.
### Performance Improvements ### Performance Improvements
* Try to avoid updating DBOptions if `SetDBOptions()` does not change any option value. * Try to avoid updating DBOptions if `SetDBOptions()` does not change any option value.

23
db/c.cc
View File

@ -3840,7 +3840,8 @@ void rocksdb_filterpolicy_destroy(rocksdb_filterpolicy_t* filter) {
delete filter; delete filter;
} }
rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_format(int bits_per_key, bool original_format) { rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_format(
double bits_per_key, bool original_format) {
// Make a rocksdb_filterpolicy_t, but override all of its methods so // Make a rocksdb_filterpolicy_t, but override all of its methods so
// they delegate to a NewBloomFilterPolicy() instead of user // they delegate to a NewBloomFilterPolicy() instead of user
// supplied C functions. // supplied C functions.
@ -3875,16 +3876,17 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_format(int bits_per_ke
return wrapper; return wrapper;
} }
rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_full(int bits_per_key) { rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_full(
double bits_per_key) {
return rocksdb_filterpolicy_create_bloom_format(bits_per_key, false); return rocksdb_filterpolicy_create_bloom_format(bits_per_key, false);
} }
rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(int bits_per_key) { rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(double bits_per_key) {
return rocksdb_filterpolicy_create_bloom_format(bits_per_key, true); return rocksdb_filterpolicy_create_bloom_format(bits_per_key, true);
} }
rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_format( rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_format(
int bloom_equivalent_bits_per_key) { double bloom_equivalent_bits_per_key, int bloom_before_level) {
// Make a rocksdb_filterpolicy_t, but override all of its methods so // Make a rocksdb_filterpolicy_t, but override all of its methods so
// they delegate to a NewRibbonFilterPolicy() instead of user // they delegate to a NewRibbonFilterPolicy() instead of user
// supplied C functions. // supplied C functions.
@ -3911,7 +3913,8 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_format(
static void DoNothing(void*) {} static void DoNothing(void*) {}
}; };
Wrapper* wrapper = new Wrapper; Wrapper* wrapper = new Wrapper;
wrapper->rep_ = NewRibbonFilterPolicy(bloom_equivalent_bits_per_key); wrapper->rep_ =
NewRibbonFilterPolicy(bloom_equivalent_bits_per_key, bloom_before_level);
wrapper->state_ = nullptr; wrapper->state_ = nullptr;
wrapper->delete_filter_ = nullptr; wrapper->delete_filter_ = nullptr;
wrapper->destructor_ = &Wrapper::DoNothing; wrapper->destructor_ = &Wrapper::DoNothing;
@ -3919,9 +3922,15 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_format(
} }
rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon( rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon(
int bloom_equivalent_bits_per_key) { double bloom_equivalent_bits_per_key) {
return rocksdb_filterpolicy_create_ribbon_format( return rocksdb_filterpolicy_create_ribbon_format(
bloom_equivalent_bits_per_key); bloom_equivalent_bits_per_key, /*bloom_before_level = disabled*/ -1);
}
rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_hybrid(
double bloom_equivalent_bits_per_key, int bloom_before_level) {
return rocksdb_filterpolicy_create_ribbon_format(
bloom_equivalent_bits_per_key, bloom_before_level);
} }
rocksdb_mergeoperator_t* rocksdb_mergeoperator_create( rocksdb_mergeoperator_t* rocksdb_mergeoperator_create(

View File

@ -1043,21 +1043,25 @@ int main(int argc, char** argv) {
} }
StartPhase("filter"); StartPhase("filter");
for (run = 0; run <= 3; run++) { for (run = 0; run <= 4; run++) {
// First run uses custom filter // run=0 uses custom filter
// Second run uses old block-based bloom filter // run=1 uses old block-based bloom filter
// Third run uses full bloom filter // run=2 uses full bloom filter
// run=3 uses Ribbon
// run=4 uses Ribbon-Bloom hybrid configuration
CheckNoError(err); CheckNoError(err);
rocksdb_filterpolicy_t* policy; rocksdb_filterpolicy_t* policy;
if (run == 0) { if (run == 0) {
policy = rocksdb_filterpolicy_create(NULL, FilterDestroy, FilterCreate, policy = rocksdb_filterpolicy_create(NULL, FilterDestroy, FilterCreate,
FilterKeyMatch, NULL, FilterName); FilterKeyMatch, NULL, FilterName);
} else if (run == 1) { } else if (run == 1) {
policy = rocksdb_filterpolicy_create_bloom(8); policy = rocksdb_filterpolicy_create_bloom(8.0);
} else if (run == 2) { } else if (run == 2) {
policy = rocksdb_filterpolicy_create_bloom_full(8); policy = rocksdb_filterpolicy_create_bloom_full(8.0);
} else if (run == 3) {
policy = rocksdb_filterpolicy_create_ribbon(8.0);
} else { } else {
policy = rocksdb_filterpolicy_create_ribbon(8); policy = rocksdb_filterpolicy_create_ribbon_hybrid(8.0, 1);
} }
rocksdb_block_based_options_set_filter_policy(table_options, policy); rocksdb_block_based_options_set_filter_policy(table_options, policy);
@ -1123,7 +1127,7 @@ int main(int argc, char** argv) {
} else if (run == 1) { } else if (run == 1) {
// Essentially a fingerprint of the block-based Bloom schema // Essentially a fingerprint of the block-based Bloom schema
CheckCondition(hits == 241); CheckCondition(hits == 241);
} else if (run == 2) { } else if (run == 2 || run == 4) {
// Essentially a fingerprint of full Bloom schema, format_version=5 // Essentially a fingerprint of full Bloom schema, format_version=5
CheckCondition(hits == 188); CheckCondition(hits == 188);
} else { } else {

View File

@ -146,7 +146,7 @@ DECLARE_bool(enable_write_thread_adaptive_yield);
DECLARE_int32(reopen); DECLARE_int32(reopen);
DECLARE_double(bloom_bits); DECLARE_double(bloom_bits);
DECLARE_bool(use_block_based_filter); DECLARE_bool(use_block_based_filter);
DECLARE_bool(use_ribbon_filter); DECLARE_int32(ribbon_starting_level);
DECLARE_bool(partition_filters); DECLARE_bool(partition_filters);
DECLARE_bool(optimize_filters_for_memory); DECLARE_bool(optimize_filters_for_memory);
DECLARE_int32(index_type); DECLARE_int32(index_type);

View File

@ -419,8 +419,11 @@ DEFINE_bool(use_block_based_filter, false,
"use block based filter" "use block based filter"
"instead of full filter for block based table"); "instead of full filter for block based table");
DEFINE_bool(use_ribbon_filter, false, DEFINE_int32(
"Use Ribbon filter instead of Bloom filter"); ribbon_starting_level, 999,
"Use Bloom filter on levels below specified and Ribbon beginning on level "
"specified. Flush is considered level -1. 999 or more -> always Bloom. 0 "
"-> Ribbon except Bloom for flush. -1 -> always Ribbon.");
DEFINE_bool(partition_filters, false, DEFINE_bool(partition_filters, false,
"use partitioned filters " "use partitioned filters "

View File

@ -31,19 +31,21 @@ std::shared_ptr<const FilterPolicy> CreateFilterPolicy() {
return BlockBasedTableOptions().filter_policy; return BlockBasedTableOptions().filter_policy;
} }
const FilterPolicy* new_policy; const FilterPolicy* new_policy;
if (FLAGS_use_ribbon_filter) { if (FLAGS_use_block_based_filter) {
// Old and new API should be same if (FLAGS_ribbon_starting_level < 999) {
if (std::random_device()() & 1) { fprintf(
new_policy = NewExperimentalRibbonFilterPolicy(FLAGS_bloom_bits); stderr,
"Cannot combine use_block_based_filter and ribbon_starting_level\n");
exit(1);
} else { } else {
new_policy = NewRibbonFilterPolicy(FLAGS_bloom_bits);
}
} else {
if (FLAGS_use_block_based_filter) {
new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, true); new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, true);
} else {
new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, false);
} }
} else if (FLAGS_ribbon_starting_level >= 999) {
// Use Bloom API
new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, false);
} else {
new_policy = NewRibbonFilterPolicy(
FLAGS_bloom_bits, /* bloom_before_level */ FLAGS_ribbon_starting_level);
} }
return std::shared_ptr<const FilterPolicy>(new_policy); return std::shared_ptr<const FilterPolicy>(new_policy);
} }

View File

@ -1599,11 +1599,14 @@ extern ROCKSDB_LIBRARY_API void rocksdb_filterpolicy_destroy(
rocksdb_filterpolicy_t*); rocksdb_filterpolicy_t*);
extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t* extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t*
rocksdb_filterpolicy_create_bloom(int bits_per_key); rocksdb_filterpolicy_create_bloom(double bits_per_key);
extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t* extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t*
rocksdb_filterpolicy_create_bloom_full(int bits_per_key); rocksdb_filterpolicy_create_bloom_full(double bits_per_key);
extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t* extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t*
rocksdb_filterpolicy_create_ribbon(int bloom_equivalent_bits_per_key); rocksdb_filterpolicy_create_ribbon(double bloom_equivalent_bits_per_key);
extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t*
rocksdb_filterpolicy_create_ribbon_hybrid(double bloom_equivalent_bits_per_key,
int bloom_before_level);
/* Merge Operator */ /* Merge Operator */

View File

@ -250,6 +250,20 @@ extern const FilterPolicy* NewBloomFilterPolicy(
// you pass in 10 for bloom_equivalent_bits_per_key, you'll get the same // you pass in 10 for bloom_equivalent_bits_per_key, you'll get the same
// 0.95% FP rate as Bloom filter but only using about 7 bits per key. // 0.95% FP rate as Bloom filter but only using about 7 bits per key.
// //
// The space savings of Ribbon filters make sense for lower (higher
// numbered; larger; longer-lived) levels of LSM, whereas the speed of
// Bloom filters makes sense for the highest levels of LSM. Setting
// bloom_before_level allows for this design with Level and Universal
// compaction styles. For example, bloom_before_level=1 means that Bloom
// filters will be used in level 0, including flushes, and Ribbon
// filters elsewhere, including FIFO compaction and external SST files.
// For this option, memtable flushes are considered level -1 (so that
// flushes can be distinguished from intra-L0 compaction).
// bloom_before_level=0 (default) -> Generate Bloom filters only for
// flushes under Level and Universal compaction styles.
// bloom_before_level=-1 -> Always generate Ribbon filters (except in
// some extreme or exceptional cases).
//
// Ribbon filters are compatible with RocksDB >= 6.15.0. Earlier // Ribbon filters are compatible with RocksDB >= 6.15.0. Earlier
// versions reading the data will behave as if no filter was used // versions reading the data will behave as if no filter was used
// (degraded performance until compaction rebuilds filters). All // (degraded performance until compaction rebuilds filters). All
@ -266,12 +280,12 @@ extern const FilterPolicy* NewBloomFilterPolicy(
// Also consider using optimize_filters_for_memory to save filter // Also consider using optimize_filters_for_memory to save filter
// memory. // memory.
extern const FilterPolicy* NewRibbonFilterPolicy( extern const FilterPolicy* NewRibbonFilterPolicy(
double bloom_equivalent_bits_per_key); double bloom_equivalent_bits_per_key, int bloom_before_level = 0);
// Old name // Old name and old default behavior
inline const FilterPolicy* NewExperimentalRibbonFilterPolicy( inline const FilterPolicy* NewExperimentalRibbonFilterPolicy(
double bloom_equivalent_bits_per_key) { double bloom_equivalent_bits_per_key) {
return NewRibbonFilterPolicy(bloom_equivalent_bits_per_key); return NewRibbonFilterPolicy(bloom_equivalent_bits_per_key, -1);
} }
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

View File

@ -932,14 +932,49 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) {
new_opt.cache_index_and_filter_blocks); new_opt.cache_index_and_filter_blocks);
ASSERT_EQ(table_opt.filter_policy, new_opt.filter_policy); ASSERT_EQ(table_opt.filter_policy, new_opt.filter_policy);
// Ribbon filter policy // Ribbon filter policy (no Bloom hybrid)
ASSERT_OK(GetBlockBasedTableOptionsFromString( ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, "filter_policy=ribbonfilter:5.678;", config_options, table_opt, "filter_policy=ribbonfilter:5.678:-1;",
&new_opt)); &new_opt));
ASSERT_TRUE(new_opt.filter_policy != nullptr); ASSERT_TRUE(new_opt.filter_policy != nullptr);
bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get()); bfp = dynamic_cast<const BloomFilterPolicy*>(new_opt.filter_policy.get());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 5678); EXPECT_EQ(bfp->GetMillibitsPerKey(), 5678);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon); EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon);
// Ribbon filter policy (default Bloom hybrid)
ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, "filter_policy=ribbonfilter:6.789;",
&new_opt));
ASSERT_TRUE(new_opt.filter_policy != nullptr);
auto ltfp = dynamic_cast<const LevelThresholdFilterPolicy*>(
new_opt.filter_policy.get());
EXPECT_EQ(ltfp->TEST_GetStartingLevelForB(), 0);
bfp = dynamic_cast<const BloomFilterPolicy*>(ltfp->TEST_GetPolicyA());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kFastLocalBloom);
bfp = dynamic_cast<const BloomFilterPolicy*>(ltfp->TEST_GetPolicyB());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon);
// Ribbon filter policy (custom Bloom hybrid)
ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, "filter_policy=ribbonfilter:6.789:5;",
&new_opt));
ASSERT_TRUE(new_opt.filter_policy != nullptr);
ltfp = dynamic_cast<const LevelThresholdFilterPolicy*>(
new_opt.filter_policy.get());
EXPECT_EQ(ltfp->TEST_GetStartingLevelForB(), 5);
bfp = dynamic_cast<const BloomFilterPolicy*>(ltfp->TEST_GetPolicyA());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kFastLocalBloom);
bfp = dynamic_cast<const BloomFilterPolicy*>(ltfp->TEST_GetPolicyB());
EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789);
EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon);
// Old name // Old name
ASSERT_OK(GetBlockBasedTableOptionsFromString( ASSERT_OK(GetBlockBasedTableOptionsFromString(
config_options, table_opt, "filter_policy=experimental_ribbon:6.789;", config_options, table_opt, "filter_policy=experimental_ribbon:6.789;",

View File

@ -1062,7 +1062,7 @@ BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode)
BloomFilterPolicy::~BloomFilterPolicy() {} BloomFilterPolicy::~BloomFilterPolicy() {}
const char* BloomFilterPolicy::Name() const { const char* BuiltinFilterPolicy::Name() const {
return "rocksdb.BuiltinBloomFilter"; return "rocksdb.BuiltinBloomFilter";
} }
@ -1095,8 +1095,8 @@ void BloomFilterPolicy::CreateFilter(const Slice* keys, int n,
} }
} }
bool BloomFilterPolicy::KeyMayMatch(const Slice& key, bool BuiltinFilterPolicy::KeyMayMatch(const Slice& key,
const Slice& bloom_filter) const { const Slice& bloom_filter) const {
const size_t len = bloom_filter.size(); const size_t len = bloom_filter.size();
if (len < 2 || len > 0xffffffffU) { if (len < 2 || len > 0xffffffffU) {
return false; return false;
@ -1118,7 +1118,7 @@ bool BloomFilterPolicy::KeyMayMatch(const Slice& key,
array); array);
} }
FilterBitsBuilder* BloomFilterPolicy::GetFilterBitsBuilder() const { FilterBitsBuilder* BuiltinFilterPolicy::GetFilterBitsBuilder() const {
// This code path should no longer be used, for the built-in // This code path should no longer be used, for the built-in
// BloomFilterPolicy. Internal to RocksDB and outside // BloomFilterPolicy. Internal to RocksDB and outside
// BloomFilterPolicy, only get a FilterBitsBuilder with // BloomFilterPolicy, only get a FilterBitsBuilder with
@ -1192,7 +1192,7 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderFromContext(
// Read metadata to determine what kind of FilterBitsReader is needed // Read metadata to determine what kind of FilterBitsReader is needed
// and return a new one. // and return a new one.
FilterBitsReader* BloomFilterPolicy::GetFilterBitsReader( FilterBitsReader* BuiltinFilterPolicy::GetFilterBitsReader(
const Slice& contents) const { const Slice& contents) const {
uint32_t len_with_meta = static_cast<uint32_t>(contents.size()); uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
if (len_with_meta <= kMetadataLen) { if (len_with_meta <= kMetadataLen) {
@ -1273,7 +1273,7 @@ FilterBitsReader* BloomFilterPolicy::GetFilterBitsReader(
log2_cache_line_size); log2_cache_line_size);
} }
FilterBitsReader* BloomFilterPolicy::GetRibbonBitsReader( FilterBitsReader* BuiltinFilterPolicy::GetRibbonBitsReader(
const Slice& contents) const { const Slice& contents) const {
uint32_t len_with_meta = static_cast<uint32_t>(contents.size()); uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
uint32_t len = len_with_meta - kMetadataLen; uint32_t len = len_with_meta - kMetadataLen;
@ -1297,7 +1297,7 @@ FilterBitsReader* BloomFilterPolicy::GetRibbonBitsReader(
} }
// For newer Bloom filter implementations // For newer Bloom filter implementations
FilterBitsReader* BloomFilterPolicy::GetBloomBitsReader( FilterBitsReader* BuiltinFilterPolicy::GetBloomBitsReader(
const Slice& contents) const { const Slice& contents) const {
uint32_t len_with_meta = static_cast<uint32_t>(contents.size()); uint32_t len_with_meta = static_cast<uint32_t>(contents.size());
uint32_t len = len_with_meta - kMetadataLen; uint32_t len = len_with_meta - kMetadataLen;
@ -1370,10 +1370,69 @@ const FilterPolicy* NewBloomFilterPolicy(double bits_per_key,
return new BloomFilterPolicy(bits_per_key, m); return new BloomFilterPolicy(bits_per_key, m);
} }
extern const FilterPolicy* NewRibbonFilterPolicy( // Chooses between two filter policies based on LSM level, but
double bloom_equivalent_bits_per_key) { // only for Level and Universal compaction styles. Flush is treated
return new BloomFilterPolicy(bloom_equivalent_bits_per_key, // as level -1. Policy b is considered fallback / primary policy.
BloomFilterPolicy::kStandard128Ribbon); LevelThresholdFilterPolicy::LevelThresholdFilterPolicy(
std::unique_ptr<const FilterPolicy>&& a,
std::unique_ptr<const FilterPolicy>&& b, int starting_level_for_b)
: policy_a_(std::move(a)),
policy_b_(std::move(b)),
starting_level_for_b_(starting_level_for_b) {
// Don't use this wrapper class if you were going to set to -1
assert(starting_level_for_b_ >= 0);
}
// Deprecated block-based filter only.
// Always forwards to policy b (the fallback / primary policy); the
// arguments of this interface carry no level information to choose by.
void LevelThresholdFilterPolicy::CreateFilter(const Slice* keys, int n,
std::string* dst) const {
policy_b_->CreateFilter(keys, n, dst);
}
// Picks the filter builder for a new table file. Policy a is used only
// under Level or Universal compaction when the (known) level is below
// starting_level_for_b_; every other situation uses policy b, which is the
// fallback / primary policy.
FilterBitsBuilder* LevelThresholdFilterPolicy::GetBuilderWithContext(
    const FilterBuildingContext& context) const {
  const bool level_is_considered =
      context.compaction_style == kCompactionStyleLevel ||
      context.compaction_style == kCompactionStyleUniversal;

  bool use_policy_a = false;
  if (level_is_considered) {
    if (context.reason == TableFileCreationReason::kFlush) {
      // A flush is treated as level -1
      assert(context.level_at_creation == 0);
      use_policy_a = (-1 < starting_level_for_b_);
    } else if (context.level_at_creation != -1) {
      // Known level: compare directly against the threshold
      use_policy_a = (context.level_at_creation < starting_level_for_b_);
    }
    // Otherwise the level is unknown (-1): stay with policy b
  }

  return use_policy_a ? policy_a_->GetBuilderWithContext(context)
                      : policy_b_->GetBuilderWithContext(context);
}
const FilterPolicy* NewRibbonFilterPolicy(double bloom_equivalent_bits_per_key,
int bloom_before_level) {
std::unique_ptr<const FilterPolicy> ribbon_only{new BloomFilterPolicy(
bloom_equivalent_bits_per_key, BloomFilterPolicy::kStandard128Ribbon)};
if (bloom_before_level > -1) {
// Could also use Bloom policy
std::unique_ptr<const FilterPolicy> bloom_only{new BloomFilterPolicy(
bloom_equivalent_bits_per_key, BloomFilterPolicy::kFastLocalBloom)};
return new LevelThresholdFilterPolicy(
std::move(bloom_only), std::move(ribbon_only), bloom_before_level);
} else {
return ribbon_only.release();
}
} }
FilterBuildingContext::FilterBuildingContext( FilterBuildingContext::FilterBuildingContext(
@ -1410,9 +1469,18 @@ Status FilterPolicy::CreateFromString(
policy->reset( policy->reset(
NewExperimentalRibbonFilterPolicy(bloom_equivalent_bits_per_key)); NewExperimentalRibbonFilterPolicy(bloom_equivalent_bits_per_key));
} else if (value.compare(0, kRibbonName.size(), kRibbonName) == 0) { } else if (value.compare(0, kRibbonName.size(), kRibbonName) == 0) {
size_t pos = value.find(':', kRibbonName.size());
int bloom_before_level;
if (pos == std::string::npos) {
pos = value.size();
bloom_before_level = 0;
} else {
bloom_before_level = ParseInt(trim(value.substr(pos + 1)));
}
double bloom_equivalent_bits_per_key = double bloom_equivalent_bits_per_key =
ParseDouble(trim(value.substr(kRibbonName.size()))); ParseDouble(trim(value.substr(kRibbonName.size(), pos)));
policy->reset(NewRibbonFilterPolicy(bloom_equivalent_bits_per_key)); policy->reset(NewRibbonFilterPolicy(bloom_equivalent_bits_per_key,
bloom_before_level));
} else { } else {
return Status::NotFound("Invalid filter policy name ", value); return Status::NotFound("Invalid filter policy name ", value);
#else #else

View File

@ -38,10 +38,39 @@ class BuiltinFilterBitsBuilder : public FilterBitsBuilder {
virtual double EstimatedFpRate(size_t num_entries, size_t bytes) = 0; virtual double EstimatedFpRate(size_t num_entries, size_t bytes) = 0;
}; };
// RocksDB built-in filter policy for Bloom or Bloom-like filters. // Abstract base class for RocksDB built-in filter policies.
// This class is considered internal API and subject to change. // This class is considered internal API and subject to change.
// See NewBloomFilterPolicy. class BuiltinFilterPolicy : public FilterPolicy {
class BloomFilterPolicy : public FilterPolicy { public:
// Shared name because any built-in policy can read filters from
// any other
const char* Name() const override;
// Deprecated block-based filter only
bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override;
// Old API
FilterBitsBuilder* GetFilterBitsBuilder() const override;
// Read metadata to determine what kind of FilterBitsReader is needed
// and return a new one. This must successfully process any filter data
// generated by a built-in FilterBitsBuilder, regardless of the impl
// chosen for this BloomFilterPolicy. Not compatible with CreateFilter.
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override;
private:
// For Bloom filter implementation(s) (except deprecated block-based filter)
FilterBitsReader* GetBloomBitsReader(const Slice& contents) const;
// For Ribbon filter implementation(s)
FilterBitsReader* GetRibbonBitsReader(const Slice& contents) const;
};
// RocksDB built-in filter policy for Bloom or Bloom-like filters including
// Ribbon filters.
// This class is considered internal API and subject to change.
// See NewBloomFilterPolicy and NewRibbonFilterPolicy.
class BloomFilterPolicy : public BuiltinFilterPolicy {
public: public:
// An internal marker for operating modes of BloomFilterPolicy, in terms // An internal marker for operating modes of BloomFilterPolicy, in terms
// of selecting an implementation. This makes it easier for tests to track // of selecting an implementation. This makes it easier for tests to track
@ -88,16 +117,9 @@ class BloomFilterPolicy : public FilterPolicy {
~BloomFilterPolicy() override; ~BloomFilterPolicy() override;
const char* Name() const override;
// Deprecated block-based filter only // Deprecated block-based filter only
void CreateFilter(const Slice* keys, int n, std::string* dst) const override; void CreateFilter(const Slice* keys, int n, std::string* dst) const override;
// Deprecated block-based filter only
bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override;
FilterBitsBuilder* GetFilterBitsBuilder() const override;
// To use this function, call GetBuilderFromContext(). // To use this function, call GetBuilderFromContext().
// //
// Neither the context nor any objects therein should be saved beyond // Neither the context nor any objects therein should be saved beyond
@ -110,12 +132,6 @@ class BloomFilterPolicy : public FilterPolicy {
// (An internal convenience function to save boilerplate.) // (An internal convenience function to save boilerplate.)
static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&); static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&);
// Read metadata to determine what kind of FilterBitsReader is needed
// and return a new one. This must successfully process any filter data
// generated by a built-in FilterBitsBuilder, regardless of the impl
// chosen for this BloomFilterPolicy. Not compatible with CreateFilter.
FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override;
// Essentially for testing only: configured millibits/key // Essentially for testing only: configured millibits/key
int GetMillibitsPerKey() const { return millibits_per_key_; } int GetMillibitsPerKey() const { return millibits_per_key_; }
// Essentially for testing only: legacy whole bits/key // Essentially for testing only: legacy whole bits/key
@ -157,12 +173,33 @@ class BloomFilterPolicy : public FilterPolicy {
// Sum over all generated filters f: // Sum over all generated filters f:
// (predicted_fp_rate(f) - predicted_fp_rate(f|o_f_f_m=false)) * 2^32 // (predicted_fp_rate(f) - predicted_fp_rate(f|o_f_f_m=false)) * 2^32
mutable std::atomic<int64_t> aggregate_rounding_balance_; mutable std::atomic<int64_t> aggregate_rounding_balance_;
};
// For newer Bloom filter implementation(s) // Chooses between two filter policies based on LSM level, but
FilterBitsReader* GetBloomBitsReader(const Slice& contents) const; // only for Level and Universal compaction styles. Flush is treated
// as level -1. Policy b is considered fallback / primary policy.
class LevelThresholdFilterPolicy : public BuiltinFilterPolicy {
public:
LevelThresholdFilterPolicy(std::unique_ptr<const FilterPolicy>&& a,
std::unique_ptr<const FilterPolicy>&& b,
int starting_level_for_b);
// For Ribbon filter implementation(s) // Deprecated block-based filter only
FilterBitsReader* GetRibbonBitsReader(const Slice& contents) const; void CreateFilter(const Slice* keys, int n, std::string* dst) const override;
FilterBitsBuilder* GetBuilderWithContext(
const FilterBuildingContext& context) const override;
inline int TEST_GetStartingLevelForB() const { return starting_level_for_b_; }
inline const FilterPolicy* TEST_GetPolicyA() const { return policy_a_.get(); }
inline const FilterPolicy* TEST_GetPolicyB() const { return policy_b_.get(); }
private:
const std::unique_ptr<const FilterPolicy> policy_a_;
const std::unique_ptr<const FilterPolicy> policy_b_;
int starting_level_for_b_;
}; };
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

View File

@ -104,7 +104,9 @@ default_params = {
"use_clock_cache": 0, # currently broken "use_clock_cache": 0, # currently broken
"use_full_merge_v1": lambda: random.randint(0, 1), "use_full_merge_v1": lambda: random.randint(0, 1),
"use_merge": lambda: random.randint(0, 1), "use_merge": lambda: random.randint(0, 1),
"use_ribbon_filter": lambda: random.randint(0, 1), # 999 -> use Bloom API
"ribbon_starting_level": lambda: random.choice([random.randint(-1, 10), 999]),
"use_block_based_filter": lambda: random.randint(0, 1),
"verify_checksum": 1, "verify_checksum": 1,
"write_buffer_size": 4 * 1024 * 1024, "write_buffer_size": 4 * 1024 * 1024,
"writepercent": 35, "writepercent": 35,
@ -359,6 +361,8 @@ def finalize_and_sanitize(src_params):
dest_params["partition_filters"] = 0 dest_params["partition_filters"] = 0
else: else:
dest_params["use_block_based_filter"] = 0 dest_params["use_block_based_filter"] = 0
if dest_params["ribbon_starting_level"] < 999:
dest_params["use_block_based_filter"] = 0
if dest_params.get("atomic_flush", 0) == 1: if dest_params.get("atomic_flush", 0) == 1:
# disable pipelined write when atomic flush is used. # disable pipelined write when atomic flush is used.
dest_params["enable_pipelined_write"] = 0 dest_params["enable_pipelined_write"] = 0

View File

@ -1195,6 +1195,96 @@ INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
BloomFilterPolicy::kFastLocalBloom, BloomFilterPolicy::kFastLocalBloom,
BloomFilterPolicy::kStandard128Ribbon)); BloomFilterPolicy::kStandard128Ribbon));
// Adds kNumKeys distinct 8-byte keys to `builder`, finishes the filter, and
// returns the size of the resulting filter expressed in bits per key.
static double GetEffectiveBitsPerKey(FilterBitsBuilder* builder) {
  constexpr unsigned kNumKeys = 1000;
  // The raw bytes of `counter` are used as the key, so every increment of
  // `counter` produces a different key through the same Slice.
  union {
    uint64_t counter;
    char raw[8];
  };
  Slice key{raw, 8};
  counter = 0;
  while (counter < kNumKeys) {
    builder->AddKey(key);
    ++counter;
  }
  std::unique_ptr<const char[]> owned_buf;
  const auto filter_data = builder->Finish(&owned_buf);
  const double total_bits =
      static_cast<double>(filter_data.size() * /*bits per byte*/ 8);
  return total_bits / kNumKeys;
}
// Sets up `ctx` as though a filter were being generated for `levelish`,
// where -1 stands for a memtable flush and any other value for a compaction
// into that level.
static void SetTestingLevel(int levelish, FilterBuildingContext* ctx) {
  const bool is_flush = (levelish == -1);
  // Flush counts as level -1 for this option, yet is recorded as level 0
  ctx->level_at_creation = is_flush ? 0 : levelish;
  ctx->reason = is_flush ? TableFileCreationReason::kFlush
                         : TableFileCreationReason::kCompaction;
}
// Checks which kind of filter NewRibbonFilterPolicy builds at, and just
// below, the bloom_before_level threshold for each compaction style. The
// filter kind is inferred from the built filter's size in bits per key.
TEST(RibbonTest, RibbonTestLevelThreshold) {
BlockBasedTableOptions opts;
FilterBuildingContext ctx(opts);
// A few settings
for (CompactionStyle cs : {kCompactionStyleLevel, kCompactionStyleUniversal,
kCompactionStyleFIFO, kCompactionStyleNone}) {
ctx.compaction_style = cs;
for (int bloom_before_level : {-1, 0, 1, 10}) {
std::vector<std::unique_ptr<const FilterPolicy> > policies;
policies.emplace_back(NewRibbonFilterPolicy(10, bloom_before_level));
if (bloom_before_level == -1) {
// Also test old API
policies.emplace_back(NewExperimentalRibbonFilterPolicy(10));
}
if (bloom_before_level == 0) {
// Also test new API default (bloom_before_level omitted)
policies.emplace_back(NewRibbonFilterPolicy(10));
}
for (std::unique_ptr<const FilterPolicy>& policy : policies) {
// Claim to be generating filter for this level
// (a value of -1 is set up as a flush by SetTestingLevel)
SetTestingLevel(bloom_before_level, &ctx);
std::unique_ptr<FilterBitsBuilder> builder{
policy->GetBuilderWithContext(ctx)};
// Must be Ribbon (more space efficient than 10 bits per key)
ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
if (bloom_before_level >= 0) {
// Claim to be generating filter for previous level
SetTestingLevel(bloom_before_level - 1, &ctx);
builder.reset(policy->GetBuilderWithContext(ctx));
if (cs == kCompactionStyleLevel || cs == kCompactionStyleUniversal) {
// Level is considered.
// Must be Bloom (~ 10 bits per key)
ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9);
} else {
// Level is ignored under non-traditional compaction styles.
// Must be Ribbon (more space efficient than 10 bits per key)
ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
}
}
// Like SST file writer: level unknown (-1), reason kMisc
ctx.level_at_creation = -1;
ctx.reason = TableFileCreationReason::kMisc;
builder.reset(policy->GetBuilderWithContext(ctx));
// Must be Ribbon (more space efficient than 10 bits per key)
ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
}
}
}
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) { int main(int argc, char** argv) {