diff --git a/HISTORY.md b/HISTORY.md index 3ce446885..f7249818e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -7,6 +7,9 @@ * fs_posix.cc GetFreeSpace() always report disk space available to root even when running as non-root. Linux defaults often have disk mounts with 5 to 10 percent of total space reserved only for root. Out of space could result for non-root users. * Subcompactions are now disabled when user-defined timestamps are used, since the subcompaction boundary picking logic is currently not timestamp-aware, which could lead to incorrect results when different subcompactions process keys that only differ by timestamp. +### New Features +* Marked the Ribbon filter and `optimize_filters_for_memory` features as production-ready, each enabling memory savings for Bloom-like filters. Use `NewRibbonFilterPolicy` in place of `NewBloomFilterPolicy` to use Ribbon filters instead of Bloom, or `ribbonfilter` in place of `bloomfilter` in a configuration string. + ## 6.21.0 (2021-05-21) ### Bug Fixes * Fixed a bug in handling file rename error in distributed/network file systems when the server succeeds but client returns error. The bug can cause CURRENT file to point to non-existing MANIFEST file, thus DB cannot be opened.
diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index f5eb3c8dc..6a649eca9 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -21,17 +21,37 @@ #include "utilities/fault_injection_fs.h" namespace ROCKSDB_NAMESPACE { + +namespace { + +std::shared_ptr<const FilterPolicy> CreateFilterPolicy() { + if (FLAGS_bloom_bits < 0) { + return BlockBasedTableOptions().filter_policy; + } + const FilterPolicy* new_policy; + if (FLAGS_use_ribbon_filter) { + // Old and new API should be same + if (std::random_device()() & 1) { + new_policy = NewExperimentalRibbonFilterPolicy(FLAGS_bloom_bits); + } else { + new_policy = NewRibbonFilterPolicy(FLAGS_bloom_bits); + } + } else { + if (FLAGS_use_block_based_filter) { + new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, true); + } else { + new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, false); + } + } + return std::shared_ptr<const FilterPolicy>(new_policy); +} + +} // namespace + StressTest::StressTest() : cache_(NewCache(FLAGS_cache_size)), compressed_cache_(NewLRUCache(FLAGS_compressed_cache_size)), - filter_policy_( - FLAGS_bloom_bits >= 0 - ? FLAGS_use_ribbon_filter - ? NewExperimentalRibbonFilterPolicy(FLAGS_bloom_bits) - : FLAGS_use_block_based_filter - ? NewBloomFilterPolicy(FLAGS_bloom_bits, true) - : NewBloomFilterPolicy(FLAGS_bloom_bits, false) - : nullptr), + filter_policy_(CreateFilterPolicy()), db_(nullptr), #ifndef ROCKSDB_LITE txn_db_(nullptr), diff --git a/include/rocksdb/filter_policy.h b/include/rocksdb/filter_policy.h index f6bd9075b..fc1985d32 100644 --- a/include/rocksdb/filter_policy.h +++ b/include/rocksdb/filter_policy.h @@ -244,25 +244,34 @@ class FilterPolicy { extern const FilterPolicy* NewBloomFilterPolicy( double bits_per_key, bool use_block_based_builder = false); -// An new Bloom alternative that saves about 30% space compared to -// Bloom filters, with about 3-4x construction CPU time and similar -// query times.
For example, if you pass in 10 for -// bloom_equivalent_bits_per_key, you'll get the same 0.95% FP rate -// as Bloom filter but only using about 7 bits per key. (This -// way of configuring the new filter is considered experimental -// and/or transitional, so is expected to be replaced with a new API. -// The constructed filters will be given long-term support.) +// A new Bloom alternative that saves about 30% space compared to +// Bloom filters, with similar query times but roughly 3-4x CPU time +// and 3x temporary space usage during construction. For example, if +// you pass in 10 for bloom_equivalent_bits_per_key, you'll get the same +// 0.95% FP rate as Bloom filter but only using about 7 bits per key. // // Ribbon filters are compatible with RocksDB >= 6.15.0. Earlier // versions reading the data will behave as if no filter was used -// (degraded performance until compaction rebuilds filters). +// (degraded performance until compaction rebuilds filters). All +// built-in FilterPolicies (Bloom or Ribbon) are able to read other +// kinds of built-in filters. // -// Note: this policy can generate Bloom filters in some cases. -// For very small filters (well under 1KB), Bloom fallback is by -// design, as the current Ribbon schema is not optimized to save vs. -// Bloom for such small filters. Other cases of Bloom fallback should -// be exceptional and log an appropriate warning. -extern const FilterPolicy* NewExperimentalRibbonFilterPolicy( +// Note: the current Ribbon filter schema uses some extra resources +// when constructing very large filters. For example, for 100 million +// keys in a single filter (one SST file without partitioned filters), +// 3GB of temporary, untracked memory is used, vs. 1GB for Bloom. +// However, the savings in filter space from just ~60 open SST files +// makes up for the additional temporary memory use. +// +// Also consider using optimize_filters_for_memory to save filter +// memory. 
+extern const FilterPolicy* NewRibbonFilterPolicy( double bloom_equivalent_bits_per_key); +// Old name +inline const FilterPolicy* NewExperimentalRibbonFilterPolicy( + double bloom_equivalent_bits_per_key) { + return NewRibbonFilterPolicy(bloom_equivalent_bits_per_key); +} + } // namespace ROCKSDB_NAMESPACE diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 3c77e21b5..fab428ba1 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -288,13 +288,13 @@ struct BlockBasedTableOptions { // incompatible with block-based filters. bool partition_filters = false; - // EXPERIMENTAL Option to generate Bloom filters that minimize memory + // Option to generate Bloom/Ribbon filters that minimize memory // internal fragmentation. // // When false, malloc_usable_size is not available, or format_version < 5, // filters are generated without regard to internal fragmentation when // loaded into memory (historical behavior). When true (and - // malloc_usable_size is available and format_version >= 5), then Bloom + // malloc_usable_size is available and format_version >= 5), then // filters are generated to "round up" and "round down" their sizes to // minimize internal fragmentation when loaded into memory, assuming the // reading DB has the same memory allocation characteristics as the @@ -313,7 +313,8 @@ struct BlockBasedTableOptions { // NOTE: Because some memory counted by block cache might be unmapped pages // within internal fragmentation, this option can increase observed RSS // memory usage. With cache_index_and_filter_blocks=true, this option makes - // the block cache better at using space it is allowed. + // the block cache better at using space it is allowed. (These issues + // should not arise with partitioned filters.) // // NOTE: Do not set to true if you do not trust malloc_usable_size. 
With // this option, RocksDB might access an allocated memory object beyond its diff --git a/options/options_test.cc b/options/options_test.cc index aa152257c..93f74c4c6 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -934,14 +934,22 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) { new_opt.cache_index_and_filter_blocks); ASSERT_EQ(table_opt.filter_policy, new_opt.filter_policy); - // Experimental Ribbon filter policy + // Ribbon filter policy ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, "filter_policy=experimental_ribbon:5.678;", + config_options, table_opt, "filter_policy=ribbonfilter:5.678;", &new_opt)); ASSERT_TRUE(new_opt.filter_policy != nullptr); bfp = dynamic_cast(new_opt.filter_policy.get()); EXPECT_EQ(bfp->GetMillibitsPerKey(), 5678); EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon); + // Old name + ASSERT_OK(GetBlockBasedTableOptionsFromString( + config_options, table_opt, "filter_policy=experimental_ribbon:6.789;", + &new_opt)); + ASSERT_TRUE(new_opt.filter_policy != nullptr); + bfp = dynamic_cast(new_opt.filter_policy.get()); + EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789); + EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon); // Check block cache options are overwritten when specified // in new format as a struct. 
diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc index 171d777f7..994ada1ba 100644 --- a/table/block_based/filter_policy.cc +++ b/table/block_based/filter_policy.cc @@ -1370,7 +1370,7 @@ const FilterPolicy* NewBloomFilterPolicy(double bits_per_key, return new BloomFilterPolicy(bits_per_key, m); } -extern const FilterPolicy* NewExperimentalRibbonFilterPolicy( +extern const FilterPolicy* NewRibbonFilterPolicy( double bloom_equivalent_bits_per_key) { return new BloomFilterPolicy(bloom_equivalent_bits_per_key, BloomFilterPolicy::kStandard128Ribbon); @@ -1387,6 +1387,7 @@ Status FilterPolicy::CreateFromString( std::shared_ptr* policy) { const std::string kBloomName = "bloomfilter:"; const std::string kExpRibbonName = "experimental_ribbon:"; + const std::string kRibbonName = "ribbonfilter:"; if (value == kNullptrString || value == "rocksdb.BuiltinBloomFilter") { policy->reset(); #ifndef ROCKSDB_LITE @@ -1408,6 +1409,10 @@ Status FilterPolicy::CreateFromString( ParseDouble(trim(value.substr(kExpRibbonName.size()))); policy->reset( NewExperimentalRibbonFilterPolicy(bloom_equivalent_bits_per_key)); + } else if (value.compare(0, kRibbonName.size(), kRibbonName) == 0) { + double bloom_equivalent_bits_per_key = + ParseDouble(trim(value.substr(kRibbonName.size()))); + policy->reset(NewRibbonFilterPolicy(bloom_equivalent_bits_per_key)); } else { return Status::NotFound("Invalid filter policy name ", value); #else diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 7424c1d53..f17745a86 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -596,8 +596,9 @@ DEFINE_int32(random_access_max_buffer_size, 1024 * 1024, DEFINE_int32(writable_file_max_buffer_size, 1024 * 1024, "Maximum write buffer for Writable File"); -DEFINE_int32(bloom_bits, -1, "Bloom filter bits per key. Negative means" - " use default settings."); +DEFINE_int32(bloom_bits, -1, + "Bloom filter bits per key. Negative means use default." 
+ " Zero disables."); DEFINE_bool(use_ribbon_filter, false, "Use Ribbon instead of Bloom filter"); @@ -2424,7 +2425,6 @@ class Benchmark { private: std::shared_ptr cache_; std::shared_ptr compressed_cache_; - std::shared_ptr filter_policy_; const SliceTransform* prefix_extractor_; DBWithColumnFamilies db_; std::vector multi_dbs_; @@ -2818,13 +2818,6 @@ class Benchmark { Benchmark() : cache_(NewCache(FLAGS_cache_size)), compressed_cache_(NewCache(FLAGS_compressed_cache_size)), - filter_policy_( - FLAGS_use_ribbon_filter - ? NewExperimentalRibbonFilterPolicy(FLAGS_bloom_bits) - : FLAGS_bloom_bits >= 0 - ? NewBloomFilterPolicy(FLAGS_bloom_bits, - FLAGS_use_block_based_filter) - : nullptr), prefix_extractor_(NewFixedPrefixTransform(FLAGS_prefix_size)), num_(FLAGS_num), key_size_(FLAGS_key_size), @@ -3892,7 +3885,7 @@ class Benchmark { int bloom_bits_per_key = FLAGS_bloom_bits; if (bloom_bits_per_key < 0) { - bloom_bits_per_key = 0; + bloom_bits_per_key = PlainTableOptions().bloom_bits_per_key; } PlainTableOptions plain_table_options; @@ -3999,7 +3992,6 @@ class Benchmark { block_based_options.block_restart_interval = FLAGS_block_restart_interval; block_based_options.index_block_restart_interval = FLAGS_index_block_restart_interval; - block_based_options.filter_policy = filter_policy_; block_based_options.format_version = static_cast(FLAGS_format_version); block_based_options.read_amp_bytes_per_bit = FLAGS_read_amp_bytes_per_bit; @@ -4227,10 +4219,14 @@ class Benchmark { if (FLAGS_cache_size) { table_options->block_cache = cache_; } - if (FLAGS_bloom_bits >= 0) { + if (FLAGS_bloom_bits < 0) { + table_options->filter_policy = BlockBasedTableOptions().filter_policy; + } else if (FLAGS_bloom_bits == 0) { + table_options->filter_policy.reset(); + } else { table_options->filter_policy.reset( FLAGS_use_ribbon_filter - ? NewExperimentalRibbonFilterPolicy(FLAGS_bloom_bits) + ?
NewRibbonFilterPolicy(FLAGS_bloom_bits) : NewBloomFilterPolicy(FLAGS_bloom_bits, FLAGS_use_block_based_filter)); }