diff --git a/HISTORY.md b/HISTORY.md index 71b59e94a..96e154039 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -26,6 +26,34 @@ * Remove DBOptions::preserved_deletes and DB::SetPreserveDeletesSequenceNumber(). * Remove deprecated API AdvancedColumnFamilyOptions::rate_limit_delay_max_milliseconds. * Removed timestamp from WriteOptions. Accordingly, added to DB APIs Put, Delete, SingleDelete, etc. accepting an additional argument 'timestamp'. Added Put, Delete, SingleDelete, etc to WriteBatch accepting an additional argument 'timestamp'. Removed WriteBatch::AssignTimestamps(vector) API. Renamed WriteBatch::AssignTimestamp() to WriteBatch::UpdateTimestamps() with clarified comments. +* Significant updates to FilterPolicy-related APIs and configuration: + * Remove support for "filter_policy=experimental_ribbon" configuration + string. Use something like "filter_policy=ribbonfilter:10" instead. + * Allow configuration string like "filter_policy=bloomfilter:10" without + bool, to minimize acknowledgement of inefficient block-based filter. + * A `filter_policy` loaded from an OPTIONS file can read existing filters + but still does not support writing new filters. + * Inefficient block-based filter is no longer customizable in the public + API, though (for now) can still be enabled. + * Remove deprecated FilterPolicy::CreateFilter() and + FilterPolicy::KeyMayMatch() + * Remove `rocksdb_filterpolicy_create()` from C API + * Change meaning of nullptr return from GetBuilderWithContext() from "use + block-based filter" to "generate no filter in this case." + * Also, when user specifies bits_per_key < 0.5, we now round this down + to "no filter" because we expect a filter with >= 80% FP rate is + unlikely to be worth the CPU cost of accessing it (esp with + cache_index_and_filter_blocks=1 or partition_filters=1). + * bits_per_key >= 0.5 and < 1.0 is still rounded up to 1.0 (for 62% FP + rate) + * Also removed deprecated functions + * FilterBitsBuilder::CalculateNumEntry() + * FilterPolicy::GetFilterBitsBuilder() + * NewExperimentalRibbonFilterPolicy() + * Remove default implementations of + * FilterBitsBuilder::EstimateEntriesAdded() + * FilterBitsBuilder::ApproximateNumEntries() + * FilterPolicy::GetBuilderWithContext() * Remove default implementation of Name() from FileSystemWrapper. * Rename `SizeApproximationOptions.include_memtabtles` to `SizeApproximationOptions.include_memtables`. * Remove deprecated option DBOptions::max_mem_compaction_level. diff --git a/db/c.cc b/db/c.cc index c99b9fc00..aa48708d6 100644 --- a/db/c.cc +++ b/db/c.cc @@ -290,45 +290,10 @@ struct rocksdb_filterpolicy_t : public FilterPolicy { void* state_; void (*destructor_)(void*); const char* (*name_)(void*); - char* (*create_)( - void*, - const char* const* key_array, const size_t* key_length_array, - int num_keys, - size_t* filter_length); - unsigned char (*key_match_)( - void*, - const char* key, size_t length, - const char* filter, size_t filter_length); - void (*delete_filter_)( - void*, - const char* filter, size_t filter_length); ~rocksdb_filterpolicy_t() override { (*destructor_)(state_); } const char* Name() const override { return (*name_)(state_); } - - void CreateFilter(const Slice* keys, int n, std::string* dst) const override { - std::vector key_pointers(n); - std::vector key_sizes(n); - for (int i = 0; i < n; i++) { - key_pointers[i] = keys[i].data(); - key_sizes[i] = keys[i].size(); - } - size_t len; - char* filter = (*create_)(state_, &key_pointers[0], &key_sizes[0], n, &len); - dst->append(filter, len); - - if (delete_filter_ != nullptr) { - (*delete_filter_)(state_, filter, len); - } else { - free(filter); - } - } - - bool KeyMayMatch(const Slice& key, const Slice& filter) const override { - return (*key_match_)(state_, key.data(), key.size(), - filter.data(), filter.size()); - } }; struct rocksdb_mergeoperator_t : public MergeOperator { @@ -3779,32 +3744,6 @@ void rocksdb_comparator_destroy(rocksdb_comparator_t* cmp) { delete cmp; } -rocksdb_filterpolicy_t* rocksdb_filterpolicy_create( - void* state, - void (*destructor)(void*), - char* (*create_filter)( - void*, - const char* const* key_array, const size_t* key_length_array, - int num_keys, - size_t* filter_length), - unsigned char (*key_may_match)( - void*, - const char* key, size_t length, - const char* filter, size_t filter_length), - void (*delete_filter)( - void*, - const char* filter, size_t filter_length), - const char* (*name)(void*)) { - rocksdb_filterpolicy_t* result = new rocksdb_filterpolicy_t; - result->state_ = state; - result->destructor_ = destructor; - result->create_ = create_filter; - result->key_match_ = key_may_match; - result->delete_filter_ = delete_filter; - result->name_ = name; - return result; -} - void rocksdb_filterpolicy_destroy(rocksdb_filterpolicy_t* filter) { delete filter; } @@ -3818,13 +3757,6 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_format( const FilterPolicy* rep_; ~Wrapper() override { delete rep_; } const char* Name() const override { return rep_->Name(); } - void CreateFilter(const Slice* keys, int n, - std::string* dst) const override { - return rep_->CreateFilter(keys, n, dst); - } - bool KeyMayMatch(const Slice& key, const Slice& filter) const override { - return rep_->KeyMayMatch(key, filter); - } // No need to override GetFilterBitsBuilder if this one is overridden ROCKSDB_NAMESPACE::FilterBitsBuilder* GetBuilderWithContext( const ROCKSDB_NAMESPACE::FilterBuildingContext& context) @@ -3840,7 +3772,6 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom_format( Wrapper* wrapper = new Wrapper; wrapper->rep_ = NewBloomFilterPolicy(bits_per_key, original_format); wrapper->state_ = nullptr; - wrapper->delete_filter_ = nullptr; wrapper->destructor_ = &Wrapper::DoNothing; return wrapper; } @@ -3863,13 +3794,6 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_format( const FilterPolicy* rep_; ~Wrapper() override { delete rep_; } const char* Name() const override { return rep_->Name(); } - void CreateFilter(const Slice* keys, int n, - std::string* dst) const override { - return rep_->CreateFilter(keys, n, dst); - } - bool KeyMayMatch(const Slice& key, const Slice& filter) const override { - return rep_->KeyMayMatch(key, filter); - } ROCKSDB_NAMESPACE::FilterBitsBuilder* GetBuilderWithContext( const ROCKSDB_NAMESPACE::FilterBuildingContext& context) const override { @@ -3885,7 +3809,6 @@ rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_ribbon_format( wrapper->rep_ = NewRibbonFilterPolicy(bloom_equivalent_bits_per_key, bloom_before_level); wrapper->state_ = nullptr; - wrapper->delete_filter_ = nullptr; wrapper->destructor_ = &Wrapper::DoNothing; return wrapper; } diff --git a/db/c_test.c b/db/c_test.c index 1ee01e46d..eaff94f19 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -226,39 +226,6 @@ static const char* CmpName(void* arg) { return "foo"; } -// Custom filter policy -static unsigned char fake_filter_result = 1; -static void FilterDestroy(void* arg) { (void)arg; } -static const char* FilterName(void* arg) { - (void)arg; - return "TestFilter"; -} -static char* FilterCreate( - void* arg, - const char* const* key_array, const size_t* key_length_array, - int num_keys, - size_t* filter_length) { - (void)arg; - (void)key_array; - (void)key_length_array; - (void)num_keys; - *filter_length = 4; - char* result = malloc(4); - memcpy(result, "fake", 4); - return result; -} -static unsigned char FilterKeyMatch( - void* arg, - const char* key, size_t length, - const char* filter, size_t filter_length) { - (void)arg; - (void)key; - (void)length; - CheckCondition(filter_length == 4); - CheckCondition(memcmp(filter, "fake", 4) == 0); - return fake_filter_result; -} - // Custom compaction filter static void CFilterDestroy(void* arg) { (void)arg; } static const char* CFilterName(void* arg) { @@ -1042,18 +1009,15 @@ int main(int argc, char** argv) { } StartPhase("filter"); - for (run = 0; run <= 4; run++) { - // run=0 uses custom filter + for (run = 1; run <= 4; run++) { + // run=0 uses custom filter (not currently supported) // run=1 uses old block-based bloom filter // run=2 run uses full bloom filter // run=3 uses Ribbon // run=4 uses Ribbon-Bloom hybrid configuration CheckNoError(err); rocksdb_filterpolicy_t* policy; - if (run == 0) { - policy = rocksdb_filterpolicy_create(NULL, FilterDestroy, FilterCreate, - FilterKeyMatch, NULL, FilterName); - } else if (run == 1) { + if (run == 1) { policy = rocksdb_filterpolicy_create_bloom(8.0); } else if (run == 2) { policy = rocksdb_filterpolicy_create_bloom_full(8.0); @@ -1088,19 +1052,8 @@ int main(int argc, char** argv) { } rocksdb_compact_range(db, NULL, 0, NULL, 0); - fake_filter_result = 1; CheckGet(db, roptions, "foo", "foovalue"); CheckGet(db, roptions, "bar", "barvalue"); - if (run == 0) { - // Must not find value when custom filter returns false - fake_filter_result = 0; - CheckGet(db, roptions, "foo", NULL); - CheckGet(db, roptions, "bar", NULL); - fake_filter_result = 1; - - CheckGet(db, roptions, "foo", "foovalue"); - CheckGet(db, roptions, "bar", "barvalue"); - } { // Query some keys not added to identify Bloom filter implementation @@ -1113,7 +1066,6 @@ int main(int argc, char** argv) { int i; char keybuf[100]; for (i = 0; i < keys_to_query; i++) { - fake_filter_result = i % 2; snprintf(keybuf, sizeof(keybuf), "no%020d", i); CheckGet(db, roptions, keybuf, NULL); } diff --git a/db/db_bloom_filter_test.cc b/db/db_bloom_filter_test.cc index db621912d..534d2f656 100644 --- a/db/db_bloom_filter_test.cc +++ b/db/db_bloom_filter_test.cc @@ -558,6 +558,168 @@ TEST_P(DBBloomFilterTestWithParam, BloomFilter) { } while (ChangeCompactOptions()); } +namespace { + +class AlwaysTrueBitsBuilder : public FilterBitsBuilder { + public: + void AddKey(const Slice&) override {} + size_t EstimateEntriesAdded() override { return 0U; } + Slice Finish(std::unique_ptr* /* buf */) override { + // Interpreted as "always true" filter (0 probes over 1 byte of + // payload, 5 bytes metadata) + return Slice("\0\0\0\0\0\0", 6); + } + using FilterBitsBuilder::Finish; + size_t ApproximateNumEntries(size_t) override { return SIZE_MAX; } +}; + +class AlwaysTrueFilterPolicy : public BloomFilterPolicy { + public: + explicit AlwaysTrueFilterPolicy(bool skip) + : BloomFilterPolicy(/* ignored */ 10, /* ignored */ BFP::kAutoBloom), + skip_(skip) {} + + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext&) const override { + if (skip_) { + return nullptr; + } else { + return new AlwaysTrueBitsBuilder(); + } + } + + private: + bool skip_; +}; + +} // namespace + +TEST_P(DBBloomFilterTestWithParam, SkipFilterOnEssentiallyZeroBpk) { + constexpr int maxKey = 10; + auto PutFn = [&]() { + int i; + // Put + for (i = 0; i < maxKey; i++) { + ASSERT_OK(Put(Key(i), Key(i))); + } + Flush(); + }; + auto GetFn = [&]() { + int i; + // Get OK + for (i = 0; i < maxKey; i++) { + ASSERT_EQ(Key(i), Get(Key(i))); + } + // Get NotFound + for (; i < maxKey * 2; i++) { + ASSERT_EQ(Get(Key(i)), "NOT_FOUND"); + } + }; + auto PutAndGetFn = [&]() { + PutFn(); + GetFn(); + }; +#ifndef ROCKSDB_LITE + std::map props; + const auto& kAggTableProps = DB::Properties::kAggregatedTableProperties; +#endif // ROCKSDB_LITE + + Options options = CurrentOptions(); + options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); + BlockBasedTableOptions table_options; + table_options.partition_filters = partition_filters_; + if (partition_filters_) { + table_options.index_type = + BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch; + } + table_options.format_version = format_version_; + + // Test 1: bits per key < 0.5 means skip filters -> no filter + // constructed or read. + table_options.filter_policy.reset(new BFP(0.4, bfp_impl_)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + DestroyAndReopen(options); + PutAndGetFn(); + + // Verify no filter access nor contruction + EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), 0); + EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), 0); + +#ifndef ROCKSDB_LITE + props.clear(); + ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); + EXPECT_EQ(props["filter_size"], "0"); +#endif // ROCKSDB_LITE + + // Test 2: use custom API to skip filters -> no filter constructed + // or read. + table_options.filter_policy.reset( + new AlwaysTrueFilterPolicy(/* skip */ true)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + DestroyAndReopen(options); + PutAndGetFn(); + + // Verify no filter access nor construction + EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), 0); + EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), 0); + +#ifndef ROCKSDB_LITE + props.clear(); + ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); + EXPECT_EQ(props["filter_size"], "0"); +#endif // ROCKSDB_LITE + + // Control test: using an actual filter with 100% FP rate -> the filter + // is constructed and checked on read. + table_options.filter_policy.reset( + new AlwaysTrueFilterPolicy(/* skip */ false)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + DestroyAndReopen(options); + PutAndGetFn(); + + // Verify filter is accessed (and constructed) + EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), + maxKey * 2); + EXPECT_EQ( + TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), + maxKey); +#ifndef ROCKSDB_LITE + props.clear(); + ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); + EXPECT_NE(props["filter_size"], "0"); +#endif // ROCKSDB_LITE + + // Test 3 (options test): Able to read existing filters with longstanding + // generated options file entry `filter_policy=rocksdb.BuiltinBloomFilter` + ASSERT_OK(FilterPolicy::CreateFromString(ConfigOptions(), + "rocksdb.BuiltinBloomFilter", + &table_options.filter_policy)); + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + Reopen(options); + GetFn(); + + // Verify filter is accessed + EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), + maxKey * 2); + EXPECT_EQ( + TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), + maxKey); + + // But new filters are not generated (configuration details unknown) + DestroyAndReopen(options); + PutAndGetFn(); + + // Verify no filter access nor construction + EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE), 0); + EXPECT_EQ(TestGetTickerCount(options, BLOOM_FILTER_FULL_TRUE_POSITIVE), 0); + +#ifndef ROCKSDB_LITE + props.clear(); + ASSERT_TRUE(db_->GetMapProperty(kAggTableProps, &props)); + EXPECT_EQ(props["filter_size"], "0"); +#endif // ROCKSDB_LITE +} + #if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN) INSTANTIATE_TEST_CASE_P( FormatDef, DBBloomFilterTestDefFormatVersion, @@ -1376,84 +1538,6 @@ TEST_P(DBFilterConstructionCorruptionTestWithParam, DetectCorruption) { "TamperFilter"); } -namespace { -// A wrapped bloom over block-based FilterPolicy -class TestingWrappedBlockBasedFilterPolicy : public FilterPolicy { - public: - explicit TestingWrappedBlockBasedFilterPolicy(int bits_per_key) - : filter_(NewBloomFilterPolicy(bits_per_key, true)), counter_(0) {} - - ~TestingWrappedBlockBasedFilterPolicy() override { delete filter_; } - - const char* Name() const override { - return "TestingWrappedBlockBasedFilterPolicy"; - } - - void CreateFilter(const ROCKSDB_NAMESPACE::Slice* keys, int n, - std::string* dst) const override { - std::unique_ptr user_keys( - new ROCKSDB_NAMESPACE::Slice[n]); - for (int i = 0; i < n; ++i) { - user_keys[i] = convertKey(keys[i]); - } - return filter_->CreateFilter(user_keys.get(), n, dst); - } - - bool KeyMayMatch(const ROCKSDB_NAMESPACE::Slice& key, - const ROCKSDB_NAMESPACE::Slice& filter) const override { - counter_++; - return filter_->KeyMayMatch(convertKey(key), filter); - } - - uint32_t GetCounter() { return counter_; } - - private: - const FilterPolicy* filter_; - mutable uint32_t counter_; - - ROCKSDB_NAMESPACE::Slice convertKey( - const ROCKSDB_NAMESPACE::Slice& key) const { - return key; - } -}; -} // namespace - -TEST_F(DBBloomFilterTest, WrappedBlockBasedFilterPolicy) { - Options options = CurrentOptions(); - options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - - BlockBasedTableOptions table_options; - TestingWrappedBlockBasedFilterPolicy* policy = - new TestingWrappedBlockBasedFilterPolicy(10); - table_options.filter_policy.reset(policy); - options.table_factory.reset(NewBlockBasedTableFactory(table_options)); - - CreateAndReopenWithCF({"pikachu"}, options); - - const int maxKey = 10000; - for (int i = 0; i < maxKey; i++) { - ASSERT_OK(Put(1, Key(i), Key(i))); - } - // Add a large key to make the file contain wide range - ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555))); - ASSERT_EQ(0U, policy->GetCounter()); - Flush(1); - - // Check if they can be found - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ(Key(i), Get(1, Key(i))); - } - ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0); - ASSERT_EQ(1U * maxKey, policy->GetCounter()); - - // Check if filter is useful - for (int i = 0; i < maxKey; i++) { - ASSERT_EQ("NOT_FOUND", Get(1, Key(i + 33333))); - } - ASSERT_GE(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), maxKey * 0.98); - ASSERT_EQ(2U * maxKey, policy->GetCounter()); -} - namespace { // NOTE: This class is referenced by HISTORY.md as a model for a wrapper // FilterPolicy selecting among configurations based on context. @@ -1486,14 +1570,6 @@ class LevelAndStyleCustomFilterPolicy : public FilterPolicy { return policy_fifo_->GetFilterBitsReader(contents); } - // Defer just in case configuration uses block-based filter - void CreateFilter(const Slice* keys, int n, std::string* dst) const override { - policy_otherwise_->CreateFilter(keys, n, dst); - } - bool KeyMayMatch(const Slice& key, const Slice& filter) const override { - return policy_otherwise_->KeyMayMatch(key, filter); - } - private: const std::unique_ptr policy_fifo_; const std::unique_ptr policy_l0_other_; diff --git a/include/rocksdb/c.h b/include/rocksdb/c.h index 4b162ff39..f0d32bd41 100644 --- a/include/rocksdb/c.h +++ b/include/rocksdb/c.h @@ -1572,15 +1572,6 @@ extern ROCKSDB_LIBRARY_API void rocksdb_comparator_destroy( /* Filter policy */ -extern ROCKSDB_LIBRARY_API rocksdb_filterpolicy_t* rocksdb_filterpolicy_create( - void* state, void (*destructor)(void*), - char* (*create_filter)(void*, const char* const* key_array, - const size_t* key_length_array, int num_keys, - size_t* filter_length), - unsigned char (*key_may_match)(void*, const char* key, size_t length, - const char* filter, size_t filter_length), - void (*delete_filter)(void*, const char* filter, size_t filter_length), - const char* (*name)(void*)); extern ROCKSDB_LIBRARY_API void rocksdb_filterpolicy_destroy( rocksdb_filterpolicy_t*); diff --git a/include/rocksdb/filter_policy.h b/include/rocksdb/filter_policy.h index 38f97ede4..796ee0b78 100644 --- a/include/rocksdb/filter_policy.h +++ b/include/rocksdb/filter_policy.h @@ -28,6 +28,7 @@ #include #include "rocksdb/advanced_options.h" +#include "rocksdb/memory_allocator.h" #include "rocksdb/status.h" #include "rocksdb/types.h" @@ -53,12 +54,8 @@ class FilterBitsBuilder { // Called by RocksDB before Finish to populate // TableProperties::num_filter_entries, so should represent the // number of unique keys (and/or prefixes) added, but does not have - // to be exact. - virtual size_t EstimateEntriesAdded() { - // Default implementation for backward compatibility. - // 0 conspicuously stands for "unknown". - return 0; - } + // to be exact. `return 0;` may be used to conspicuously indicate "unknown". + virtual size_t EstimateEntriesAdded() = 0; // Generate the filter using the keys that are added // The return value of this function would be the filter bits, @@ -99,22 +96,7 @@ class FilterBitsBuilder { // Approximate the number of keys that can be added and generate a filter // <= the specified number of bytes. Callers (including RocksDB) should // only use this result for optimizing performance and not as a guarantee. - // This default implementation is for compatibility with older custom - // FilterBitsBuilders only implementing deprecated CalculateNumEntry. - virtual size_t ApproximateNumEntries(size_t bytes) { - bytes = std::min(bytes, size_t{0xffffffff}); - return static_cast(CalculateNumEntry(static_cast(bytes))); - } - - // Old, DEPRECATED version of ApproximateNumEntries. This is not - // called by RocksDB except as the default implementation of - // ApproximateNumEntries for API compatibility. - virtual int CalculateNumEntry(const uint32_t bytes) { - // DEBUG: ideally should not rely on this implementation - assert(false); - // RELEASE: something reasonably conservative: 2 bytes per entry - return static_cast(bytes / 2); - } + virtual size_t ApproximateNumEntries(size_t bytes) = 0; }; // A class that checks if a key can be in filter @@ -208,49 +190,17 @@ class FilterPolicy { // passed to methods of this type. virtual const char* Name() const = 0; - // DEPRECATED: This function is part of the deprecated block-based - // filter, which will be removed in a future release. - // - // keys[0,n-1] contains a list of keys (potentially with duplicates) - // that are ordered according to the user supplied comparator. - // Append a filter that summarizes keys[0,n-1] to *dst. - // - // Warning: do not change the initial contents of *dst. Instead, - // append the newly constructed filter to *dst. - virtual void CreateFilter(const Slice* keys, int n, - std::string* dst) const = 0; - - // DEPRECATED: This function is part of the deprecated block-based - // filter, which will be removed in a future release. - // - // "filter" contains the data appended by a preceding call to - // CreateFilter() on this class. This method must return true if - // the key was in the list of keys passed to CreateFilter(). - // This method may return true or false if the key was not on the - // list, but it should aim to return false with a high probability. - virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0; - - // Return a new FilterBitsBuilder for full or partitioned filter blocks, or - // nullptr if using block-based filter. - // NOTE: This function is only called by GetBuilderWithContext() below for - // custom FilterPolicy implementations. Thus, it is not necessary to - // override this function if overriding GetBuilderWithContext(). - // DEPRECATED: This function will be removed in a future release. - virtual FilterBitsBuilder* GetFilterBitsBuilder() const { return nullptr; } - - // A newer variant of GetFilterBitsBuilder that allows a FilterPolicy - // to customize the builder for contextual constraints and hints. - // (Name changed to avoid triggering -Werror=overloaded-virtual.) - // If overriding GetFilterBitsBuilder() suffices, it is not necessary to - // override this function. + // Return a new FilterBitsBuilder for constructing full or partitioned + // filter blocks. The configuration details can depend on the input + // FilterBuildingContext but must be serialized such that FilterBitsReader + // can operate based on the block contents without knowing a + // FilterBuildingContext. + // Change in 7.0 release: returning nullptr indicates "no filter" virtual FilterBitsBuilder* GetBuilderWithContext( - const FilterBuildingContext&) const { - return GetFilterBitsBuilder(); - } + const FilterBuildingContext&) const = 0; - // Return a new FilterBitsReader for full or partitioned filter blocks, or - // nullptr if using block-based filter. - // As here, the input slice should NOT be deleted by FilterPolicy. + // Return a new FilterBitsReader for full or partitioned filter blocks. + // Caller retains ownership of any buffer pointed to by the input Slice. virtual FilterBitsReader* GetFilterBitsReader( const Slice& /*contents*/) const { return nullptr; @@ -320,10 +270,4 @@ extern const FilterPolicy* NewBloomFilterPolicy( extern const FilterPolicy* NewRibbonFilterPolicy( double bloom_equivalent_bits_per_key, int bloom_before_level = 0); -// Old name and old default behavior (DEPRECATED) -inline const FilterPolicy* NewExperimentalRibbonFilterPolicy( - double bloom_equivalent_bits_per_key) { - return NewRibbonFilterPolicy(bloom_equivalent_bits_per_key, -1); -} - } // namespace ROCKSDB_NAMESPACE diff --git a/options/options_test.cc b/options/options_test.cc index 99cd7019f..f5c54f1d1 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -930,16 +930,13 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) { new_opt.cache_index_and_filter_blocks); ASSERT_EQ(table_opt.filter_policy, new_opt.filter_policy); - // unrecognized filter policy config - s = GetBlockBasedTableOptionsFromString(config_options, table_opt, - "cache_index_and_filter_blocks=1;" - "filter_policy=bloomfilter:4", - &new_opt); - ASSERT_NOK(s); - ASSERT_TRUE(s.IsInvalidArgument()); - ASSERT_EQ(table_opt.cache_index_and_filter_blocks, - new_opt.cache_index_and_filter_blocks); - ASSERT_EQ(table_opt.filter_policy, new_opt.filter_policy); + // Used to be rejected, now accepted + ASSERT_OK(GetBlockBasedTableOptionsFromString( + config_options, table_opt, "filter_policy=bloomfilter:4", &new_opt)); + bfp = dynamic_cast(new_opt.filter_policy.get()); + EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); + EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); + EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom); // Ribbon filter policy (no Bloom hybrid) ASSERT_OK(GetBlockBasedTableOptionsFromString( @@ -984,15 +981,6 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) { EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789); EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon); - // Old name - ASSERT_OK(GetBlockBasedTableOptionsFromString( - config_options, table_opt, "filter_policy=experimental_ribbon:6.789;", - &new_opt)); - ASSERT_TRUE(new_opt.filter_policy != nullptr); - bfp = dynamic_cast(new_opt.filter_policy.get()); - EXPECT_EQ(bfp->GetMillibitsPerKey(), 6789); - EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kStandard128Ribbon); - // Check block cache options are overwritten when specified // in new format as a struct. ASSERT_OK(GetBlockBasedTableOptionsFromString( @@ -2806,10 +2794,10 @@ TEST_F(OptionsOldApiTest, GetBlockBasedTableOptionsFromString) { ASSERT_EQ(new_opt.format_version, 5U); ASSERT_EQ(new_opt.whole_key_filtering, true); ASSERT_TRUE(new_opt.filter_policy != nullptr); - const BloomFilterPolicy& bfp = - dynamic_cast(*new_opt.filter_policy); - EXPECT_EQ(bfp.GetMillibitsPerKey(), 4567); - EXPECT_EQ(bfp.GetWholeBitsPerKey(), 5); + const BloomFilterPolicy* bfp = + dynamic_cast(new_opt.filter_policy.get()); + EXPECT_EQ(bfp->GetMillibitsPerKey(), 4567); + EXPECT_EQ(bfp->GetWholeBitsPerKey(), 5); // unknown option ASSERT_NOK(GetBlockBasedTableOptionsFromString(table_opt, @@ -2845,14 +2833,13 @@ TEST_F(OptionsOldApiTest, GetBlockBasedTableOptionsFromString) { new_opt.cache_index_and_filter_blocks); ASSERT_EQ(table_opt.filter_policy, new_opt.filter_policy); - // unrecognized filter policy config - ASSERT_NOK(GetBlockBasedTableOptionsFromString(table_opt, - "cache_index_and_filter_blocks=1;" - "filter_policy=bloomfilter:4", - &new_opt)); - ASSERT_EQ(table_opt.cache_index_and_filter_blocks, - new_opt.cache_index_and_filter_blocks); - ASSERT_EQ(table_opt.filter_policy, new_opt.filter_policy); + // Used to be rejected, now accepted + ASSERT_OK(GetBlockBasedTableOptionsFromString( + table_opt, "filter_policy=bloomfilter:4", &new_opt)); + bfp = dynamic_cast(new_opt.filter_policy.get()); + EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000); + EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4); + EXPECT_EQ(bfp->GetMode(), BloomFilterPolicy::kAutoBloom); // Check block cache options are overwritten when specified // in new format as a struct. diff --git a/table/block_based/block_based_filter_block.cc b/table/block_based/block_based_filter_block.cc index a6ee462de..710a9cb49 100644 --- a/table/block_based/block_based_filter_block.cc +++ b/table/block_based/block_based_filter_block.cc @@ -8,6 +8,7 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "table/block_based/block_based_filter_block.h" + #include #include "db/dbformat.h" @@ -62,15 +63,13 @@ static const size_t kFilterBase = 1 << kFilterBaseLg; BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder( const SliceTransform* prefix_extractor, - const BlockBasedTableOptions& table_opt) - : policy_(table_opt.filter_policy.get()), - prefix_extractor_(prefix_extractor), + const BlockBasedTableOptions& table_opt, int bits_per_key) + : prefix_extractor_(prefix_extractor), whole_key_filtering_(table_opt.whole_key_filtering), + bits_per_key_(bits_per_key), prev_prefix_start_(0), prev_prefix_size_(0), - total_added_in_built_(0) { - assert(policy_); -} + total_added_in_built_(0) {} void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) { uint64_t filter_index = (block_offset / kFilterBase); @@ -158,8 +157,9 @@ void BlockBasedFilterBlockBuilder::GenerateFilter() { // Generate filter for current set of keys and append to result_. filter_offsets_.push_back(static_cast(result_.size())); - policy_->CreateFilter(&tmp_entries_[0], static_cast(num_entries), - &result_); + BloomFilterPolicy::CreateFilter(tmp_entries_.data(), + static_cast(num_entries), bits_per_key_, + &result_); tmp_entries_.clear(); entries_.clear(); @@ -282,9 +282,8 @@ bool BlockBasedFilterBlockReader::MayMatch( assert(table()); assert(table()->get_rep()); - const FilterPolicy* const policy = table()->get_rep()->filter_policy; - const bool may_match = policy->KeyMayMatch(entry, filter); + const bool may_match = BloomFilterPolicy::KeyMayMatch(entry, filter); if (may_match) { PERF_COUNTER_ADD(bloom_sst_hit_count, 1); return true; diff --git a/table/block_based/block_based_filter_block.h b/table/block_based/block_based_filter_block.h index fd20df846..156da0492 100644 --- a/table/block_based/block_based_filter_block.h +++ b/table/block_based/block_based_filter_block.h @@ -15,6 +15,7 @@ #include #include + #include #include #include @@ -23,6 +24,7 @@ #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "table/block_based/filter_block_reader_common.h" +#include "table/block_based/filter_policy_internal.h" #include "table/format.h" #include "util/hash.h" @@ -37,7 +39,8 @@ namespace ROCKSDB_NAMESPACE { class BlockBasedFilterBlockBuilder : public FilterBlockBuilder { public: BlockBasedFilterBlockBuilder(const SliceTransform* prefix_extractor, - const BlockBasedTableOptions& table_opt); + const BlockBasedTableOptions& table_opt, + int bits_per_key); // No copying allowed BlockBasedFilterBlockBuilder(const BlockBasedFilterBlockBuilder&) = delete; void operator=(const BlockBasedFilterBlockBuilder&) = delete; @@ -62,9 +65,9 @@ class BlockBasedFilterBlockBuilder : public FilterBlockBuilder { // important: all of these might point to invalid addresses // at the time of destruction of this filter block. destructor // should NOT dereference them. - const FilterPolicy* policy_; const SliceTransform* prefix_extractor_; bool whole_key_filtering_; + int bits_per_key_; size_t prev_prefix_start_; // the position of the last appended prefix // to "entries_". diff --git a/table/block_based/block_based_filter_block_test.cc b/table/block_based/block_based_filter_block_test.cc index 862e90233..943a3e941 100644 --- a/table/block_based/block_based_filter_block_test.cc +++ b/table/block_based/block_based_filter_block_test.cc @@ -19,230 +19,6 @@ namespace ROCKSDB_NAMESPACE { -// For testing: emit an array with one hash value per key -class TestHashFilter : public FilterPolicy { - public: - const char* Name() const override { return "TestHashFilter"; } - - void CreateFilter(const Slice* keys, int n, std::string* dst) const override { - for (int i = 0; i < n; i++) { - uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); - PutFixed32(dst, h); - } - } - - bool KeyMayMatch(const Slice& key, const Slice& filter) const override { - uint32_t h = Hash(key.data(), key.size(), 1); - for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) { - if (h == DecodeFixed32(filter.data() + i)) { - return true; - } - } - return false; - } -}; - -class MockBlockBasedTable : public BlockBasedTable { - public: - explicit MockBlockBasedTable(Rep* rep) - : BlockBasedTable(rep, nullptr /* block_cache_tracer */) {} -}; - -class FilterBlockTest : public mock::MockBlockBasedTableTester, - public testing::Test { - public: - FilterBlockTest() : mock::MockBlockBasedTableTester(new TestHashFilter) {} -}; - -TEST_F(FilterBlockTest, EmptyBuilder) { - BlockBasedFilterBlockBuilder builder(nullptr, table_options_); - Slice slice(builder.Finish()); - ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(slice)); - - CachableEntry block( - new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */, - true /* own_value */); - - BlockBasedFilterBlockReader reader(table_.get(), std::move(block)); - ASSERT_TRUE(reader.KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/100000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); -} - -TEST_F(FilterBlockTest, SingleChunk) { - BlockBasedFilterBlockBuilder builder(nullptr, table_options_); - ASSERT_TRUE(builder.IsEmpty()); - builder.StartBlock(100); - builder.Add("foo"); - ASSERT_FALSE(builder.IsEmpty()); - builder.Add("bar"); - builder.Add("bar"); - builder.Add("box"); - builder.StartBlock(200); - builder.Add("box"); - builder.StartBlock(300); - builder.Add("hello"); - // XXX: "bar" should only count once but is counted twice. This actually - // indicates a serious space usage bug in old block-based filter. Good - // that it is deprecated. - // "box" counts twice, because it's in distinct blocks. - ASSERT_EQ(6, builder.EstimateEntriesAdded()); - ASSERT_FALSE(builder.IsEmpty()); - Status s; - Slice slice = builder.Finish(BlockHandle(), &s); - ASSERT_OK(s); - - CachableEntry block( - new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */, - true /* own_value */); - - BlockBasedFilterBlockReader reader(table_.get(), std::move(block)); - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, - /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("bar", /*prefix_extractor=*/nullptr, - /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, - /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr, - /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, - /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("hello", /*prefix_extractor=*/nullptr, - /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, - /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, - /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "missing", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "other", /*prefix_extractor=*/nullptr, /*block_offset=*/100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); -} - -TEST_F(FilterBlockTest, MultiChunk) { - BlockBasedFilterBlockBuilder builder(nullptr, table_options_); - - // First filter - builder.StartBlock(0); - builder.Add("foo"); - builder.StartBlock(2000); - builder.Add("bar"); - - // Second filter - builder.StartBlock(3100); - builder.Add("box"); - - // Third filter is empty - - // Last filter - builder.StartBlock(9000); - builder.Add("box"); - builder.Add("hello"); - - Slice slice(builder.Finish()); - - CachableEntry block( - new BlockContents(slice), nullptr /* cache */, nullptr /* cache_handle */, - true /* own_value */); - - BlockBasedFilterBlockReader reader(table_.get(), std::move(block)); - - // Check first filter - ASSERT_TRUE(reader.KeyMayMatch("foo", /*prefix_extractor=*/nullptr, - /*block_offset=*/uint64_t{0}, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, - /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("bar", /*prefix_extractor=*/nullptr, - /*block_offset=*/2000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, - /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "box", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/uint64_t{0}, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - // Check second filter - ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr, - /*block_offset=*/3100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, - /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/3100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - // Check third filter (empty) - ASSERT_TRUE(!reader.KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "box", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "hello", /*prefix_extractor=*/nullptr, /*block_offset=*/4100, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - - // Check last filter - ASSERT_TRUE(reader.KeyMayMatch("box", /*prefix_extractor=*/nullptr, - /*block_offset=*/9000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, - /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(reader.KeyMayMatch("hello", /*prefix_extractor=*/nullptr, - /*block_offset=*/9000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, - /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "foo", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); - ASSERT_TRUE(!reader.KeyMayMatch( - "bar", /*prefix_extractor=*/nullptr, /*block_offset=*/9000, - /*no_io=*/false, /*const_ikey_ptr=*/nullptr, /*get_context=*/nullptr, - /*lookup_context=*/nullptr)); -} - // Test for block based filter block // use new interface in FilterPolicy to create filter builder/reader class BlockBasedFilterBlockTest : public mock::MockBlockBasedTableTester, @@ -254,7 +30,7 @@ class BlockBasedFilterBlockTest : public mock::MockBlockBasedTableTester, TEST_F(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) { FilterBlockBuilder* builder = - new BlockBasedFilterBlockBuilder(nullptr, table_options_); + new BlockBasedFilterBlockBuilder(nullptr, table_options_, 10); Slice slice(builder->Finish()); ASSERT_EQ("\\x00\\x00\\x00\\x00\\x0b", EscapeString(slice)); @@ -279,7 +55,7 @@ TEST_F(BlockBasedFilterBlockTest, BlockBasedEmptyBuilder) { TEST_F(BlockBasedFilterBlockTest, BlockBasedSingleChunk) { FilterBlockBuilder* builder = - new BlockBasedFilterBlockBuilder(nullptr, table_options_); + new BlockBasedFilterBlockBuilder(nullptr, table_options_, 10); builder->StartBlock(100); builder->Add("foo"); builder->Add("bar"); @@ -331,7 +107,7 @@ TEST_F(BlockBasedFilterBlockTest, BlockBasedSingleChunk) { TEST_F(BlockBasedFilterBlockTest, BlockBasedMultiChunk) { FilterBlockBuilder* builder = - new BlockBasedFilterBlockBuilder(nullptr, table_options_); + new BlockBasedFilterBlockBuilder(nullptr, table_options_, 10); // First filter builder->StartBlock(0); diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index b4a4cfca6..f8aec88b1 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -78,9 +78,18 @@ FilterBlockBuilder* CreateFilterBlockBuilder( FilterBitsBuilder* filter_bits_builder = BloomFilterPolicy::GetBuilderFromContext(context); if (filter_bits_builder == nullptr) { - return new BlockBasedFilterBlockBuilder(mopt.prefix_extractor.get(), - table_opt); + return nullptr; } else { + // Check for backdoor deprecated block-based bloom config + size_t starting_est = filter_bits_builder->EstimateEntriesAdded(); + constexpr auto kSecretStart = BloomFilterPolicy::kSecretBitsPerKeyStart; + if (starting_est >= kSecretStart && starting_est < kSecretStart + 100) { + int bits_per_key = static_cast(starting_est - kSecretStart); + delete filter_bits_builder; + return new BlockBasedFilterBlockBuilder(mopt.prefix_extractor.get(), + table_opt, bits_per_key); + } + // END check for backdoor deprecated block-based bloom config if (table_opt.partition_filters) { assert(p_index_builder != nullptr); // Since after partition cut request from filter builder it takes time diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc index f916ab4b4..bd24e3a2d 100644 --- a/table/block_based/filter_policy.cc +++ b/table/block_based/filter_policy.cc @@ -1323,7 +1323,11 @@ const std::vector BloomFilterPolicy::kAllUserModes = { BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode) : mode_(mode), warned_(false), aggregate_rounding_balance_(0) { // Sanitize bits_per_key - if (bits_per_key < 1.0) { + if (bits_per_key < 0.5) { + // Round down to no filter + bits_per_key = 0; + } else if (bits_per_key < 1.0) { + // Minimum 1 bit per key (equiv) when creating filter bits_per_key = 1.0; } else if (!(bits_per_key < 100.0)) { // including NaN bits_per_key = 100.0; @@ -1355,14 +1359,10 @@ const char* BuiltinFilterPolicy::Name() const { return "rocksdb.BuiltinBloomFilter"; } -void BloomFilterPolicy::CreateFilter(const Slice* keys, int n, - std::string* dst) const { - // We should ideally only be using this deprecated interface for - // appropriately constructed BloomFilterPolicy - assert(mode_ == kDeprecatedBlock); - +void BloomFilterPolicy::CreateFilter(const Slice* keys, int n, int bits_per_key, + std::string* dst) { // Compute bloom filter size (in both bits and bytes) - uint32_t bits = static_cast(n * whole_bits_per_key_); + uint32_t bits = static_cast(n * bits_per_key); // For small n, we can see a very high false positive rate. Fix it // by enforcing a minimum bloom filter length. @@ -1371,8 +1371,7 @@ void BloomFilterPolicy::CreateFilter(const Slice* keys, int n, uint32_t bytes = (bits + 7) / 8; bits = bytes * 8; - int num_probes = - LegacyNoLocalityBloomImpl::ChooseNumProbes(whole_bits_per_key_); + int num_probes = LegacyNoLocalityBloomImpl::ChooseNumProbes(bits_per_key); const size_t init_size = dst->size(); dst->resize(init_size + bytes, 0); @@ -1384,8 +1383,8 @@ void BloomFilterPolicy::CreateFilter(const Slice* keys, int n, } } -bool BuiltinFilterPolicy::KeyMayMatch(const Slice& key, - const Slice& bloom_filter) const { +bool BloomFilterPolicy::KeyMayMatch(const Slice& key, + const Slice& bloom_filter) { const size_t len = bloom_filter.size(); if (len < 2 || len > 0xffffffffU) { return false; @@ -1407,20 +1406,12 @@ bool BuiltinFilterPolicy::KeyMayMatch(const Slice& key, array); } -FilterBitsBuilder* BuiltinFilterPolicy::GetFilterBitsBuilder() const { - // This code path should no longer be used, for the built-in - // BloomFilterPolicy. Internal to RocksDB and outside - // BloomFilterPolicy, only get a FilterBitsBuilder with - // BloomFilterPolicy::GetBuilderFromContext(), which will call - // BloomFilterPolicy::GetBuilderWithContext(). RocksDB users have - // been warned (HISTORY.md) that they can no longer call this on - // the built-in BloomFilterPolicy (unlikely). - assert(false); - return GetBuilderWithContext(FilterBuildingContext(BlockBasedTableOptions())); -} - FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext( const FilterBuildingContext& context) const { + if (millibits_per_key_ == 0) { + // "No filter" special case + return nullptr; + } Mode cur = mode_; bool offm = context.table_options.optimize_filters_for_memory; bool reserve_filter_construction_mem = @@ -1442,7 +1433,7 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext( cur = kFastLocalBloom; } break; - case kDeprecatedBlock: + case kDeprecatedBlock: { if (context.info_log && !warned_.load(std::memory_order_relaxed)) { warned_ = true; ROCKS_LOG_WARN(context.info_log, @@ -1450,7 +1441,22 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext( "inefficient (%d bits per key).", whole_bits_per_key_); } - return nullptr; + // Internal contract: returns a new fake builder that encodes bits per + // key into a special value from EstimateEntriesAdded() + struct B : public FilterBitsBuilder { + explicit B(int bits_per_key) + : est(kSecretBitsPerKeyStart + bits_per_key) {} + size_t est; + size_t EstimateEntriesAdded() override { return est; } + void AddKey(const Slice&) override {} + using FilterBitsBuilder::Finish; // FIXME + Slice Finish(std::unique_ptr*) override { + return Slice(); + } + size_t ApproximateNumEntries(size_t) override { return 0; } + }; + return new B(GetWholeBitsPerKey()); + } case kFastLocalBloom: return new FastLocalBloomBitsBuilder( millibits_per_key_, offm ? &aggregate_rounding_balance_ : nullptr, @@ -1696,12 +1702,6 @@ LevelThresholdFilterPolicy::LevelThresholdFilterPolicy( assert(starting_level_for_b_ >= 0); } -// Deprecated block-based filter only -void LevelThresholdFilterPolicy::CreateFilter(const Slice* keys, int n, - std::string* dst) const { - policy_b_->CreateFilter(keys, n, dst); -} - FilterBitsBuilder* LevelThresholdFilterPolicy::GetBuilderWithContext( const FilterBuildingContext& context) const { switch (context.compaction_style) { @@ -1758,29 +1758,25 @@ Status FilterPolicy::CreateFromString( const ConfigOptions& /*options*/, const std::string& value, std::shared_ptr* policy) { const std::string kBloomName = "bloomfilter:"; - const std::string kExpRibbonName = "experimental_ribbon:"; const std::string kRibbonName = "ribbonfilter:"; - if (value == kNullptrString || value == "rocksdb.BuiltinBloomFilter") { + if (value == kNullptrString) { policy->reset(); + } else if (value == "rocksdb.BuiltinBloomFilter") { + *policy = std::make_shared(); #ifndef ROCKSDB_LITE } else if (value.compare(0, kBloomName.size(), kBloomName) == 0) { size_t pos = value.find(':', kBloomName.size()); + bool use_block_based_builder; if (pos == std::string::npos) { - return Status::InvalidArgument( - "Invalid filter policy config, missing bits_per_key"); + pos = value.size(); + use_block_based_builder = false; } else { - double bits_per_key = ParseDouble( - trim(value.substr(kBloomName.size(), pos - kBloomName.size()))); - bool use_block_based_builder = + use_block_based_builder = ParseBoolean("use_block_based_builder", trim(value.substr(pos + 1))); - policy->reset( - NewBloomFilterPolicy(bits_per_key, use_block_based_builder)); } - } else if (value.compare(0, kExpRibbonName.size(), kExpRibbonName) == 0) { - double bloom_equivalent_bits_per_key = - ParseDouble(trim(value.substr(kExpRibbonName.size()))); - policy->reset( - NewExperimentalRibbonFilterPolicy(bloom_equivalent_bits_per_key)); + double bits_per_key = ParseDouble( + trim(value.substr(kBloomName.size(), pos - kBloomName.size()))); + policy->reset(NewBloomFilterPolicy(bits_per_key, use_block_based_builder)); } else if (value.compare(0, kRibbonName.size(), kRibbonName) == 0) { size_t pos = value.find(':', kRibbonName.size()); int bloom_before_level; @@ -1790,8 +1786,8 @@ Status FilterPolicy::CreateFromString( } else { bloom_before_level = ParseInt(trim(value.substr(pos + 1))); } - double bloom_equivalent_bits_per_key = - ParseDouble(trim(value.substr(kRibbonName.size(), pos))); + double bloom_equivalent_bits_per_key = ParseDouble( + trim(value.substr(kRibbonName.size(), pos - kRibbonName.size()))); policy->reset(NewRibbonFilterPolicy(bloom_equivalent_bits_per_key, bloom_before_level)); } else { diff --git a/table/block_based/filter_policy_internal.h b/table/block_based/filter_policy_internal.h index bccfc0cf6..efe46ca64 100644 --- a/table/block_based/filter_policy_internal.h +++ b/table/block_based/filter_policy_internal.h @@ -46,7 +46,10 @@ class BuiltinFilterBitsReader : public FilterBitsReader { virtual bool HashMayMatch(const uint64_t /* h */) { return true; } }; -// Abstract base class for RocksDB built-in filter policies. +// Base class for RocksDB built-in filter policies. This can read all +// kinds of built-in filters (for backward compatibility with old +// OPTIONS files) but does not build filters, so new SST files generated +// under the policy will get no filters (like nullptr FilterPolicy). // This class is considered internal API and subject to change. class BuiltinFilterPolicy : public FilterPolicy { public: @@ -57,18 +60,18 @@ class BuiltinFilterPolicy : public FilterPolicy { // any other const char* Name() const override; - // Deprecated block-based filter only - bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override; - - // Old API - FilterBitsBuilder* GetFilterBitsBuilder() const override; - // Read metadata to determine what kind of FilterBitsReader is needed // and return a new one. This must successfully process any filter data // generated by a built-in FilterBitsBuilder, regardless of the impl // chosen for this BloomFilterPolicy. Not compatible with CreateFilter. FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override; + // Does not write filters. + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext&) const override { + return nullptr; + } + private: // For Bloom filter implementation(s) (except deprecated block-based filter) static BuiltinFilterBitsReader* GetBloomBitsReader(const Slice& contents); @@ -128,8 +131,10 @@ class BloomFilterPolicy : public BuiltinFilterPolicy { ~BloomFilterPolicy() override; - // Deprecated block-based filter only - void CreateFilter(const Slice* keys, int n, std::string* dst) const override; + // For Deprecated block-based filter (no longer customizable in public API) + static void CreateFilter(const Slice* keys, int n, int bits_per_key, + std::string* dst); + static bool KeyMayMatch(const Slice& key, const Slice& bloom_filter); // To use this function, call GetBuilderFromContext(). // @@ -138,6 +143,11 @@ class BloomFilterPolicy : public BuiltinFilterPolicy { FilterBitsBuilder* GetBuilderWithContext( const FilterBuildingContext&) const override; + // Internal contract: for kDeprecatedBlock, GetBuilderWithContext returns + // a new fake builder that encodes bits per key into a special value from + // EstimateEntriesAdded(), using kSecretBitsPerKeyStart + bits_per_key + static constexpr size_t kSecretBitsPerKeyStart = 1234567890U; + // Returns a new FilterBitsBuilder from the filter_policy in // table_options of a context, or nullptr if not applicable. // (An internal convenience function to save boilerplate.) @@ -195,9 +205,6 @@ class LevelThresholdFilterPolicy : public BuiltinFilterPolicy { std::unique_ptr&& b, int starting_level_for_b); - // Deprecated block-based filter only - void CreateFilter(const Slice* keys, int n, std::string* dst) const override; - FilterBitsBuilder* GetBuilderWithContext( const FilterBuildingContext& context) const override; diff --git a/table/block_based/full_filter_block_test.cc b/table/block_based/full_filter_block_test.cc index f919d1aa5..76f612728 100644 --- a/table/block_based/full_filter_block_test.cc +++ b/table/block_based/full_filter_block_test.cc @@ -44,6 +44,10 @@ class TestFilterBitsBuilder : public FilterBitsBuilder { return Slice(data, len); } + size_t EstimateEntriesAdded() override { return hash_entries_.size(); } + + size_t ApproximateNumEntries(size_t bytes) override { return bytes / 4; } + private: std::vector hash_entries_; }; @@ -81,24 +85,8 @@ class TestHashFilter : public FilterPolicy { public: const char* Name() const override { return "TestHashFilter"; } - void CreateFilter(const Slice* keys, int n, std::string* dst) const override { - for (int i = 0; i < n; i++) { - uint32_t h = Hash(keys[i].data(), keys[i].size(), 1); - PutFixed32(dst, h); - } - } - - bool KeyMayMatch(const Slice& key, const Slice& filter) const override { - uint32_t h = Hash(key.data(), key.size(), 1); - for (unsigned int i = 0; i + 4 <= filter.size(); i += 4) { - if (h == DecodeFixed32(filter.data() + i)) { - return true; - } - } - return false; - } - - FilterBitsBuilder* GetFilterBitsBuilder() const override { + FilterBitsBuilder* GetBuilderWithContext( + const FilterBuildingContext&) const override { return new TestFilterBitsBuilder(); } @@ -233,6 +221,8 @@ class CountUniqueFilterBitsBuilderWrapper : public FilterBitsBuilder { return rv; } + size_t EstimateEntriesAdded() override { return b_->EstimateEntriesAdded(); } + size_t ApproximateNumEntries(size_t bytes) override { return b_->ApproximateNumEntries(bytes); } diff --git a/table/block_based/mock_block_based_table.h b/table/block_based/mock_block_based_table.h index e0533a717..1d6ec9fee 100644 --- a/table/block_based/mock_block_based_table.h +++ b/table/block_based/mock_block_based_table.h @@ -29,7 +29,7 @@ class MockBlockBasedTableTester { InternalKeyComparator icomp_; std::unique_ptr table_; - MockBlockBasedTableTester(const FilterPolicy *filter_policy) + explicit MockBlockBasedTableTester(const FilterPolicy* filter_policy) : ioptions_(options_), env_options_(options_), icomp_(options_.comparator) { diff --git a/util/bloom_impl.h b/util/bloom_impl.h index 5f2a69e07..fadd012d3 100644 --- a/util/bloom_impl.h +++ b/util/bloom_impl.h @@ -41,6 +41,10 @@ class BloomMath { // cache line size. static double CacheLocalFpRate(double bits_per_key, int num_probes, int cache_line_bits) { + if (bits_per_key <= 0.0) { + // Fix a discontinuity + return 1.0; + } double keys_per_cache_line = cache_line_bits / bits_per_key; // A reasonable estimate is the average of the FP rates for one standard // deviation above and below the mean bucket occupancy. See diff --git a/util/bloom_test.cc b/util/bloom_test.cc index 8c3466219..0fa1e3b14 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -63,7 +63,7 @@ static int NextLength(int length) { class BlockBasedBloomTest : public testing::Test { private: - std::unique_ptr policy_; + int bits_per_key_; std::string filter_; std::vector keys_; @@ -76,8 +76,9 @@ class BlockBasedBloomTest : public testing::Test { } void ResetPolicy(double bits_per_key) { - policy_.reset(new BloomFilterPolicy(bits_per_key, - BloomFilterPolicy::kDeprecatedBlock)); + bits_per_key_ = + BloomFilterPolicy(bits_per_key, BloomFilterPolicy::kDeprecatedBlock) + .GetWholeBitsPerKey(); Reset(); } @@ -93,8 +94,9 @@ class BlockBasedBloomTest : public testing::Test { key_slices.push_back(Slice(keys_[i])); } filter_.clear(); - policy_->CreateFilter(&key_slices[0], static_cast(key_slices.size()), - &filter_); + BloomFilterPolicy::CreateFilter(key_slices.data(), + static_cast(key_slices.size()), + bits_per_key_, &filter_); keys_.clear(); if (kVerbose >= 2) DumpFilter(); } @@ -120,7 +122,7 @@ class BlockBasedBloomTest : public testing::Test { if (!keys_.empty()) { Build(); } - return policy_->KeyMayMatch(s, filter_); + return BloomFilterPolicy::KeyMayMatch(s, filter_); } double FalsePositiveRate() { @@ -280,7 +282,7 @@ class FullBloomTest : public testing::TestWithParam { BuiltinFilterBitsBuilder* GetBuiltinFilterBitsBuilder() { // Throws on bad cast - return &dynamic_cast(*bits_builder_); + return dynamic_cast(bits_builder_.get()); } const BloomFilterPolicy* GetBloomFilterPolicy() { @@ -397,23 +399,26 @@ TEST_P(FullBloomTest, FilterSize) { // checking that denoted and computed doubles are interpreted as expected // as bits_per_key values. bool some_computed_less_than_denoted = false; - // Note: enforced minimum is 1 bit per key (1000 millibits), and enforced - // maximum is 100 bits per key (100000 millibits). - for (auto bpk : - std::vector >{{-HUGE_VAL, 1000}, - {-INFINITY, 1000}, - {0.0, 1000}, - {1.234, 1234}, - {3.456, 3456}, - {9.5, 9500}, - {10.0, 10000}, - {10.499, 10499}, - {21.345, 21345}, - {99.999, 99999}, - {1234.0, 100000}, - {HUGE_VAL, 100000}, - {INFINITY, 100000}, - {NAN, 100000}}) { + // Note: to avoid unproductive configurations, bits_per_key < 0.5 is rounded + // down to 0 (no filter), and 0.5 <= bits_per_key < 1.0 is rounded up to 1 + // bit per key (1000 millibits). Also, enforced maximum is 100 bits per key + // (100000 millibits). + for (auto bpk : std::vector >{{-HUGE_VAL, 0}, + {-INFINITY, 0}, + {0.0, 0}, + {0.499, 0}, + {0.5, 1000}, + {1.234, 1234}, + {3.456, 3456}, + {9.5, 9500}, + {10.0, 10000}, + {10.499, 10499}, + {21.345, 21345}, + {99.999, 99999}, + {1234.0, 100000}, + {HUGE_VAL, 100000}, + {INFINITY, 100000}, + {NAN, 100000}}) { ResetPolicy(bpk.first); auto bfp = GetBloomFilterPolicy(); EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey()); @@ -433,6 +438,10 @@ TEST_P(FullBloomTest, FilterSize) { EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey()); auto bits_builder = GetBuiltinFilterBitsBuilder(); + if (bpk.second == 0) { + ASSERT_EQ(bits_builder, nullptr); + continue; + } size_t n = 1; size_t space = 0; @@ -1294,13 +1303,8 @@ TEST(RibbonTest, RibbonTestLevelThreshold) { std::vector > policies; policies.emplace_back(NewRibbonFilterPolicy(10, bloom_before_level)); - if (bloom_before_level == -1) { - // Also test old API - policies.emplace_back(NewExperimentalRibbonFilterPolicy(10)); - } - if (bloom_before_level == 0) { - // Also test old API and new API default + // Also test new API default policies.emplace_back(NewRibbonFilterPolicy(10)); }