Tests for filter compatibility (#9773)

Summary:
This change adds two unit tests that would each catch the
regression fixed in https://github.com/facebook/rocksdb/issues/9736

* TableMetaIndexKeys - detects any churn in metaindex block keys
generated by SST files using standard db_test_util configurations.
* BloomFilterCompatibility - this detects if any common built-in
FilterPolicy configurations fail to read filters generated by another.
(The regression bug caused NewRibbonFilterPolicy not to read filters
from NewBloomFilterPolicy and vice-versa.) This replaces some previous
tests that didn't really appear to be testing much of anything except
basic data correctness, which doesn't tell you a filter is being used.

Light refactoring in meta_blocks.cc/h to support inspecting metaindex
keys.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9773

Test Plan:
this is the test. Verified that 7.0.2 fails both tests and 7.0.3 passes.
With backporting for intentional API changes in 7.0, 6.29 also passes.

Reviewed By: ajkr

Differential Revision: D35236248

Pulled By: pdillinger

fbshipit-source-id: 493dfe9ad7e27524bf7c6c1af8a4b8c31bc6ef5a
This commit is contained in:
Peter Dillinger 2022-04-06 15:54:40 -07:00 committed by Facebook GitHub Bot
parent c3d7e16252
commit 8ce7cea93f
4 changed files with 197 additions and 80 deletions

View File

@ -17,8 +17,11 @@
#include "db/db_test_util.h"
#include "options/options_helper.h"
#include "port/stack_trace.h"
#include "rocksdb/advanced_options.h"
#include "rocksdb/convenience.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/statistics.h"
#include "rocksdb/table.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/block_based/filter_policy_internal.h"
@ -798,83 +801,87 @@ TEST_F(DBBloomFilterTest, BloomFilterRate) {
}
}
namespace {
struct CompatibilityConfig {
std::shared_ptr<const FilterPolicy> policy;
bool partitioned;
uint32_t format_version;
void SetInTableOptions(BlockBasedTableOptions* table_options) {
table_options->filter_policy = policy;
table_options->partition_filters = partitioned;
if (partitioned) {
table_options->index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
} else {
table_options->index_type =
BlockBasedTableOptions::IndexType::kBinarySearch;
}
table_options->format_version = format_version;
}
};
// High bits per key -> almost no FPs
std::shared_ptr<const FilterPolicy> kCompatibilityBloomPolicy{
NewBloomFilterPolicy(20)};
// bloom_before_level=-1 -> always use Ribbon
std::shared_ptr<const FilterPolicy> kCompatibilityRibbonPolicy{
NewRibbonFilterPolicy(20, -1)};
std::vector<CompatibilityConfig> kCompatibilityConfigs = {
{Create(20, kDeprecatedBlock), false,
BlockBasedTableOptions().format_version},
{kCompatibilityBloomPolicy, false, BlockBasedTableOptions().format_version},
{kCompatibilityBloomPolicy, true, BlockBasedTableOptions().format_version},
{kCompatibilityBloomPolicy, false, /* legacy Bloom */ 4U},
{kCompatibilityRibbonPolicy, false,
BlockBasedTableOptions().format_version},
{kCompatibilityRibbonPolicy, true, BlockBasedTableOptions().format_version},
};
} // namespace
TEST_F(DBBloomFilterTest, BloomFilterCompatibility) {
Options options = CurrentOptions();
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
options.level0_file_num_compaction_trigger =
static_cast<int>(kCompatibilityConfigs.size()) + 1;
options.max_open_files = -1;
Close();
// Create one file for each kind of filter. Each file covers a distinct key
// range.
for (size_t i = 0; i < kCompatibilityConfigs.size(); ++i) {
BlockBasedTableOptions table_options;
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
kCompatibilityConfigs[i].SetInTableOptions(&table_options);
ASSERT_TRUE(table_options.filter_policy != nullptr);
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
Reopen(options);
// Create with block based filter
CreateAndReopenWithCF({"pikachu"}, options);
const int maxKey = 10000;
for (int i = 0; i < maxKey; i++) {
ASSERT_OK(Put(1, Key(i), Key(i)));
std::string prefix = ToString(i) + "_";
ASSERT_OK(Put(prefix + "A", "val"));
ASSERT_OK(Put(prefix + "Z", "val"));
ASSERT_OK(Flush());
}
ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
Flush(1);
// Check db with full filter
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
ReopenWithColumnFamilies({"default", "pikachu"}, options);
// Check if they can be found
for (int i = 0; i < maxKey; i++) {
ASSERT_EQ(Key(i), Get(1, Key(i)));
}
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
// Check db with partitioned full filter
table_options.partition_filters = true;
table_options.index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
ReopenWithColumnFamilies({"default", "pikachu"}, options);
// Check if they can be found
for (int i = 0; i < maxKey; i++) {
ASSERT_EQ(Key(i), Get(1, Key(i)));
}
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
}
TEST_F(DBBloomFilterTest, BloomFilterReverseCompatibility) {
for (bool partition_filters : {true, false}) {
Options options = CurrentOptions();
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
// Test filter is used between each pair of {reader,writer} configurations,
// because any built-in FilterPolicy should be able to read filters from any
// other built-in FilterPolicy
for (size_t i = 0; i < kCompatibilityConfigs.size(); ++i) {
BlockBasedTableOptions table_options;
if (partition_filters) {
table_options.partition_filters = true;
table_options.index_type =
BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
}
table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
kCompatibilityConfigs[i].SetInTableOptions(&table_options);
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);
// Create with full filter
CreateAndReopenWithCF({"pikachu"}, options);
const int maxKey = 10000;
for (int i = 0; i < maxKey; i++) {
ASSERT_OK(Put(1, Key(i), Key(i)));
Reopen(options);
for (size_t j = 0; j < kCompatibilityConfigs.size(); ++j) {
std::string prefix = ToString(j) + "_";
ASSERT_EQ("val", Get(prefix + "A")); // Filter positive
ASSERT_EQ("val", Get(prefix + "Z")); // Filter positive
// Filter negative, with high probability
ASSERT_EQ("NOT_FOUND", Get(prefix + "Q"));
// FULL_POSITIVE does not include block-based filter case (j == 0)
EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_FULL_POSITIVE),
j == 0 ? 0 : 2);
EXPECT_EQ(TestGetAndResetTickerCount(options, BLOOM_FILTER_USEFUL), 1);
}
ASSERT_OK(Put(1, Key(maxKey + 55555), Key(maxKey + 55555)));
Flush(1);
// Check db with block_based filter
table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
ReopenWithColumnFamilies({"default", "pikachu"}, options);
// Check if they can be found
for (int i = 0; i < maxKey; i++) {
ASSERT_EQ(Key(i), Get(1, Key(i)));
}
ASSERT_EQ(TestGetTickerCount(options, BLOOM_FILTER_USEFUL), 0);
}
}

View File

@ -13,12 +13,17 @@
#include <string>
#include "db/db_test_util.h"
#include "options/cf_options.h"
#include "port/stack_trace.h"
#include "rocksdb/listener.h"
#include "rocksdb/options.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/perf_level.h"
#include "rocksdb/table.h"
#include "table/block_based/block.h"
#include "table/format.h"
#include "table/meta_blocks.h"
#include "table/table_builder.h"
#include "test_util/mock_time_env.h"
#include "util/random.h"
#include "util/string_util.h"
@ -1992,6 +1997,90 @@ TEST_F(DBPropertiesTest, GetMapPropertyDbStats) {
Close();
}
namespace {
std::string PopMetaIndexKey(InternalIterator* meta_iter) {
Status s = meta_iter->status();
if (!s.ok()) {
return s.ToString();
} else if (meta_iter->Valid()) {
std::string rv = meta_iter->key().ToString();
meta_iter->Next();
return rv;
} else {
return "NOT_FOUND";
}
}
} // namespace
TEST_F(DBPropertiesTest, TableMetaIndexKeys) {
// This is to detect unexpected churn in metaindex block keys. This is more
// of a "table test" but table_test.cc doesn't depend on db_test_util.h and
// we need ChangeOptions() for broad coverage.
constexpr int kKeyCount = 100;
do {
Options options;
options = CurrentOptions(options);
DestroyAndReopen(options);
// Create an SST file
for (int key = 0; key < kKeyCount; key++) {
ASSERT_OK(Put(Key(key), "val"));
}
ASSERT_OK(Flush());
// Find its file number
std::vector<LiveFileMetaData> files;
db_->GetLiveFilesMetaData(&files);
// 1 SST file
ASSERT_EQ(1, files.size());
// Open it for inspection
std::string sst_file =
files[0].directory + "/" + files[0].relative_filename;
std::unique_ptr<FSRandomAccessFile> f;
ASSERT_OK(env_->GetFileSystem()->NewRandomAccessFile(
sst_file, FileOptions(), &f, nullptr));
std::unique_ptr<RandomAccessFileReader> r;
r.reset(new RandomAccessFileReader(std::move(f), sst_file));
uint64_t file_size = 0;
ASSERT_OK(env_->GetFileSize(sst_file, &file_size));
// Read metaindex
BlockContents bc;
ASSERT_OK(ReadMetaIndexBlockInFile(r.get(), file_size, 0U,
ImmutableOptions(options), &bc));
Block metaindex_block(std::move(bc));
std::unique_ptr<InternalIterator> meta_iter;
meta_iter.reset(metaindex_block.NewMetaIterator());
meta_iter->SeekToFirst();
if (strcmp(options.table_factory->Name(),
TableFactory::kBlockBasedTableName()) == 0) {
auto bbto = options.table_factory->GetOptions<BlockBasedTableOptions>();
if (bbto->filter_policy) {
if (bbto->partition_filters) {
// The key names are intentionally hard-coded here to detect
// accidental regression on compatibility.
EXPECT_EQ("partitionedfilter.rocksdb.BuiltinBloomFilter",
PopMetaIndexKey(meta_iter.get()));
} else {
EXPECT_EQ("fullfilter.rocksdb.BuiltinBloomFilter",
PopMetaIndexKey(meta_iter.get()));
}
}
if (bbto->index_type == BlockBasedTableOptions::kHashSearch) {
EXPECT_EQ("rocksdb.hashindex.metadata",
PopMetaIndexKey(meta_iter.get()));
EXPECT_EQ("rocksdb.hashindex.prefixes",
PopMetaIndexKey(meta_iter.get()));
}
}
EXPECT_EQ("rocksdb.properties", PopMetaIndexKey(meta_iter.get()));
EXPECT_EQ("NOT_FOUND", PopMetaIndexKey(meta_iter.get()));
} while (ChangeOptions());
}
#endif // ROCKSDB_LITE
} // namespace ROCKSDB_NAMESPACE

View File

@ -463,11 +463,10 @@ Status FindMetaBlock(InternalIterator* meta_index_iter,
}
}
Status FindMetaBlockInFile(RandomAccessFileReader* file, uint64_t file_size,
uint64_t table_magic_number,
Status ReadMetaIndexBlockInFile(RandomAccessFileReader* file,
uint64_t file_size, uint64_t table_magic_number,
const ImmutableOptions& ioptions,
const std::string& meta_block_name,
BlockHandle* block_handle,
BlockContents* metaindex_contents,
MemoryAllocator* memory_allocator,
FilePrefetchBuffer* prefetch_buffer,
Footer* footer_out) {
@ -483,13 +482,26 @@ Status FindMetaBlockInFile(RandomAccessFileReader* file, uint64_t file_size,
}
auto metaindex_handle = footer.metaindex_handle();
BlockContents metaindex_contents;
s = BlockFetcher(file, prefetch_buffer, footer, ReadOptions(),
metaindex_handle, &metaindex_contents, ioptions,
return BlockFetcher(file, prefetch_buffer, footer, ReadOptions(),
metaindex_handle, metaindex_contents, ioptions,
false /* do decompression */, false /*maybe_compressed*/,
BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(),
PersistentCacheOptions::kEmpty, memory_allocator)
.ReadBlockContents();
}
Status FindMetaBlockInFile(RandomAccessFileReader* file, uint64_t file_size,
uint64_t table_magic_number,
const ImmutableOptions& ioptions,
const std::string& meta_block_name,
BlockHandle* block_handle,
MemoryAllocator* memory_allocator,
FilePrefetchBuffer* prefetch_buffer,
Footer* footer_out) {
BlockContents metaindex_contents;
auto s = ReadMetaIndexBlockInFile(
file, file_size, table_magic_number, ioptions, &metaindex_contents,
memory_allocator, prefetch_buffer, footer_out);
if (!s.ok()) {
return s;
}

View File

@ -145,6 +145,15 @@ Status FindMetaBlockInFile(RandomAccessFileReader* file, uint64_t file_size,
FilePrefetchBuffer* prefetch_buffer = nullptr,
Footer* footer_out = nullptr);
// Read meta block contents
Status ReadMetaIndexBlockInFile(RandomAccessFileReader* file,
uint64_t file_size, uint64_t table_magic_number,
const ImmutableOptions& ioptions,
BlockContents* block_contents,
MemoryAllocator* memory_allocator = nullptr,
FilePrefetchBuffer* prefetch_buffer = nullptr,
Footer* footer_out = nullptr);
// Read the specified meta block with name meta_block_name
// from `file` and initialize `contents` with contents of this block.
// Return Status::OK in case of success.