Fix the bug with duplicate prefix in partition filters (#4024)

Summary:
https://github.com/facebook/rocksdb/pull/3764 introduced an optimization feature to skip duplicate prefix entires in full bloom filters. Unfortunately it also introduces a bug in partitioned full filters, where the duplicate prefix should still be inserted if it is in a new partition. The patch fixes the bug by resetting the duplicate detection logic each time a partition is cut.
This bug could result into false negatives, which means that DB could skip an existing key.
Closes https://github.com/facebook/rocksdb/pull/4024

Differential Revision: D8518866

Pulled By: maysamyabandeh

fbshipit-source-id: 044f4d988e606a330ecafd8c79daceb68b8796bf
This commit is contained in:
Maysam Yabandeh 2018-06-19 14:06:17 -07:00 committed by Facebook Github Bot
parent 92ee3350e0
commit 28a9d8910b
6 changed files with 72 additions and 21 deletions

View File

@ -10,6 +10,7 @@
### Bug Fixes ### Bug Fixes
* fix deadlock with enable_pipelined_write=true and max_successive_merges > 0 * fix deadlock with enable_pipelined_write=true and max_successive_merges > 0
* Fix corruption in non-iterator reads when mmap is used for file reads * Fix corruption in non-iterator reads when mmap is used for file reads
* Fix bug with prefix search in partition filters where a shared prefix would be ignored from the later partitions. The bug could report an eixstent key as missing. The bug could be triggered if prefix_extractor is set and partition filters is enabled.
## 5.14.0 (5/16/2018) ## 5.14.0 (5/16/2018)
### Public API Change ### Public API Change

View File

@ -347,7 +347,7 @@ class BlockBasedTable : public TableReader {
Status VerifyChecksumInBlocks(InternalIterator* index_iter); Status VerifyChecksumInBlocks(InternalIterator* index_iter);
// Create the filter from the filter block. // Create the filter from the filter block.
FilterBlockReader* ReadFilter( virtual FilterBlockReader* ReadFilter(
FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_handle, FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_handle,
const bool is_a_filter_partition, const bool is_a_filter_partition,
const SliceTransform* prefix_extractor = nullptr) const; const SliceTransform* prefix_extractor = nullptr) const;

View File

@ -72,8 +72,14 @@ inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
} }
} }
void FullFilterBlockBuilder::Reset() {
last_whole_key_recorded_ = false;
last_prefix_recorded_ = false;
}
Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/, Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
Status* status) { Status* status) {
Reset();
// In this impl we ignore BlockHandle // In this impl we ignore BlockHandle
*status = Status::OK(); *status = Status::OK();
if (num_added_ != 0) { if (num_added_ != 0) {

View File

@ -52,6 +52,7 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
protected: protected:
virtual void AddKey(const Slice& key); virtual void AddKey(const Slice& key);
std::unique_ptr<FilterBitsBuilder> filter_bits_builder_; std::unique_ptr<FilterBitsBuilder> filter_bits_builder_;
virtual void Reset();
private: private:
// important: all of these might point to invalid addresses // important: all of these might point to invalid addresses

View File

@ -49,6 +49,7 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock() {
std::string& index_key = p_index_builder_->GetPartitionKey(); std::string& index_key = p_index_builder_->GetPartitionKey();
filters.push_back({index_key, filter}); filters.push_back({index_key, filter});
filters_in_partition_ = 0; filters_in_partition_ = 0;
Reset();
} }
void PartitionedFilterBlockBuilder::AddKey(const Slice& key) { void PartitionedFilterBlockBuilder::AddKey(const Slice& key) {

View File

@ -30,13 +30,24 @@ class MockedBlockBasedTable : public BlockBasedTable {
virtual CachableEntry<FilterBlockReader> GetFilter( virtual CachableEntry<FilterBlockReader> GetFilter(
FilePrefetchBuffer*, const BlockHandle& filter_blk_handle, FilePrefetchBuffer*, const BlockHandle& filter_blk_handle,
const bool /* unused */, bool /* unused */, GetContext* /* unused */, const bool /* unused */, bool /* unused */, GetContext* /* unused */,
const SliceTransform* /* unused */) const override { const SliceTransform* prefix_extractor) const override {
Slice slice = slices[filter_blk_handle.offset()]; Slice slice = slices[filter_blk_handle.offset()];
auto obj = new FullFilterBlockReader( auto obj = new FullFilterBlockReader(
nullptr, true, BlockContents(slice, false, kNoCompression), prefix_extractor, true, BlockContents(slice, false, kNoCompression),
rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr); rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr);
return {obj, nullptr}; return {obj, nullptr};
} }
virtual FilterBlockReader* ReadFilter(
FilePrefetchBuffer*, const BlockHandle& filter_blk_handle,
const bool /* unused */,
const SliceTransform* prefix_extractor) const override {
Slice slice = slices[filter_blk_handle.offset()];
auto obj = new FullFilterBlockReader(
prefix_extractor, true, BlockContents(slice, false, kNoCompression),
rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr);
return obj;
}
}; };
class PartitionedFilterBlockTest : public testing::Test { class PartitionedFilterBlockTest : public testing::Test {
@ -93,7 +104,8 @@ class PartitionedFilterBlockTest : public testing::Test {
} }
PartitionedFilterBlockBuilder* NewBuilder( PartitionedFilterBlockBuilder* NewBuilder(
PartitionedIndexBuilder* const p_index_builder) { PartitionedIndexBuilder* const p_index_builder,
const SliceTransform* prefix_extractor = nullptr) {
assert(table_options_.block_size_deviation <= 100); assert(table_options_.block_size_deviation <= 100);
auto partition_size = static_cast<uint32_t>( auto partition_size = static_cast<uint32_t>(
((table_options_.metadata_block_size * ((table_options_.metadata_block_size *
@ -102,7 +114,7 @@ class PartitionedFilterBlockTest : public testing::Test {
100); 100);
partition_size = std::max(partition_size, static_cast<uint32_t>(1)); partition_size = std::max(partition_size, static_cast<uint32_t>(1));
return new PartitionedFilterBlockBuilder( return new PartitionedFilterBlockBuilder(
nullptr, table_options_.whole_key_filtering, prefix_extractor, table_options_.whole_key_filtering,
table_options_.filter_policy->GetFilterBitsBuilder(), table_options_.filter_policy->GetFilterBitsBuilder(),
table_options_.index_block_restart_interval, p_index_builder, table_options_.index_block_restart_interval, p_index_builder,
partition_size); partition_size);
@ -111,7 +123,8 @@ class PartitionedFilterBlockTest : public testing::Test {
std::unique_ptr<MockedBlockBasedTable> table; std::unique_ptr<MockedBlockBasedTable> table;
PartitionedFilterBlockReader* NewReader( PartitionedFilterBlockReader* NewReader(
PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib) { PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib,
const SliceTransform* prefix_extractor) {
BlockHandle bh; BlockHandle bh;
Status status; Status status;
Slice slice; Slice slice;
@ -126,41 +139,42 @@ class PartitionedFilterBlockTest : public testing::Test {
table.reset(new MockedBlockBasedTable(new BlockBasedTable::Rep( table.reset(new MockedBlockBasedTable(new BlockBasedTable::Rep(
ioptions, env_options, table_options_, icomp, false))); ioptions, env_options, table_options_, icomp, false)));
auto reader = new PartitionedFilterBlockReader( auto reader = new PartitionedFilterBlockReader(
nullptr, true, BlockContents(slice, false, kNoCompression), nullptr, prefix_extractor, true, BlockContents(slice, false, kNoCompression),
nullptr, icomp, table.get(), pib->seperator_is_key_plus_seq()); nullptr, nullptr, icomp, table.get(), pib->seperator_is_key_plus_seq());
return reader; return reader;
} }
void VerifyReader(PartitionedFilterBlockBuilder* builder, void VerifyReader(PartitionedFilterBlockBuilder* builder,
PartitionedIndexBuilder* pib, bool empty = false) { PartitionedIndexBuilder* pib, bool empty = false,
const SliceTransform* prefix_extractor = nullptr) {
std::unique_ptr<PartitionedFilterBlockReader> reader( std::unique_ptr<PartitionedFilterBlockReader> reader(
NewReader(builder, pib)); NewReader(builder, pib, prefix_extractor));
// Querying added keys // Querying added keys
const bool no_io = true; const bool no_io = true;
for (auto key : keys) { for (auto key : keys) {
auto ikey = InternalKey(key, 0, ValueType::kTypeValue); auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep()); const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE( ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, !no_io,
reader->KeyMayMatch(key, nullptr, kNotValid, !no_io, &ikey_slice)); &ikey_slice));
} }
{ {
// querying a key twice // querying a key twice
auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue); auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep()); const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(reader->KeyMayMatch(keys[0], nullptr, kNotValid, !no_io, ASSERT_TRUE(reader->KeyMayMatch(keys[0], prefix_extractor, kNotValid,
&ikey_slice)); !no_io, &ikey_slice));
} }
// querying missing keys // querying missing keys
for (auto key : missing_keys) { for (auto key : missing_keys) {
auto ikey = InternalKey(key, 0, ValueType::kTypeValue); auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep()); const Slice ikey_slice = Slice(*ikey.rep());
if (empty) { if (empty) {
ASSERT_TRUE( ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid,
reader->KeyMayMatch(key, nullptr, kNotValid, !no_io, &ikey_slice)); !no_io, &ikey_slice));
} else { } else {
// assuming a good hash function // assuming a good hash function
ASSERT_FALSE( ASSERT_FALSE(reader->KeyMayMatch(key, prefix_extractor, kNotValid,
reader->KeyMayMatch(key, nullptr, kNotValid, !no_io, &ikey_slice)); !no_io, &ikey_slice));
} }
} }
} }
@ -187,10 +201,10 @@ class PartitionedFilterBlockTest : public testing::Test {
return CountNumOfIndexPartitions(pib.get()); return CountNumOfIndexPartitions(pib.get());
} }
void TestBlockPerTwoKeys() { void TestBlockPerTwoKeys(const SliceTransform* prefix_extractor = nullptr) {
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder()); std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
std::unique_ptr<PartitionedFilterBlockBuilder> builder( std::unique_ptr<PartitionedFilterBlockBuilder> builder(
NewBuilder(pib.get())); NewBuilder(pib.get(), prefix_extractor));
int i = 0; int i = 0;
builder->Add(keys[i]); builder->Add(keys[i]);
i++; i++;
@ -203,7 +217,7 @@ class PartitionedFilterBlockTest : public testing::Test {
builder->Add(keys[i]); builder->Add(keys[i]);
CutABlock(pib.get(), keys[i]); CutABlock(pib.get(), keys[i]);
VerifyReader(builder.get(), pib.get()); VerifyReader(builder.get(), pib.get(), prefix_extractor);
} }
void TestBlockPerAllKeys() { void TestBlockPerAllKeys() {
@ -281,6 +295,34 @@ TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) {
} }
} }
// This reproduces the bug that a prefix is the same among multiple consecutive
// blocks but the bug would add it only to the first block.
TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
// some small number to cause partition cuts
table_options_.metadata_block_size = 1;
std::unique_ptr<const SliceTransform> prefix_extractor
(rocksdb::NewFixedPrefixTransform(1));
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
std::unique_ptr<PartitionedFilterBlockBuilder> builder(
NewBuilder(pib.get(), prefix_extractor.get()));
const std::string pkeys[3] = {"p-key1", "p-key2", "p-key3"};
builder->Add(pkeys[0]);
CutABlock(pib.get(), pkeys[0], pkeys[1]);
builder->Add(pkeys[1]);
CutABlock(pib.get(), pkeys[1], pkeys[2]);
builder->Add(pkeys[2]);
CutABlock(pib.get(), pkeys[2]);
std::unique_ptr<PartitionedFilterBlockReader> reader(
NewReader(builder.get(), pib.get(), prefix_extractor.get()));
for (auto key : pkeys) {
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
const Slice ikey_slice = Slice(*ikey.rep());
ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key),
prefix_extractor.get(), kNotValid,
false /*no_io*/, &ikey_slice));
}
}
TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) { TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
uint64_t max_index_size = MaxIndexSize(); uint64_t max_index_size = MaxIndexSize();
for (uint64_t i = 1; i < max_index_size + 1; i++) { for (uint64_t i = 1; i < max_index_size + 1; i++) {