Fix the bug with duplicate prefix in partition filters (#4024)
Summary: https://github.com/facebook/rocksdb/pull/3764 introduced an optimization feature to skip duplicate prefix entires in full bloom filters. Unfortunately it also introduces a bug in partitioned full filters, where the duplicate prefix should still be inserted if it is in a new partition. The patch fixes the bug by resetting the duplicate detection logic each time a partition is cut. This bug could result into false negatives, which means that DB could skip an existing key. Closes https://github.com/facebook/rocksdb/pull/4024 Differential Revision: D8518866 Pulled By: maysamyabandeh fbshipit-source-id: 044f4d988e606a330ecafd8c79daceb68b8796bf
This commit is contained in:
parent
92ee3350e0
commit
28a9d8910b
@ -10,6 +10,7 @@
|
||||
### Bug Fixes
|
||||
* fix deadlock with enable_pipelined_write=true and max_successive_merges > 0
|
||||
* Fix corruption in non-iterator reads when mmap is used for file reads
|
||||
* Fix bug with prefix search in partition filters where a shared prefix would be ignored from the later partitions. The bug could report an eixstent key as missing. The bug could be triggered if prefix_extractor is set and partition filters is enabled.
|
||||
|
||||
## 5.14.0 (5/16/2018)
|
||||
### Public API Change
|
||||
|
@ -347,7 +347,7 @@ class BlockBasedTable : public TableReader {
|
||||
Status VerifyChecksumInBlocks(InternalIterator* index_iter);
|
||||
|
||||
// Create the filter from the filter block.
|
||||
FilterBlockReader* ReadFilter(
|
||||
virtual FilterBlockReader* ReadFilter(
|
||||
FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_handle,
|
||||
const bool is_a_filter_partition,
|
||||
const SliceTransform* prefix_extractor = nullptr) const;
|
||||
|
@ -72,8 +72,14 @@ inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
|
||||
}
|
||||
}
|
||||
|
||||
void FullFilterBlockBuilder::Reset() {
|
||||
last_whole_key_recorded_ = false;
|
||||
last_prefix_recorded_ = false;
|
||||
}
|
||||
|
||||
Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
|
||||
Status* status) {
|
||||
Reset();
|
||||
// In this impl we ignore BlockHandle
|
||||
*status = Status::OK();
|
||||
if (num_added_ != 0) {
|
||||
|
@ -52,6 +52,7 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
|
||||
protected:
|
||||
virtual void AddKey(const Slice& key);
|
||||
std::unique_ptr<FilterBitsBuilder> filter_bits_builder_;
|
||||
virtual void Reset();
|
||||
|
||||
private:
|
||||
// important: all of these might point to invalid addresses
|
||||
|
@ -49,6 +49,7 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock() {
|
||||
std::string& index_key = p_index_builder_->GetPartitionKey();
|
||||
filters.push_back({index_key, filter});
|
||||
filters_in_partition_ = 0;
|
||||
Reset();
|
||||
}
|
||||
|
||||
void PartitionedFilterBlockBuilder::AddKey(const Slice& key) {
|
||||
|
@ -30,13 +30,24 @@ class MockedBlockBasedTable : public BlockBasedTable {
|
||||
virtual CachableEntry<FilterBlockReader> GetFilter(
|
||||
FilePrefetchBuffer*, const BlockHandle& filter_blk_handle,
|
||||
const bool /* unused */, bool /* unused */, GetContext* /* unused */,
|
||||
const SliceTransform* /* unused */) const override {
|
||||
const SliceTransform* prefix_extractor) const override {
|
||||
Slice slice = slices[filter_blk_handle.offset()];
|
||||
auto obj = new FullFilterBlockReader(
|
||||
nullptr, true, BlockContents(slice, false, kNoCompression),
|
||||
prefix_extractor, true, BlockContents(slice, false, kNoCompression),
|
||||
rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr);
|
||||
return {obj, nullptr};
|
||||
}
|
||||
|
||||
virtual FilterBlockReader* ReadFilter(
|
||||
FilePrefetchBuffer*, const BlockHandle& filter_blk_handle,
|
||||
const bool /* unused */,
|
||||
const SliceTransform* prefix_extractor) const override {
|
||||
Slice slice = slices[filter_blk_handle.offset()];
|
||||
auto obj = new FullFilterBlockReader(
|
||||
prefix_extractor, true, BlockContents(slice, false, kNoCompression),
|
||||
rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr);
|
||||
return obj;
|
||||
}
|
||||
};
|
||||
|
||||
class PartitionedFilterBlockTest : public testing::Test {
|
||||
@ -93,7 +104,8 @@ class PartitionedFilterBlockTest : public testing::Test {
|
||||
}
|
||||
|
||||
PartitionedFilterBlockBuilder* NewBuilder(
|
||||
PartitionedIndexBuilder* const p_index_builder) {
|
||||
PartitionedIndexBuilder* const p_index_builder,
|
||||
const SliceTransform* prefix_extractor = nullptr) {
|
||||
assert(table_options_.block_size_deviation <= 100);
|
||||
auto partition_size = static_cast<uint32_t>(
|
||||
((table_options_.metadata_block_size *
|
||||
@ -102,7 +114,7 @@ class PartitionedFilterBlockTest : public testing::Test {
|
||||
100);
|
||||
partition_size = std::max(partition_size, static_cast<uint32_t>(1));
|
||||
return new PartitionedFilterBlockBuilder(
|
||||
nullptr, table_options_.whole_key_filtering,
|
||||
prefix_extractor, table_options_.whole_key_filtering,
|
||||
table_options_.filter_policy->GetFilterBitsBuilder(),
|
||||
table_options_.index_block_restart_interval, p_index_builder,
|
||||
partition_size);
|
||||
@ -111,7 +123,8 @@ class PartitionedFilterBlockTest : public testing::Test {
|
||||
std::unique_ptr<MockedBlockBasedTable> table;
|
||||
|
||||
PartitionedFilterBlockReader* NewReader(
|
||||
PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib) {
|
||||
PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib,
|
||||
const SliceTransform* prefix_extractor) {
|
||||
BlockHandle bh;
|
||||
Status status;
|
||||
Slice slice;
|
||||
@ -126,41 +139,42 @@ class PartitionedFilterBlockTest : public testing::Test {
|
||||
table.reset(new MockedBlockBasedTable(new BlockBasedTable::Rep(
|
||||
ioptions, env_options, table_options_, icomp, false)));
|
||||
auto reader = new PartitionedFilterBlockReader(
|
||||
nullptr, true, BlockContents(slice, false, kNoCompression), nullptr,
|
||||
nullptr, icomp, table.get(), pib->seperator_is_key_plus_seq());
|
||||
prefix_extractor, true, BlockContents(slice, false, kNoCompression),
|
||||
nullptr, nullptr, icomp, table.get(), pib->seperator_is_key_plus_seq());
|
||||
return reader;
|
||||
}
|
||||
|
||||
void VerifyReader(PartitionedFilterBlockBuilder* builder,
|
||||
PartitionedIndexBuilder* pib, bool empty = false) {
|
||||
PartitionedIndexBuilder* pib, bool empty = false,
|
||||
const SliceTransform* prefix_extractor = nullptr) {
|
||||
std::unique_ptr<PartitionedFilterBlockReader> reader(
|
||||
NewReader(builder, pib));
|
||||
NewReader(builder, pib, prefix_extractor));
|
||||
// Querying added keys
|
||||
const bool no_io = true;
|
||||
for (auto key : keys) {
|
||||
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
|
||||
const Slice ikey_slice = Slice(*ikey.rep());
|
||||
ASSERT_TRUE(
|
||||
reader->KeyMayMatch(key, nullptr, kNotValid, !no_io, &ikey_slice));
|
||||
ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, !no_io,
|
||||
&ikey_slice));
|
||||
}
|
||||
{
|
||||
// querying a key twice
|
||||
auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue);
|
||||
const Slice ikey_slice = Slice(*ikey.rep());
|
||||
ASSERT_TRUE(reader->KeyMayMatch(keys[0], nullptr, kNotValid, !no_io,
|
||||
&ikey_slice));
|
||||
ASSERT_TRUE(reader->KeyMayMatch(keys[0], prefix_extractor, kNotValid,
|
||||
!no_io, &ikey_slice));
|
||||
}
|
||||
// querying missing keys
|
||||
for (auto key : missing_keys) {
|
||||
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
|
||||
const Slice ikey_slice = Slice(*ikey.rep());
|
||||
if (empty) {
|
||||
ASSERT_TRUE(
|
||||
reader->KeyMayMatch(key, nullptr, kNotValid, !no_io, &ikey_slice));
|
||||
ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid,
|
||||
!no_io, &ikey_slice));
|
||||
} else {
|
||||
// assuming a good hash function
|
||||
ASSERT_FALSE(
|
||||
reader->KeyMayMatch(key, nullptr, kNotValid, !no_io, &ikey_slice));
|
||||
ASSERT_FALSE(reader->KeyMayMatch(key, prefix_extractor, kNotValid,
|
||||
!no_io, &ikey_slice));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -187,10 +201,10 @@ class PartitionedFilterBlockTest : public testing::Test {
|
||||
return CountNumOfIndexPartitions(pib.get());
|
||||
}
|
||||
|
||||
void TestBlockPerTwoKeys() {
|
||||
void TestBlockPerTwoKeys(const SliceTransform* prefix_extractor = nullptr) {
|
||||
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
||||
std::unique_ptr<PartitionedFilterBlockBuilder> builder(
|
||||
NewBuilder(pib.get()));
|
||||
NewBuilder(pib.get(), prefix_extractor));
|
||||
int i = 0;
|
||||
builder->Add(keys[i]);
|
||||
i++;
|
||||
@ -203,7 +217,7 @@ class PartitionedFilterBlockTest : public testing::Test {
|
||||
builder->Add(keys[i]);
|
||||
CutABlock(pib.get(), keys[i]);
|
||||
|
||||
VerifyReader(builder.get(), pib.get());
|
||||
VerifyReader(builder.get(), pib.get(), prefix_extractor);
|
||||
}
|
||||
|
||||
void TestBlockPerAllKeys() {
|
||||
@ -281,6 +295,34 @@ TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) {
|
||||
}
|
||||
}
|
||||
|
||||
// This reproduces the bug that a prefix is the same among multiple consecutive
|
||||
// blocks but the bug would add it only to the first block.
|
||||
TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
|
||||
// some small number to cause partition cuts
|
||||
table_options_.metadata_block_size = 1;
|
||||
std::unique_ptr<const SliceTransform> prefix_extractor
|
||||
(rocksdb::NewFixedPrefixTransform(1));
|
||||
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
||||
std::unique_ptr<PartitionedFilterBlockBuilder> builder(
|
||||
NewBuilder(pib.get(), prefix_extractor.get()));
|
||||
const std::string pkeys[3] = {"p-key1", "p-key2", "p-key3"};
|
||||
builder->Add(pkeys[0]);
|
||||
CutABlock(pib.get(), pkeys[0], pkeys[1]);
|
||||
builder->Add(pkeys[1]);
|
||||
CutABlock(pib.get(), pkeys[1], pkeys[2]);
|
||||
builder->Add(pkeys[2]);
|
||||
CutABlock(pib.get(), pkeys[2]);
|
||||
std::unique_ptr<PartitionedFilterBlockReader> reader(
|
||||
NewReader(builder.get(), pib.get(), prefix_extractor.get()));
|
||||
for (auto key : pkeys) {
|
||||
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
|
||||
const Slice ikey_slice = Slice(*ikey.rep());
|
||||
ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key),
|
||||
prefix_extractor.get(), kNotValid,
|
||||
false /*no_io*/, &ikey_slice));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
|
||||
uint64_t max_index_size = MaxIndexSize();
|
||||
for (uint64_t i = 1; i < max_index_size + 1; i++) {
|
||||
|
Loading…
Reference in New Issue
Block a user