Fix the bug with duplicate prefix in partition filters (#4024)
Summary: https://github.com/facebook/rocksdb/pull/3764 introduced an optimization feature to skip duplicate prefix entires in full bloom filters. Unfortunately it also introduces a bug in partitioned full filters, where the duplicate prefix should still be inserted if it is in a new partition. The patch fixes the bug by resetting the duplicate detection logic each time a partition is cut. This bug could result into false negatives, which means that DB could skip an existing key. Closes https://github.com/facebook/rocksdb/pull/4024 Differential Revision: D8518866 Pulled By: maysamyabandeh fbshipit-source-id: 044f4d988e606a330ecafd8c79daceb68b8796bf
This commit is contained in:
parent
92ee3350e0
commit
28a9d8910b
@ -10,6 +10,7 @@
|
|||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
* fix deadlock with enable_pipelined_write=true and max_successive_merges > 0
|
* fix deadlock with enable_pipelined_write=true and max_successive_merges > 0
|
||||||
* Fix corruption in non-iterator reads when mmap is used for file reads
|
* Fix corruption in non-iterator reads when mmap is used for file reads
|
||||||
|
* Fix bug with prefix search in partition filters where a shared prefix would be ignored from the later partitions. The bug could report an eixstent key as missing. The bug could be triggered if prefix_extractor is set and partition filters is enabled.
|
||||||
|
|
||||||
## 5.14.0 (5/16/2018)
|
## 5.14.0 (5/16/2018)
|
||||||
### Public API Change
|
### Public API Change
|
||||||
|
@ -347,7 +347,7 @@ class BlockBasedTable : public TableReader {
|
|||||||
Status VerifyChecksumInBlocks(InternalIterator* index_iter);
|
Status VerifyChecksumInBlocks(InternalIterator* index_iter);
|
||||||
|
|
||||||
// Create the filter from the filter block.
|
// Create the filter from the filter block.
|
||||||
FilterBlockReader* ReadFilter(
|
virtual FilterBlockReader* ReadFilter(
|
||||||
FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_handle,
|
FilePrefetchBuffer* prefetch_buffer, const BlockHandle& filter_handle,
|
||||||
const bool is_a_filter_partition,
|
const bool is_a_filter_partition,
|
||||||
const SliceTransform* prefix_extractor = nullptr) const;
|
const SliceTransform* prefix_extractor = nullptr) const;
|
||||||
|
@ -72,8 +72,14 @@ inline void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void FullFilterBlockBuilder::Reset() {
|
||||||
|
last_whole_key_recorded_ = false;
|
||||||
|
last_prefix_recorded_ = false;
|
||||||
|
}
|
||||||
|
|
||||||
Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
|
Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
|
||||||
Status* status) {
|
Status* status) {
|
||||||
|
Reset();
|
||||||
// In this impl we ignore BlockHandle
|
// In this impl we ignore BlockHandle
|
||||||
*status = Status::OK();
|
*status = Status::OK();
|
||||||
if (num_added_ != 0) {
|
if (num_added_ != 0) {
|
||||||
|
@ -52,6 +52,7 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
|
|||||||
protected:
|
protected:
|
||||||
virtual void AddKey(const Slice& key);
|
virtual void AddKey(const Slice& key);
|
||||||
std::unique_ptr<FilterBitsBuilder> filter_bits_builder_;
|
std::unique_ptr<FilterBitsBuilder> filter_bits_builder_;
|
||||||
|
virtual void Reset();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// important: all of these might point to invalid addresses
|
// important: all of these might point to invalid addresses
|
||||||
|
@ -49,6 +49,7 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock() {
|
|||||||
std::string& index_key = p_index_builder_->GetPartitionKey();
|
std::string& index_key = p_index_builder_->GetPartitionKey();
|
||||||
filters.push_back({index_key, filter});
|
filters.push_back({index_key, filter});
|
||||||
filters_in_partition_ = 0;
|
filters_in_partition_ = 0;
|
||||||
|
Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
void PartitionedFilterBlockBuilder::AddKey(const Slice& key) {
|
void PartitionedFilterBlockBuilder::AddKey(const Slice& key) {
|
||||||
|
@ -30,13 +30,24 @@ class MockedBlockBasedTable : public BlockBasedTable {
|
|||||||
virtual CachableEntry<FilterBlockReader> GetFilter(
|
virtual CachableEntry<FilterBlockReader> GetFilter(
|
||||||
FilePrefetchBuffer*, const BlockHandle& filter_blk_handle,
|
FilePrefetchBuffer*, const BlockHandle& filter_blk_handle,
|
||||||
const bool /* unused */, bool /* unused */, GetContext* /* unused */,
|
const bool /* unused */, bool /* unused */, GetContext* /* unused */,
|
||||||
const SliceTransform* /* unused */) const override {
|
const SliceTransform* prefix_extractor) const override {
|
||||||
Slice slice = slices[filter_blk_handle.offset()];
|
Slice slice = slices[filter_blk_handle.offset()];
|
||||||
auto obj = new FullFilterBlockReader(
|
auto obj = new FullFilterBlockReader(
|
||||||
nullptr, true, BlockContents(slice, false, kNoCompression),
|
prefix_extractor, true, BlockContents(slice, false, kNoCompression),
|
||||||
rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr);
|
rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr);
|
||||||
return {obj, nullptr};
|
return {obj, nullptr};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual FilterBlockReader* ReadFilter(
|
||||||
|
FilePrefetchBuffer*, const BlockHandle& filter_blk_handle,
|
||||||
|
const bool /* unused */,
|
||||||
|
const SliceTransform* prefix_extractor) const override {
|
||||||
|
Slice slice = slices[filter_blk_handle.offset()];
|
||||||
|
auto obj = new FullFilterBlockReader(
|
||||||
|
prefix_extractor, true, BlockContents(slice, false, kNoCompression),
|
||||||
|
rep_->table_options.filter_policy->GetFilterBitsReader(slice), nullptr);
|
||||||
|
return obj;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class PartitionedFilterBlockTest : public testing::Test {
|
class PartitionedFilterBlockTest : public testing::Test {
|
||||||
@ -93,7 +104,8 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||||||
}
|
}
|
||||||
|
|
||||||
PartitionedFilterBlockBuilder* NewBuilder(
|
PartitionedFilterBlockBuilder* NewBuilder(
|
||||||
PartitionedIndexBuilder* const p_index_builder) {
|
PartitionedIndexBuilder* const p_index_builder,
|
||||||
|
const SliceTransform* prefix_extractor = nullptr) {
|
||||||
assert(table_options_.block_size_deviation <= 100);
|
assert(table_options_.block_size_deviation <= 100);
|
||||||
auto partition_size = static_cast<uint32_t>(
|
auto partition_size = static_cast<uint32_t>(
|
||||||
((table_options_.metadata_block_size *
|
((table_options_.metadata_block_size *
|
||||||
@ -102,7 +114,7 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||||||
100);
|
100);
|
||||||
partition_size = std::max(partition_size, static_cast<uint32_t>(1));
|
partition_size = std::max(partition_size, static_cast<uint32_t>(1));
|
||||||
return new PartitionedFilterBlockBuilder(
|
return new PartitionedFilterBlockBuilder(
|
||||||
nullptr, table_options_.whole_key_filtering,
|
prefix_extractor, table_options_.whole_key_filtering,
|
||||||
table_options_.filter_policy->GetFilterBitsBuilder(),
|
table_options_.filter_policy->GetFilterBitsBuilder(),
|
||||||
table_options_.index_block_restart_interval, p_index_builder,
|
table_options_.index_block_restart_interval, p_index_builder,
|
||||||
partition_size);
|
partition_size);
|
||||||
@ -111,7 +123,8 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||||||
std::unique_ptr<MockedBlockBasedTable> table;
|
std::unique_ptr<MockedBlockBasedTable> table;
|
||||||
|
|
||||||
PartitionedFilterBlockReader* NewReader(
|
PartitionedFilterBlockReader* NewReader(
|
||||||
PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib) {
|
PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib,
|
||||||
|
const SliceTransform* prefix_extractor) {
|
||||||
BlockHandle bh;
|
BlockHandle bh;
|
||||||
Status status;
|
Status status;
|
||||||
Slice slice;
|
Slice slice;
|
||||||
@ -126,41 +139,42 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||||||
table.reset(new MockedBlockBasedTable(new BlockBasedTable::Rep(
|
table.reset(new MockedBlockBasedTable(new BlockBasedTable::Rep(
|
||||||
ioptions, env_options, table_options_, icomp, false)));
|
ioptions, env_options, table_options_, icomp, false)));
|
||||||
auto reader = new PartitionedFilterBlockReader(
|
auto reader = new PartitionedFilterBlockReader(
|
||||||
nullptr, true, BlockContents(slice, false, kNoCompression), nullptr,
|
prefix_extractor, true, BlockContents(slice, false, kNoCompression),
|
||||||
nullptr, icomp, table.get(), pib->seperator_is_key_plus_seq());
|
nullptr, nullptr, icomp, table.get(), pib->seperator_is_key_plus_seq());
|
||||||
return reader;
|
return reader;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VerifyReader(PartitionedFilterBlockBuilder* builder,
|
void VerifyReader(PartitionedFilterBlockBuilder* builder,
|
||||||
PartitionedIndexBuilder* pib, bool empty = false) {
|
PartitionedIndexBuilder* pib, bool empty = false,
|
||||||
|
const SliceTransform* prefix_extractor = nullptr) {
|
||||||
std::unique_ptr<PartitionedFilterBlockReader> reader(
|
std::unique_ptr<PartitionedFilterBlockReader> reader(
|
||||||
NewReader(builder, pib));
|
NewReader(builder, pib, prefix_extractor));
|
||||||
// Querying added keys
|
// Querying added keys
|
||||||
const bool no_io = true;
|
const bool no_io = true;
|
||||||
for (auto key : keys) {
|
for (auto key : keys) {
|
||||||
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
|
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
|
||||||
const Slice ikey_slice = Slice(*ikey.rep());
|
const Slice ikey_slice = Slice(*ikey.rep());
|
||||||
ASSERT_TRUE(
|
ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, !no_io,
|
||||||
reader->KeyMayMatch(key, nullptr, kNotValid, !no_io, &ikey_slice));
|
&ikey_slice));
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
// querying a key twice
|
// querying a key twice
|
||||||
auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue);
|
auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue);
|
||||||
const Slice ikey_slice = Slice(*ikey.rep());
|
const Slice ikey_slice = Slice(*ikey.rep());
|
||||||
ASSERT_TRUE(reader->KeyMayMatch(keys[0], nullptr, kNotValid, !no_io,
|
ASSERT_TRUE(reader->KeyMayMatch(keys[0], prefix_extractor, kNotValid,
|
||||||
&ikey_slice));
|
!no_io, &ikey_slice));
|
||||||
}
|
}
|
||||||
// querying missing keys
|
// querying missing keys
|
||||||
for (auto key : missing_keys) {
|
for (auto key : missing_keys) {
|
||||||
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
|
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
|
||||||
const Slice ikey_slice = Slice(*ikey.rep());
|
const Slice ikey_slice = Slice(*ikey.rep());
|
||||||
if (empty) {
|
if (empty) {
|
||||||
ASSERT_TRUE(
|
ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid,
|
||||||
reader->KeyMayMatch(key, nullptr, kNotValid, !no_io, &ikey_slice));
|
!no_io, &ikey_slice));
|
||||||
} else {
|
} else {
|
||||||
// assuming a good hash function
|
// assuming a good hash function
|
||||||
ASSERT_FALSE(
|
ASSERT_FALSE(reader->KeyMayMatch(key, prefix_extractor, kNotValid,
|
||||||
reader->KeyMayMatch(key, nullptr, kNotValid, !no_io, &ikey_slice));
|
!no_io, &ikey_slice));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -187,10 +201,10 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||||||
return CountNumOfIndexPartitions(pib.get());
|
return CountNumOfIndexPartitions(pib.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
void TestBlockPerTwoKeys() {
|
void TestBlockPerTwoKeys(const SliceTransform* prefix_extractor = nullptr) {
|
||||||
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
||||||
std::unique_ptr<PartitionedFilterBlockBuilder> builder(
|
std::unique_ptr<PartitionedFilterBlockBuilder> builder(
|
||||||
NewBuilder(pib.get()));
|
NewBuilder(pib.get(), prefix_extractor));
|
||||||
int i = 0;
|
int i = 0;
|
||||||
builder->Add(keys[i]);
|
builder->Add(keys[i]);
|
||||||
i++;
|
i++;
|
||||||
@ -203,7 +217,7 @@ class PartitionedFilterBlockTest : public testing::Test {
|
|||||||
builder->Add(keys[i]);
|
builder->Add(keys[i]);
|
||||||
CutABlock(pib.get(), keys[i]);
|
CutABlock(pib.get(), keys[i]);
|
||||||
|
|
||||||
VerifyReader(builder.get(), pib.get());
|
VerifyReader(builder.get(), pib.get(), prefix_extractor);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TestBlockPerAllKeys() {
|
void TestBlockPerAllKeys() {
|
||||||
@ -281,6 +295,34 @@ TEST_F(PartitionedFilterBlockTest, TwoBlocksPerKey) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This reproduces the bug that a prefix is the same among multiple consecutive
|
||||||
|
// blocks but the bug would add it only to the first block.
|
||||||
|
TEST_F(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
|
||||||
|
// some small number to cause partition cuts
|
||||||
|
table_options_.metadata_block_size = 1;
|
||||||
|
std::unique_ptr<const SliceTransform> prefix_extractor
|
||||||
|
(rocksdb::NewFixedPrefixTransform(1));
|
||||||
|
std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
|
||||||
|
std::unique_ptr<PartitionedFilterBlockBuilder> builder(
|
||||||
|
NewBuilder(pib.get(), prefix_extractor.get()));
|
||||||
|
const std::string pkeys[3] = {"p-key1", "p-key2", "p-key3"};
|
||||||
|
builder->Add(pkeys[0]);
|
||||||
|
CutABlock(pib.get(), pkeys[0], pkeys[1]);
|
||||||
|
builder->Add(pkeys[1]);
|
||||||
|
CutABlock(pib.get(), pkeys[1], pkeys[2]);
|
||||||
|
builder->Add(pkeys[2]);
|
||||||
|
CutABlock(pib.get(), pkeys[2]);
|
||||||
|
std::unique_ptr<PartitionedFilterBlockReader> reader(
|
||||||
|
NewReader(builder.get(), pib.get(), prefix_extractor.get()));
|
||||||
|
for (auto key : pkeys) {
|
||||||
|
auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
|
||||||
|
const Slice ikey_slice = Slice(*ikey.rep());
|
||||||
|
ASSERT_TRUE(reader->PrefixMayMatch(prefix_extractor->Transform(key),
|
||||||
|
prefix_extractor.get(), kNotValid,
|
||||||
|
false /*no_io*/, &ikey_slice));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
|
TEST_F(PartitionedFilterBlockTest, OneBlockPerKey) {
|
||||||
uint64_t max_index_size = MaxIndexSize();
|
uint64_t max_index_size = MaxIndexSize();
|
||||||
for (uint64_t i = 1; i < max_index_size + 1; i++) {
|
for (uint64_t i = 1; i < max_index_size + 1; i++) {
|
||||||
|
Loading…
Reference in New Issue
Block a user