diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 92d74ad1f..724a217bb 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -449,12 +449,14 @@ Options DBTestBase::GetOptions( break; } case kBlockBasedTableWithPartitionedIndexFormat3: { - table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; - options.prefix_extractor.reset(NewNoopTransform()); + table_options.format_version = 3; // Format 3 changes the binary index format. Since partitioned index is a // super-set of simple indexes, we are also using kTwoLevelIndexSearch to // test this format. - table_options.format_version = 3; + table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch; + // The top-level index in partition filters is also affected by format 3. + table_options.filter_policy.reset(NewBloomFilterPolicy(10, false)); + table_options.partition_filters = true; break; } case kBlockBasedTableWithIndexRestartInterval: { diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index b1df1fb9b..180bc76e5 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -237,16 +237,18 @@ class PartitionIndexReader : public IndexReader, public Cleanable { return NewTwoLevelIterator( new BlockBasedTable::PartitionedIndexIteratorState( table_, &partition_map_, index_key_includes_seq_), - index_block_->NewIterator( - icomparator_, icomparator_->user_comparator(), nullptr, true)); + index_block_->NewIterator(icomparator_, + icomparator_->user_comparator(), nullptr, + true, nullptr, index_key_includes_seq_)); } else { auto ro = ReadOptions(); ro.fill_cache = fill_cache; bool kIsIndex = true; return new BlockBasedTableIterator( table_, ro, *icomparator_, - index_block_->NewIterator( - icomparator_, icomparator_->user_comparator(), nullptr, true), + index_block_->NewIterator(icomparator_, + icomparator_->user_comparator(), nullptr, + true, nullptr, index_key_includes_seq_), false, /* prefix_extractor */ nullptr, 
kIsIndex, index_key_includes_seq_); } @@ -262,7 +264,7 @@ class PartitionIndexReader : public IndexReader, public Cleanable { BlockIter biter; BlockHandle handle; index_block_->NewIterator(icomparator_, icomparator_->user_comparator(), - &biter, true); + &biter, true, nullptr, index_key_includes_seq_); // Index partitions are assumed to be consecuitive. Prefetch them all. // Read the first block offset biter.SeekToFirst(); @@ -1308,7 +1310,9 @@ FilterBlockReader* BlockBasedTable::ReadFilter( return new PartitionedFilterBlockReader( rep->prefix_filtering ? prefix_extractor : nullptr, rep->whole_key_filtering, std::move(block), nullptr, - rep->ioptions.statistics, rep->internal_comparator, this); + rep->ioptions.statistics, rep->internal_comparator, this, + rep_->table_properties == nullptr || + !rep_->table_properties->index_key_is_user_key); } case Rep::FilterType::kBlockFilter: diff --git a/table/index_builder.cc b/table/index_builder.cc index 8a7fb8982..a9f20bf31 100644 --- a/table/index_builder.cc +++ b/table/index_builder.cc @@ -66,6 +66,8 @@ PartitionedIndexBuilder::PartitionedIndexBuilder( : IndexBuilder(comparator), index_block_builder_(table_opt.index_block_restart_interval, table_opt.format_version), + index_block_builder_without_seq_(table_opt.index_block_restart_interval, + table_opt.format_version), sub_index_builder_(nullptr), table_opt_(table_opt), seperator_is_key_plus_seq_(false) {} @@ -149,11 +151,20 @@ Status PartitionedIndexBuilder::Finish( std::string handle_encoding; last_partition_block_handle.EncodeTo(&handle_encoding); index_block_builder_.Add(last_entry.key, handle_encoding); + if (!seperator_is_key_plus_seq_) { + index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key), + handle_encoding); + } entries_.pop_front(); } // If there is no sub_index left, then return the 2nd level index. 
if (UNLIKELY(entries_.empty())) { - index_blocks->index_block_contents = index_block_builder_.Finish(); + if (seperator_is_key_plus_seq_) { + index_blocks->index_block_contents = index_block_builder_.Finish(); + } else { + index_blocks->index_block_contents = + index_block_builder_without_seq_.Finish(); + } return Status::OK(); } else { // Finish the next partition index in line and Incomplete() to indicate we @@ -192,7 +203,9 @@ size_t PartitionedIndexBuilder::EstimateTopLevelIndexSize( uint64_t size = it->value->EstimatedSize(); BlockHandle tmp_block_handle(offset, size); tmp_block_handle.EncodeTo(&tmp_handle_encoding); - tmp_builder.Add(it->key, tmp_handle_encoding); + tmp_builder.Add( + seperator_is_key_plus_seq_ ? it->key : ExtractUserKey(it->key), + tmp_handle_encoding); offset += size; } return tmp_builder.CurrentSizeEstimate(); diff --git a/table/index_builder.h b/table/index_builder.h index cde7496d4..0ad15221e 100644 --- a/table/index_builder.h +++ b/table/index_builder.h @@ -368,6 +368,7 @@ class PartitionedIndexBuilder : public IndexBuilder { }; std::list entries_; // list of partitioned indexes and their keys BlockBuilder index_block_builder_; // top-level index builder + BlockBuilder index_block_builder_without_seq_; // same for user keys // the active partition index builder ShortenedIndexBuilder* sub_index_builder_; // the last key in the active partition index builder diff --git a/table/partitioned_filter_block.cc b/table/partitioned_filter_block.cc index 2e03dc979..28cc3736b 100644 --- a/table/partitioned_filter_block.cc +++ b/table/partitioned_filter_block.cc @@ -24,6 +24,7 @@ PartitionedFilterBlockBuilder::PartitionedFilterBlockBuilder( : FullFilterBlockBuilder(prefix_extractor, whole_key_filtering, filter_bits_builder), index_on_filter_block_builder_(index_block_restart_interval), + index_on_filter_block_builder_without_seq_(index_block_restart_interval), p_index_builder_(p_index_builder), filters_in_partition_(0), num_added_(0) { @@ -65,6 
+66,10 @@ Slice PartitionedFilterBlockBuilder::Finish( std::string handle_encoding; last_partition_block_handle.EncodeTo(&handle_encoding); index_on_filter_block_builder_.Add(last_entry.key, handle_encoding); + if (!p_index_builder_->seperator_is_key_plus_seq()) { + index_on_filter_block_builder_without_seq_.Add( + ExtractUserKey(last_entry.key), handle_encoding); + } filters.pop_front(); } else { MaybeCutAFilterBlock(); @@ -74,7 +79,11 @@ Slice PartitionedFilterBlockBuilder::Finish( if (UNLIKELY(filters.empty())) { *status = Status::OK(); if (finishing_filters) { - return index_on_filter_block_builder_.Finish(); + if (p_index_builder_->seperator_is_key_plus_seq()) { + return index_on_filter_block_builder_.Finish(); + } else { + return index_on_filter_block_builder_without_seq_.Finish(); + } } else { // This is the rare case where no key was added to the filter return Slice(); @@ -91,12 +100,13 @@ Slice PartitionedFilterBlockBuilder::Finish( PartitionedFilterBlockReader::PartitionedFilterBlockReader( const SliceTransform* prefix_extractor, bool _whole_key_filtering, BlockContents&& contents, FilterBitsReader* /*filter_bits_reader*/, - Statistics* stats, const Comparator& comparator, - const BlockBasedTable* table) + Statistics* stats, const InternalKeyComparator comparator, + const BlockBasedTable* table, const bool index_key_includes_seq) : FilterBlockReader(contents.data.size(), stats, _whole_key_filtering), prefix_extractor_(prefix_extractor), comparator_(comparator), - table_(table) { + table_(table), + index_key_includes_seq_(index_key_includes_seq) { idx_on_fltr_blk_.reset(new Block(std::move(contents), kDisableGlobalSequenceNumber, 0 /* read_amp_bytes_per_bit */, stats)); @@ -113,7 +123,8 @@ PartitionedFilterBlockReader::~PartitionedFilterBlockReader() { char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length]; BlockIter biter; BlockHandle handle; - idx_on_fltr_blk_->NewIterator(&comparator_, &comparator_, &biter, true); + 
idx_on_fltr_blk_->NewIterator(&comparator_, comparator_.user_comparator(), + &biter, true, nullptr, index_key_includes_seq_); biter.SeekToFirst(); for (; biter.Valid(); biter.Next()) { auto input = biter.value(); @@ -207,7 +218,8 @@ bool PartitionedFilterBlockReader::PrefixMayMatch( Slice PartitionedFilterBlockReader::GetFilterPartitionHandle( const Slice& entry) { BlockIter iter; - idx_on_fltr_blk_->NewIterator(&comparator_, &comparator_, &iter, true); + idx_on_fltr_blk_->NewIterator(&comparator_, comparator_.user_comparator(), + &iter, true, nullptr, index_key_includes_seq_); iter.Seek(entry); if (UNLIKELY(!iter.Valid())) { return Slice(); @@ -269,7 +281,8 @@ void PartitionedFilterBlockReader::CacheDependencies( auto rep = table_->rep_; BlockIter biter; BlockHandle handle; - idx_on_fltr_blk_->NewIterator(&comparator_, &comparator_, &biter, true); + idx_on_fltr_blk_->NewIterator(&comparator_, comparator_.user_comparator(), + &biter, true, nullptr, index_key_includes_seq_); // Index partitions are assumed to be consecuitive. Prefetch them all. 
// Read the first block offset biter.SeekToFirst(); diff --git a/table/partitioned_filter_block.h b/table/partitioned_filter_block.h index c9b358f9d..86ec038a9 100644 --- a/table/partitioned_filter_block.h +++ b/table/partitioned_filter_block.h @@ -41,6 +41,8 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder { private: // Filter data BlockBuilder index_on_filter_block_builder_; // top-level index builder + BlockBuilder + index_on_filter_block_builder_without_seq_; // same for user keys struct FilterEntry { std::string key; Slice filter; @@ -68,13 +70,11 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder { class PartitionedFilterBlockReader : public FilterBlockReader, public Cleanable { public: - explicit PartitionedFilterBlockReader(const SliceTransform* prefix_extractor, - bool whole_key_filtering, - BlockContents&& contents, - FilterBitsReader* filter_bits_reader, - Statistics* stats, - const Comparator& comparator, - const BlockBasedTable* table); + explicit PartitionedFilterBlockReader( + const SliceTransform* prefix_extractor, bool whole_key_filtering, + BlockContents&& contents, FilterBitsReader* filter_bits_reader, + Statistics* stats, const InternalKeyComparator comparator, + const BlockBasedTable* table, const bool index_key_includes_seq); virtual ~PartitionedFilterBlockReader(); virtual bool IsBlockBased() override { return false; } @@ -98,8 +98,9 @@ class PartitionedFilterBlockReader : public FilterBlockReader, const SliceTransform* prefix_extractor_; std::unique_ptr idx_on_fltr_blk_; - const Comparator& comparator_; + const InternalKeyComparator comparator_; const BlockBasedTable* table_; + const bool index_key_includes_seq_; std::unordered_map> filter_map_; diff --git a/table/partitioned_filter_block_test.cc b/table/partitioned_filter_block_test.cc index ac47f2504..6317e9107 100644 --- a/table/partitioned_filter_block_test.cc +++ b/table/partitioned_filter_block_test.cc @@ -111,7 +111,7 @@ class 
PartitionedFilterBlockTest : public testing::Test { std::unique_ptr table; PartitionedFilterBlockReader* NewReader( - PartitionedFilterBlockBuilder* builder) { + PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib) { BlockHandle bh; Status status; Slice slice; @@ -127,13 +127,14 @@ class PartitionedFilterBlockTest : public testing::Test { ioptions, env_options, table_options_, icomp, false))); auto reader = new PartitionedFilterBlockReader( nullptr, true, BlockContents(slice, false, kNoCompression), nullptr, - nullptr, *icomp.user_comparator(), table.get()); + nullptr, icomp, table.get(), pib->seperator_is_key_plus_seq()); return reader; } void VerifyReader(PartitionedFilterBlockBuilder* builder, - bool empty = false) { - std::unique_ptr reader(NewReader(builder)); + PartitionedIndexBuilder* pib, bool empty = false) { + std::unique_ptr reader( + NewReader(builder, pib)); // Querying added keys const bool no_io = true; for (auto key : keys) { @@ -182,7 +183,7 @@ class PartitionedFilterBlockTest : public testing::Test { builder->Add(keys[i]); CutABlock(pib.get(), keys[i]); - VerifyReader(builder.get()); + VerifyReader(builder.get(), pib.get()); return CountNumOfIndexPartitions(pib.get()); } @@ -202,7 +203,7 @@ class PartitionedFilterBlockTest : public testing::Test { builder->Add(keys[i]); CutABlock(pib.get(), keys[i]); - VerifyReader(builder.get()); + VerifyReader(builder.get(), pib.get()); } void TestBlockPerAllKeys() { @@ -220,7 +221,7 @@ class PartitionedFilterBlockTest : public testing::Test { builder->Add(keys[i]); CutABlock(pib.get(), keys[i]); - VerifyReader(builder.get()); + VerifyReader(builder.get(), pib.get()); } void CutABlock(PartitionedIndexBuilder* builder, @@ -261,7 +262,7 @@ TEST_F(PartitionedFilterBlockTest, EmptyBuilder) { std::unique_ptr pib(NewIndexBuilder()); std::unique_ptr builder(NewBuilder(pib.get())); const bool empty = true; - VerifyReader(builder.get(), empty); + VerifyReader(builder.get(), pib.get(), empty); } 
TEST_F(PartitionedFilterBlockTest, OneBlock) {