diff --git a/db/memtable.cc b/db/memtable.cc index 964993dc8..e9f4d06a6 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -741,12 +741,11 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), comparator_.comparator, true /* one_time_use */, snapshot); - FragmentedRangeTombstoneIterator fragment_iter(&fragment_list, + FragmentedRangeTombstoneIterator fragment_iter(&fragment_list, snapshot, comparator_.comparator); - *max_covering_tombstone_seq = std::max( - *max_covering_tombstone_seq, - MaxCoveringTombstoneSeqnum(&fragment_iter, key.internal_key(), - comparator_.comparator.user_comparator())); + *max_covering_tombstone_seq = + std::max(*max_covering_tombstone_seq, + fragment_iter.MaxCoveringTombstoneSeqnum(key.user_key())); Slice user_key = key.user_key(); bool found_final_value = false; diff --git a/db/range_tombstone_fragmenter.cc b/db/range_tombstone_fragmenter.cc index d6bdac1c3..ce6251ccd 100644 --- a/db/range_tombstone_fragmenter.cc +++ b/db/range_tombstone_fragmenter.cc @@ -109,8 +109,8 @@ void FragmentedRangeTombstoneList::FragmentTombstones( // Flush a range tombstone fragment [cur_start_key, cur_end_key), which // should not overlap with the last-flushed tombstone fragment. assert(tombstones_.empty() || - icmp.user_comparator()->Compare(tombstones_.back().end_key_, - cur_start_key) <= 0); + icmp.user_comparator()->Compare(tombstones_.back().end_key, + cur_start_key) <= 0); if (one_time_use) { SequenceNumber max_seqnum = 0; @@ -118,9 +118,10 @@ void FragmentedRangeTombstoneList::FragmentTombstones( max_seqnum = std::max(max_seqnum, flush_it->sequence); } - // Flush only the tombstone fragment with the highest sequence number. - tombstones_.push_back( - RangeTombstone(cur_start_key, cur_end_key, max_seqnum)); + size_t start_idx = tombstone_seqs_.size(); + tombstone_seqs_.push_back(max_seqnum); + tombstones_.emplace_back(cur_start_key, cur_end_key, start_idx, + start_idx + 1); } else { // Sort the sequence numbers of the tombstones being fragmented in // descending order, and then flush them in that order. @@ -130,10 +131,12 @@ void FragmentedRangeTombstoneList::FragmentTombstones( } std::sort(seqnums_to_flush.begin(), seqnums_to_flush.end(), std::greater()); - for (const auto seq : seqnums_to_flush) { - tombstones_.push_back( - RangeTombstone(cur_start_key, cur_end_key, seq)); - } + size_t start_idx = tombstone_seqs_.size(); + size_t end_idx = start_idx + seqnums_to_flush.size(); + tombstone_seqs_.insert(tombstone_seqs_.end(), seqnums_to_flush.begin(), + seqnums_to_flush.end()); + tombstones_.emplace_back(cur_start_key, cur_end_key, start_idx, + end_idx); } cur_start_key = cur_end_key; } @@ -195,12 +198,13 @@ void FragmentedRangeTombstoneList::FragmentTombstones( } FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator( - const FragmentedRangeTombstoneList* tombstones, + const FragmentedRangeTombstoneList* tombstones, SequenceNumber snapshot, const InternalKeyComparator& icmp) - : tombstone_cmp_(icmp.user_comparator()), - icmp_(&icmp), + : tombstone_start_cmp_(icmp.user_comparator()), + tombstone_end_cmp_(icmp.user_comparator()), ucmp_(icmp.user_comparator()), - tombstones_(tombstones) { + tombstones_(tombstones), + snapshot_(snapshot) { assert(tombstones_ != nullptr); pos_ = tombstones_->end(); pinned_pos_ = tombstones_->end(); @@ -208,94 +212,129 @@ FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator( FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator( const std::shared_ptr& tombstones, - const InternalKeyComparator& icmp) - : tombstone_cmp_(icmp.user_comparator()), - icmp_(&icmp), + SequenceNumber snapshot, const InternalKeyComparator& icmp) + : tombstone_start_cmp_(icmp.user_comparator()), + tombstone_end_cmp_(icmp.user_comparator()), ucmp_(icmp.user_comparator()), tombstones_ref_(tombstones), - tombstones_(tombstones_ref_.get()) { + tombstones_(tombstones_ref_.get()), + snapshot_(snapshot) { assert(tombstones_ != nullptr); pos_ = tombstones_->end(); + seq_pos_ = tombstones_->seq_end(); pinned_pos_ = tombstones_->end(); + pinned_seq_pos_ = tombstones_->seq_end(); } void FragmentedRangeTombstoneIterator::SeekToFirst() { pos_ = tombstones_->begin(); + seq_pos_ = tombstones_->seq_begin(); } void FragmentedRangeTombstoneIterator::SeekToLast() { pos_ = tombstones_->end(); + seq_pos_ = tombstones_->seq_end(); Prev(); } void FragmentedRangeTombstoneIterator::Seek(const Slice& target) { if (tombstones_->empty()) { - pos_ = tombstones_->end(); + Invalidate(); return; } - RangeTombstone search(ExtractUserKey(target), ExtractUserKey(target), - GetInternalKeySeqno(target)); - pos_ = std::lower_bound(tombstones_->begin(), tombstones_->end(), search, - tombstone_cmp_); + SeekToCoveringTombstone(target); + while (pos_ != tombstones_->end() && + seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx)) { + ++pos_; + if (pos_ == tombstones_->end()) { + return; + } + seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), + tombstones_->seq_iter(pos_->seq_end_idx), + snapshot_, std::greater()); + } } void FragmentedRangeTombstoneIterator::SeekForPrev(const Slice& target) { - Seek(target); - if (!Valid()) { - SeekToLast(); + if (tombstones_->empty()) { + Invalidate(); + return; } - ParsedInternalKey parsed_target; - if (!ParseInternalKey(target, &parsed_target)) { - assert(false); - } - ParsedInternalKey parsed_start_key; - ParseKey(&parsed_start_key); - while (Valid() && icmp_->Compare(parsed_target, parsed_start_key) < 0) { - Prev(); - ParseKey(&parsed_start_key); + SeekForPrevToCoveringTombstone(target); + while (pos_ != tombstones_->end() && + seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx)) { + if (pos_ == tombstones_->begin()) { + Invalidate(); + return; + } + --pos_; + seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), + tombstones_->seq_iter(pos_->seq_end_idx), + snapshot_, std::greater()); } } -void FragmentedRangeTombstoneIterator::Next() { ++pos_; } +void FragmentedRangeTombstoneIterator::SeekToCoveringTombstone( + const Slice& target) { + pos_ = std::upper_bound(tombstones_->begin(), tombstones_->end(), target, + tombstone_end_cmp_); + if (pos_ == tombstones_->end()) { + // All tombstones end before target. + seq_pos_ = tombstones_->seq_end(); + return; + } + seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), + tombstones_->seq_iter(pos_->seq_end_idx), + snapshot_, std::greater()); +} -void FragmentedRangeTombstoneIterator::Prev() { +void FragmentedRangeTombstoneIterator::SeekForPrevToCoveringTombstone( + const Slice& target) { + if (tombstones_->empty()) { + Invalidate(); + return; + } + pos_ = std::upper_bound(tombstones_->begin(), tombstones_->end(), target, + tombstone_start_cmp_); if (pos_ == tombstones_->begin()) { - pos_ = tombstones_->end(); + // All tombstones start after target. + Invalidate(); return; } --pos_; + seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), + tombstones_->seq_iter(pos_->seq_end_idx), + snapshot_, std::greater()); +} + +void FragmentedRangeTombstoneIterator::Next() { + ++seq_pos_; + if (seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx)) { + ++pos_; + } +} + +void FragmentedRangeTombstoneIterator::Prev() { + if (seq_pos_ == tombstones_->seq_begin()) { + pos_ = tombstones_->end(); + seq_pos_ = tombstones_->seq_end(); + return; + } + --seq_pos_; + if (pos_ == tombstones_->end() || + seq_pos_ == tombstones_->seq_iter(pos_->seq_start_idx - 1)) { + --pos_; + } } bool FragmentedRangeTombstoneIterator::Valid() const { return tombstones_ != nullptr && pos_ != tombstones_->end(); } -SequenceNumber MaxCoveringTombstoneSeqnum( - FragmentedRangeTombstoneIterator* tombstone_iter, const Slice& lookup_key, - const Comparator* ucmp) { - if (tombstone_iter == nullptr) { - return 0; - } - - SequenceNumber snapshot = GetInternalKeySeqno(lookup_key); - Slice user_key = ExtractUserKey(lookup_key); - - tombstone_iter->Seek(lookup_key); - SequenceNumber highest_covering_seqnum = 0; - if (!tombstone_iter->Valid()) { - // Seeked past the last tombstone - tombstone_iter->Prev(); - } - while (tombstone_iter->Valid() && - ucmp->Compare(user_key, tombstone_iter->value()) < 0) { - if (tombstone_iter->seq() <= snapshot && - ucmp->Compare(tombstone_iter->user_key(), user_key) <= 0) { - highest_covering_seqnum = - std::max(highest_covering_seqnum, tombstone_iter->seq()); - } - tombstone_iter->Prev(); - } - return highest_covering_seqnum; +SequenceNumber FragmentedRangeTombstoneIterator::MaxCoveringTombstoneSeqnum( + const Slice& user_key) { + SeekToCoveringTombstone(user_key); + return ValidPos() && ucmp_->Compare(start_key(), user_key) <= 0 ? seq() : 0; } } // namespace rocksdb diff --git a/db/range_tombstone_fragmenter.h b/db/range_tombstone_fragmenter.h index e7b2aa573..2d6ca691f 100644 --- a/db/range_tombstone_fragmenter.h +++ b/db/range_tombstone_fragmenter.h @@ -19,31 +19,61 @@ namespace rocksdb { struct FragmentedRangeTombstoneList { public: + // A compact representation of a "stack" of range tombstone fragments, which + // start and end at the same user keys but have different sequence numbers. + // The members seq_start_idx and seq_end_idx are intended to be parameters to + // seq_iter(). + struct RangeTombstoneStack { + RangeTombstoneStack(const Slice& start, const Slice& end, size_t start_idx, + size_t end_idx) + : start_key(start), + end_key(end), + seq_start_idx(start_idx), + seq_end_idx(end_idx) {} + + Slice start_key; + Slice end_key; + size_t seq_start_idx; + size_t seq_end_idx; + }; FragmentedRangeTombstoneList( std::unique_ptr unfragmented_tombstones, const InternalKeyComparator& icmp, bool one_time_use, SequenceNumber snapshot = kMaxSequenceNumber); - std::vector::const_iterator begin() const { + std::vector::const_iterator begin() const { return tombstones_.begin(); } - std::vector::const_iterator end() const { + std::vector::const_iterator end() const { return tombstones_.end(); } + std::vector::const_iterator seq_iter(size_t idx) const { + return std::next(tombstone_seqs_.begin(), idx); + } + + std::vector::const_iterator seq_begin() const { + return tombstone_seqs_.begin(); + } + + std::vector::const_iterator seq_end() const { + return tombstone_seqs_.end(); + } + bool empty() const { return tombstones_.size() == 0; } private: // Given an ordered range tombstone iterator unfragmented_tombstones, // "fragment" the tombstones into non-overlapping pieces, and store them in - // tombstones_. + // tombstones_ and tombstone_seqs_. void FragmentTombstones( std::unique_ptr unfragmented_tombstones, const InternalKeyComparator& icmp, bool one_time_use, SequenceNumber snapshot = kMaxSequenceNumber); - std::vector tombstones_; + std::vector tombstones_; + std::vector tombstone_seqs_; std::list pinned_slices_; PinnedIteratorsManager pinned_iters_mgr_; }; @@ -60,15 +90,28 @@ struct FragmentedRangeTombstoneList { class FragmentedRangeTombstoneIterator : public InternalIterator { public: FragmentedRangeTombstoneIterator( - const FragmentedRangeTombstoneList* tombstones, + const FragmentedRangeTombstoneList* tombstones, SequenceNumber snapshot, const InternalKeyComparator& icmp); FragmentedRangeTombstoneIterator( const std::shared_ptr& tombstones, - const InternalKeyComparator& icmp); + SequenceNumber snapshot, const InternalKeyComparator& icmp); void SeekToFirst() override; void SeekToLast() override; + + // NOTE: Seek and SeekForPrev do not behave in the way InternalIterator + // seeking should behave. This is OK because they are not currently used, but + // eventually FragmentedRangeTombstoneIterator should no longer implement + // InternalIterator. + // + // Seeks to the range tombstone that covers target at a seqnum in the + // snapshot. If no such tombstone exists, seek to the earliest tombstone in + // the snapshot that ends after target. void Seek(const Slice& target) override; + // Seeks to the range tombstone that covers target at a seqnum in the + // snapshot. If no such tombstone exists, seek to the latest tombstone in the + // snapshot that starts before target. void SeekForPrev(const Slice& target) override; + void Next() override; void Prev() override; bool Valid() const override; @@ -76,55 +119,88 @@ class FragmentedRangeTombstoneIterator : public InternalIterator { MaybePinKey(); return current_start_key_.Encode(); } - Slice value() const override { return pos_->end_key_; } + Slice value() const override { return pos_->end_key; } bool IsKeyPinned() const override { return false; } bool IsValuePinned() const override { return true; } Status status() const override { return Status::OK(); } - Slice user_key() const { return pos_->start_key_; } - SequenceNumber seq() const { return pos_->seq_; } + Slice start_key() const { return pos_->start_key; } + Slice end_key() const { return pos_->end_key; } + SequenceNumber seq() const { return *seq_pos_; } + + SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key); private: - struct FragmentedRangeTombstoneComparator { - explicit FragmentedRangeTombstoneComparator(const Comparator* c) : cmp(c) {} + using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack; - bool operator()(const RangeTombstone& a, const RangeTombstone& b) const { - int user_key_cmp = cmp->Compare(a.start_key_, b.start_key_); - if (user_key_cmp != 0) { - return user_key_cmp < 0; - } - return a.seq_ > b.seq_; + struct RangeTombstoneStackStartComparator { + explicit RangeTombstoneStackStartComparator(const Comparator* c) : cmp(c) {} + + bool operator()(const RangeTombstoneStack& a, + const RangeTombstoneStack& b) const { + return cmp->Compare(a.start_key, b.start_key) < 0; + } + + bool operator()(const RangeTombstoneStack& a, const Slice& b) const { + return cmp->Compare(a.start_key, b) < 0; + } + + bool operator()(const Slice& a, const RangeTombstoneStack& b) const { + return cmp->Compare(a, b.start_key) < 0; + } + + const Comparator* cmp; + }; + + struct RangeTombstoneStackEndComparator { + explicit RangeTombstoneStackEndComparator(const Comparator* c) : cmp(c) {} + + bool operator()(const RangeTombstoneStack& a, + const RangeTombstoneStack& b) const { + return cmp->Compare(a.end_key, b.end_key) < 0; + } + + bool operator()(const RangeTombstoneStack& a, const Slice& b) const { + return cmp->Compare(a.end_key, b) < 0; + } + + bool operator()(const Slice& a, const RangeTombstoneStack& b) const { + return cmp->Compare(a, b.end_key) < 0; } const Comparator* cmp; }; void MaybePinKey() const { - if (pos_ != tombstones_->end() && pinned_pos_ != pos_) { - current_start_key_.Set(pos_->start_key_, pos_->seq_, kTypeRangeDeletion); + if (pos_ != tombstones_->end() && seq_pos_ != tombstones_->seq_end() && + (pinned_pos_ != pos_ || pinned_seq_pos_ != seq_pos_)) { + current_start_key_.Set(pos_->start_key, *seq_pos_, kTypeRangeDeletion); pinned_pos_ = pos_; + pinned_seq_pos_ = seq_pos_; } } - void ParseKey(ParsedInternalKey* parsed) const { - parsed->user_key = pos_->start_key_; - parsed->sequence = pos_->seq_; - parsed->type = kTypeRangeDeletion; + void SeekToCoveringTombstone(const Slice& key); + void SeekForPrevToCoveringTombstone(const Slice& key); + void Invalidate() { + pos_ = tombstones_->end(); + seq_pos_ = tombstones_->seq_end(); + } + bool ValidPos() const { + return Valid() && seq_pos_ != tombstones_->seq_iter(pos_->seq_end_idx); } - const FragmentedRangeTombstoneComparator tombstone_cmp_; - const InternalKeyComparator* icmp_; + const RangeTombstoneStackStartComparator tombstone_start_cmp_; + const RangeTombstoneStackEndComparator tombstone_end_cmp_; const Comparator* ucmp_; std::shared_ptr tombstones_ref_; const FragmentedRangeTombstoneList* tombstones_; - std::vector::const_iterator pos_; - mutable std::vector::const_iterator pinned_pos_; + SequenceNumber snapshot_; + std::vector::const_iterator pos_; + std::vector::const_iterator seq_pos_; + mutable std::vector::const_iterator pinned_pos_; + mutable std::vector::const_iterator pinned_seq_pos_; mutable InternalKey current_start_key_; - PinnedIteratorsManager pinned_iters_mgr_; }; -SequenceNumber MaxCoveringTombstoneSeqnum( - FragmentedRangeTombstoneIterator* tombstone_iter, const Slice& key, - const Comparator* ucmp); - } // namespace rocksdb diff --git a/db/range_tombstone_fragmenter_test.cc b/db/range_tombstone_fragmenter_test.cc index 4bea5b4c1..f9ea35654 100644 --- a/db/range_tombstone_fragmenter_test.cc +++ b/db/range_tombstone_fragmenter_test.cc @@ -35,50 +35,60 @@ void VerifyFragmentedRangeDels( iter->SeekToFirst(); for (size_t i = 0; i < expected_tombstones.size() && iter->Valid(); i++, iter->Next()) { - EXPECT_EQ(ExtractUserKey(iter->key()), expected_tombstones[i].start_key_); + EXPECT_EQ(iter->start_key(), expected_tombstones[i].start_key_); EXPECT_EQ(iter->value(), expected_tombstones[i].end_key_); - EXPECT_EQ(GetInternalKeySeqno(iter->key()), expected_tombstones[i].seq_); + EXPECT_EQ(iter->seq(), expected_tombstones[i].seq_); } EXPECT_FALSE(iter->Valid()); } -struct SeekForPrevTestCase { +struct SeekTestCase { Slice seek_target; RangeTombstone expected_position; bool out_of_range; }; -void VerifySeekForPrev(FragmentedRangeTombstoneIterator* iter, - const std::vector& cases) { +void VerifySeek(FragmentedRangeTombstoneIterator* iter, + const std::vector& cases) { for (const auto& testcase : cases) { - InternalKey ikey_seek_target(testcase.seek_target, 0, kTypeRangeDeletion); - iter->SeekForPrev(ikey_seek_target.Encode()); + iter->Seek(testcase.seek_target); if (testcase.out_of_range) { ASSERT_FALSE(iter->Valid()); } else { ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(ExtractUserKey(iter->key()), - testcase.expected_position.start_key_); - EXPECT_EQ(iter->value(), testcase.expected_position.end_key_); - EXPECT_EQ(GetInternalKeySeqno(iter->key()), - testcase.expected_position.seq_); + EXPECT_EQ(testcase.expected_position.start_key_, iter->start_key()); + EXPECT_EQ(testcase.expected_position.end_key_, iter->value()); + EXPECT_EQ(testcase.expected_position.seq_, iter->seq()); + } + } +} + +void VerifySeekForPrev(FragmentedRangeTombstoneIterator* iter, + const std::vector& cases) { + for (const auto& testcase : cases) { + iter->SeekForPrev(testcase.seek_target); + if (testcase.out_of_range) { + ASSERT_FALSE(iter->Valid()); + } else { + ASSERT_TRUE(iter->Valid()); + EXPECT_EQ(testcase.expected_position.start_key_, iter->start_key()); + EXPECT_EQ(testcase.expected_position.end_key_, iter->value()); + EXPECT_EQ(testcase.expected_position.seq_, iter->seq()); } } } struct MaxCoveringTombstoneSeqnumTestCase { Slice user_key; - int result; + SequenceNumber result; }; void VerifyMaxCoveringTombstoneSeqnum( - FragmentedRangeTombstoneIterator* iter, const Comparator* ucmp, + FragmentedRangeTombstoneIterator* iter, const std::vector& cases) { for (const auto& testcase : cases) { - InternalKey key_and_snapshot(testcase.user_key, kMaxSequenceNumber, - kTypeValue); - EXPECT_EQ(testcase.result, MaxCoveringTombstoneSeqnum( - iter, key_and_snapshot.Encode(), ucmp)); + EXPECT_EQ(testcase.result, + iter->MaxCoveringTombstoneSeqnum(testcase.user_key)); } } @@ -89,9 +99,10 @@ TEST_F(RangeTombstoneFragmenterTest, NonOverlappingTombstones) { FragmentedRangeTombstoneList fragment_list( std::move(range_del_iter), bytewise_icmp, true /* one_time_use */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); VerifyFragmentedRangeDels(&iter, {{"a", "b", 10}, {"c", "d", 5}}); - VerifyMaxCoveringTombstoneSeqnum(&iter, bytewise_icmp.user_comparator(), + VerifyMaxCoveringTombstoneSeqnum(&iter, {{"", 0}, {"a", 10}, {"b", 0}, {"c", 5}}); } @@ -100,10 +111,11 @@ TEST_F(RangeTombstoneFragmenterTest, OverlappingTombstones) { FragmentedRangeTombstoneList fragment_list( std::move(range_del_iter), bytewise_icmp, true /* one_time_use */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"c", "e", 15}, {"e", "g", 15}}); - VerifyMaxCoveringTombstoneSeqnum(&iter, bytewise_icmp.user_comparator(), + VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"c", 15}, {"e", 15}, {"g", 0}}); } @@ -113,10 +125,11 @@ TEST_F(RangeTombstoneFragmenterTest, ContiguousTombstones) { FragmentedRangeTombstoneList fragment_list( std::move(range_del_iter), bytewise_icmp, true /* one_time_use */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"c", "e", 20}, {"e", "g", 15}}); - VerifyMaxCoveringTombstoneSeqnum(&iter, bytewise_icmp.user_comparator(), + VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"c", 20}, {"e", 15}, {"g", 0}}); } @@ -126,10 +139,10 @@ TEST_F(RangeTombstoneFragmenterTest, RepeatedStartAndEndKey) { FragmentedRangeTombstoneList fragment_list( std::move(range_del_iter), bytewise_icmp, true /* one_time_use */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}}); - VerifyMaxCoveringTombstoneSeqnum(&iter, bytewise_icmp.user_comparator(), - {{"a", 10}, {"b", 10}, {"c", 0}}); + VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"b", 10}, {"c", 0}}); } TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyDifferentEndKeys) { @@ -138,10 +151,11 @@ TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyDifferentEndKeys) { FragmentedRangeTombstoneList fragment_list( std::move(range_del_iter), bytewise_icmp, true /* one_time_use */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"c", "e", 10}, {"e", "g", 7}}); - VerifyMaxCoveringTombstoneSeqnum(&iter, bytewise_icmp.user_comparator(), + VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"c", 10}, {"e", 7}, {"g", 0}}); } @@ -154,10 +168,11 @@ TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyMixedEndKeys) { FragmentedRangeTombstoneList fragment_list( std::move(range_del_iter), bytewise_icmp, true /* one_time_use */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); VerifyFragmentedRangeDels(&iter, {{"a", "c", 30}, {"c", "e", 20}, {"e", "g", 20}}); - VerifyMaxCoveringTombstoneSeqnum(&iter, bytewise_icmp.user_comparator(), + VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 30}, {"c", 20}, {"e", 20}, {"g", 0}}); } @@ -170,7 +185,8 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { FragmentedRangeTombstoneList fragment_list( std::move(range_del_iter), bytewise_icmp, true /* one_time_use */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"c", "e", 10}, {"e", "g", 8}, @@ -178,8 +194,7 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { {"j", "l", 4}, {"l", "n", 4}}); VerifyMaxCoveringTombstoneSeqnum( - &iter, bytewise_icmp.user_comparator(), - {{"a", 10}, {"c", 10}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); + &iter, {{"a", 10}, {"c", 10}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); } TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyWithSnapshot) { @@ -191,12 +206,12 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyWithSnapshot) { FragmentedRangeTombstoneList fragment_list( std::move(range_del_iter), bytewise_icmp, true /* one_time_use */, 9); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, 9 /* snapshot */, + bytewise_icmp); VerifyFragmentedRangeDels( &iter, {{"c", "g", 8}, {"g", "i", 6}, {"j", "l", 4}, {"l", "n", 4}}); VerifyMaxCoveringTombstoneSeqnum( - &iter, bytewise_icmp.user_comparator(), - {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); + &iter, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); } TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyUnordered) { @@ -208,12 +223,12 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyUnordered) { FragmentedRangeTombstoneList fragment_list( std::move(range_del_iter), bytewise_icmp, true /* one_time_use */, 9); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, 9 /* snapshot */, + bytewise_icmp); VerifyFragmentedRangeDels( &iter, {{"c", "g", 8}, {"g", "i", 6}, {"j", "l", 4}, {"l", "n", 4}}); VerifyMaxCoveringTombstoneSeqnum( - &iter, bytewise_icmp.user_comparator(), - {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); + &iter, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); } TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyMultiUse) { @@ -225,23 +240,41 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyMultiUse) { FragmentedRangeTombstoneList fragment_list( std::move(range_del_iter), bytewise_icmp, false /* one_time_use */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); - VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, - {"c", "e", 10}, - {"c", "e", 8}, - {"c", "e", 6}, - {"e", "g", 8}, - {"e", "g", 6}, - {"g", "i", 6}, - {"j", "l", 4}, - {"j", "l", 2}, - {"l", "n", 4}}); + FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); + FragmentedRangeTombstoneIterator iter2(&fragment_list, 9 /* snapshot */, + bytewise_icmp); + FragmentedRangeTombstoneIterator iter3(&fragment_list, 7 /* snapshot */, + bytewise_icmp); + FragmentedRangeTombstoneIterator iter4(&fragment_list, 5 /* snapshot */, + bytewise_icmp); + FragmentedRangeTombstoneIterator iter5(&fragment_list, 3 /* snapshot */, + bytewise_icmp); + for (auto* iter : {&iter1, &iter2, &iter3, &iter4, &iter5}) { + VerifyFragmentedRangeDels(iter, {{"a", "c", 10}, + {"c", "e", 10}, + {"c", "e", 8}, + {"c", "e", 6}, + {"e", "g", 8}, + {"e", "g", 6}, + {"g", "i", 6}, + {"j", "l", 4}, + {"j", "l", 2}, + {"l", "n", 4}}); + } VerifyMaxCoveringTombstoneSeqnum( - &iter, bytewise_icmp.user_comparator(), - {{"a", 10}, {"c", 10}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); + &iter1, {{"a", 10}, {"c", 10}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); + VerifyMaxCoveringTombstoneSeqnum( + &iter2, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); + VerifyMaxCoveringTombstoneSeqnum( + &iter3, {{"a", 0}, {"c", 6}, {"e", 6}, {"i", 0}, {"j", 4}, {"m", 4}}); + VerifyMaxCoveringTombstoneSeqnum( + &iter4, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 4}, {"m", 4}}); + VerifyMaxCoveringTombstoneSeqnum( + &iter5, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 2}, {"m", 0}}); } -TEST_F(RangeTombstoneFragmenterTest, SeekForPrevStartKey) { +TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) { // Same tombstones as OverlapAndRepeatedStartKey. auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 8}, @@ -250,30 +283,58 @@ TEST_F(RangeTombstoneFragmenterTest, SeekForPrevStartKey) { {"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list( - std::move(range_del_iter), bytewise_icmp, true /* one_time_use */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); - VerifySeekForPrev( - &iter, + std::move(range_del_iter), bytewise_icmp, false /* one_time_use */); + + FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); + VerifySeek( + &iter1, {{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}}); -} - -TEST_F(RangeTombstoneFragmenterTest, SeekForPrevCovered) { - // Same tombstones as OverlapAndRepeatedStartKey. - auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, - {"c", "g", 8}, - {"c", "i", 6}, - {"j", "n", 4}, - {"j", "l", 2}}); - - FragmentedRangeTombstoneList fragment_list( - std::move(range_del_iter), bytewise_icmp, true /* one_time_use */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); VerifySeekForPrev( - &iter, + &iter1, + {{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}}); + + FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */, + bytewise_icmp); + VerifySeek(&iter2, {{"a", {"j", "l", 2}}, + {"e", {"j", "l", 2}}, + {"l", {}, true /* out of range */}}); + VerifySeekForPrev(&iter2, {{"a", {}, true /* out of range */}, + {"e", {}, true /* out of range */}, + {"l", {"j", "l", 2}}}); +} + +TEST_F(RangeTombstoneFragmenterTest, SeekCovered) { + // Same tombstones as OverlapAndRepeatedStartKey. + auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, + {"c", "g", 8}, + {"c", "i", 6}, + {"j", "n", 4}, + {"j", "l", 2}}); + + FragmentedRangeTombstoneList fragment_list( + std::move(range_del_iter), bytewise_icmp, false /* one_time_use */); + + FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); + VerifySeek( + &iter1, {{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}}); + VerifySeekForPrev( + &iter1, + {{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}}); + + FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */, + bytewise_icmp); + VerifySeek(&iter2, {{"b", {"j", "l", 2}}, + {"f", {"j", "l", 2}}, + {"m", {}, true /* out of range */}}); + VerifySeekForPrev(&iter2, {{"b", {}, true /* out of range */}, + {"f", {}, true /* out of range */}, + {"m", {"j", "l", 2}}}); } -TEST_F(RangeTombstoneFragmenterTest, SeekForPrevEndKey) { +TEST_F(RangeTombstoneFragmenterTest, SeekEndKey) { // Same tombstones as OverlapAndRepeatedStartKey. auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 8}, @@ -282,15 +343,32 @@ TEST_F(RangeTombstoneFragmenterTest, SeekForPrevEndKey) { {"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list( - std::move(range_del_iter), bytewise_icmp, true /* one_time_use */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); - VerifySeekForPrev(&iter, {{"c", {"c", "e", 10}}, - {"g", {"g", "i", 6}}, - {"i", {"g", "i", 6}}, - {"n", {"l", "n", 4}}}); + std::move(range_del_iter), bytewise_icmp, false /* one_time_use */); + + FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); + VerifySeek(&iter1, {{"c", {"c", "e", 10}}, + {"g", {"g", "i", 6}}, + {"i", {"j", "l", 4}}, + {"n", {}, true /* out of range */}}); + VerifySeekForPrev(&iter1, {{"c", {"c", "e", 10}}, + {"g", {"g", "i", 6}}, + {"i", {"g", "i", 6}}, + {"n", {"l", "n", 4}}}); + + FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */, + bytewise_icmp); + VerifySeek(&iter2, {{"c", {"j", "l", 2}}, + {"g", {"j", "l", 2}}, + {"i", {"j", "l", 2}}, + {"n", {}, true /* out of range */}}); + VerifySeekForPrev(&iter2, {{"c", {}, true /* out of range */}, + {"g", {}, true /* out of range */}, + {"i", {}, true /* out of range */}, + {"n", {"j", "l", 2}}}); } -TEST_F(RangeTombstoneFragmenterTest, SeekForPrevOutOfBounds) { +TEST_F(RangeTombstoneFragmenterTest, SeekOutOfBounds) { // Same tombstones as OverlapAndRepeatedStartKey. auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, {"c", "g", 8}, @@ -299,12 +377,47 @@ TEST_F(RangeTombstoneFragmenterTest, SeekForPrevOutOfBounds) { {"j", "l", 2}}); FragmentedRangeTombstoneList fragment_list( - std::move(range_del_iter), bytewise_icmp, true /* one_time_use */); - FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp); + std::move(range_del_iter), bytewise_icmp, false /* one_time_use */); + + FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); + VerifySeek(&iter, {{"", {"a", "c", 10}}, {"z", {}, true /* out of range */}}); VerifySeekForPrev(&iter, {{"", {}, true /* out of range */}, {"z", {"l", "n", 4}}}); } +TEST_F(RangeTombstoneFragmenterTest, SeekOneTimeUse) { + // Same tombstones as OverlapAndRepeatedStartKey. + auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, + {"c", "g", 8}, + {"c", "i", 6}, + {"j", "n", 4}, + {"j", "l", 2}}); + + FragmentedRangeTombstoneList fragment_list( + std::move(range_del_iter), bytewise_icmp, true /* one_time_use */); + + FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, + bytewise_icmp); + VerifySeek( + &iter1, + {{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}}); + VerifySeekForPrev( + &iter1, + {{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}}); + + // No tombstone fragments exist at this snapshot because they were dropped + // when the list was created. + FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */, + bytewise_icmp); + VerifySeek(&iter2, {{"a", {}, true /* out of range */}, + {"e", {}, true /* out of range */}, + {"l", {}, true /* out of range */}}); + VerifySeekForPrev(&iter2, {{"a", {}, true /* out of range */}, + {"e", {}, true /* out of range */}, + {"l", {}, true /* out of range */}}); +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/db/table_cache.cc b/db/table_cache.cc index c4a7359a5..7c558b785 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -377,14 +377,14 @@ Status TableCache::Get(const ReadOptions& options, get_context->max_covering_tombstone_seq(); if (s.ok() && max_covering_tombstone_seq != nullptr && !options.ignore_range_deletions) { - std::unique_ptr range_del_iter( - t->NewRangeTombstoneIterator(options)); - *max_covering_tombstone_seq = - std::max(*max_covering_tombstone_seq, - MaxCoveringTombstoneSeqnum( - static_cast( - range_del_iter.get()), - k, internal_comparator.user_comparator())); + std::unique_ptr range_del_iter( + static_cast( + t->NewRangeTombstoneIterator(options))); + if (range_del_iter != nullptr) { + *max_covering_tombstone_seq = std::max( + *max_covering_tombstone_seq, + range_del_iter->MaxCoveringTombstoneSeqnum(ExtractUserKey(k))); + } } if (s.ok()) { get_context->SetReplayLog(row_cache_entry); // nullptr if no cache. diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 524700467..2d767677b 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -2314,12 +2314,16 @@ InternalIterator* BlockBasedTable::NewIterator( } InternalIterator* BlockBasedTable::NewRangeTombstoneIterator( - const ReadOptions& /* read_options */) { + const ReadOptions& read_options) { if (rep_->fragmented_range_dels == nullptr) { return nullptr; } - return new FragmentedRangeTombstoneIterator(rep_->fragmented_range_dels, - rep_->internal_comparator); + SequenceNumber snapshot = kMaxSequenceNumber; + if (read_options.snapshot != nullptr) { + snapshot = read_options.snapshot->GetSequenceNumber(); + } + return new FragmentedRangeTombstoneIterator( + rep_->fragmented_range_dels, snapshot, rep_->internal_comparator); } InternalIterator* BlockBasedTable::NewUnfragmentedRangeTombstoneIterator(