diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index f233c1331..4e97651b7 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -1357,6 +1357,38 @@ TEST_F(DBRangeDelTest, DeletedMergeOperandReappearsIterPrev) { db_->ReleaseSnapshot(snapshot); } +TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeys) { + const int kFileBytes = 1 << 20; + + Options options = CurrentOptions(); + options.compression = kNoCompression; + options.disable_auto_compactions = true; + options.target_file_size_base = kFileBytes; + Reopen(options); + + ASSERT_OK(Put(Key(0), "a")); + const Snapshot* snapshot = db_->GetSnapshot(); + + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), + Key(10))); + + db_->Flush(FlushOptions()); + + ReadOptions read_opts; + read_opts.snapshot = snapshot; + auto* iter = db_->NewIterator(read_opts); + + iter->SeekToFirst(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(Key(0), iter->key()); + + iter->Next(); + ASSERT_FALSE(iter->Valid()); + + delete iter; + db_->ReleaseSnapshot(snapshot); +} + #endif // ROCKSDB_LITE } // namespace rocksdb diff --git a/db/range_del_aggregator.cc b/db/range_del_aggregator.cc index 58ab3f5e4..81e1da039 100644 --- a/db/range_del_aggregator.cc +++ b/db/range_del_aggregator.cc @@ -441,11 +441,9 @@ RangeDelAggregator::RangeDelAggregator(const InternalKeyComparator& icmp, void RangeDelAggregator::InitRep(const std::vector& snapshots) { assert(rep_ == nullptr); rep_.reset(new Rep()); - for (auto snapshot : snapshots) { - rep_->stripe_map_.emplace(snapshot, NewRangeDelMap()); - } + rep_->snapshots_ = snapshots; // Data newer than any snapshot falls in this catch-all stripe - rep_->stripe_map_.emplace(kMaxSequenceNumber, NewRangeDelMap()); + rep_->snapshots_.emplace_back(kMaxSequenceNumber); rep_->pinned_iters_mgr_.StartPinning(); } @@ -474,11 +472,11 @@ bool RangeDelAggregator::ShouldDeleteImpl(const ParsedInternalKey& parsed, RangeDelPositioningMode mode) { assert(IsValueType(parsed.type)); assert(rep_ != nullptr); - auto& tombstone_map = GetRangeDelMap(parsed.sequence); - if (tombstone_map.IsEmpty()) { + auto* tombstone_map = GetRangeDelMapIfExists(parsed.sequence); + if (tombstone_map == nullptr || tombstone_map->IsEmpty()) { return false; } - return tombstone_map.ShouldDelete(parsed, mode); + return tombstone_map->ShouldDelete(parsed, mode); } bool RangeDelAggregator::IsRangeOverlapped(const Slice& start, @@ -492,7 +490,7 @@ bool RangeDelAggregator::IsRangeOverlapped(const Slice& start, ParsedInternalKey start_ikey(start, kMaxSequenceNumber, kMaxValue); ParsedInternalKey end_ikey(end, 0, static_cast(0)); for (const auto& stripe : rep_->stripe_map_) { - if (stripe.second->IsRangeOverlapped(start_ikey, end_ikey)) { + if (stripe.second.first->IsRangeOverlapped(start_ikey, end_ikey)) { return true; } } @@ -587,24 +585,42 @@ void RangeDelAggregator::InvalidateRangeDelMapPositions() { return; } for (auto& stripe : rep_->stripe_map_) { - stripe.second->InvalidatePosition(); + stripe.second.first->InvalidatePosition(); } } +RangeDelMap* RangeDelAggregator::GetRangeDelMapIfExists(SequenceNumber seq) { + assert(rep_ != nullptr); + // The stripe includes seqnum for the snapshot above and excludes seqnum for + // the snapshot below. + if (rep_->stripe_map_.empty()) { + return nullptr; + } + StripeMap::iterator iter = rep_->stripe_map_.lower_bound(seq); + if (iter == rep_->stripe_map_.end()) { + return nullptr; + } + size_t snapshot_idx = iter->second.second; + if (snapshot_idx > 0 && seq <= rep_->snapshots_[snapshot_idx - 1]) { + return nullptr; + } + return iter->second.first.get(); +} + RangeDelMap& RangeDelAggregator::GetRangeDelMap(SequenceNumber seq) { assert(rep_ != nullptr); // The stripe includes seqnum for the snapshot above and excludes seqnum for // the snapshot below. - StripeMap::iterator iter; - if (seq > 0) { - // upper_bound() checks strict inequality so need to subtract one - iter = rep_->stripe_map_.upper_bound(seq - 1); - } else { - iter = rep_->stripe_map_.begin(); - } + std::vector::iterator iter = + std::lower_bound(rep_->snapshots_.begin(), rep_->snapshots_.end(), seq); // catch-all stripe justifies this assertion in either of above cases - assert(iter != rep_->stripe_map_.end()); - return *iter->second; + assert(iter != rep_->snapshots_.end()); + if (rep_->stripe_map_.find(*iter) == rep_->stripe_map_.end()) { + rep_->stripe_map_.emplace( + *iter, + std::make_pair(NewRangeDelMap(), iter - rep_->snapshots_.begin())); + } + return *rep_->stripe_map_[*iter].first; } bool RangeDelAggregator::IsEmpty() { @@ -612,7 +628,7 @@ bool RangeDelAggregator::IsEmpty() { return true; } for (const auto& stripe : rep_->stripe_map_) { - if (!stripe.second->IsEmpty()) { + if (!stripe.second.first->IsEmpty()) { return false; } } @@ -696,7 +712,7 @@ std::unique_ptr RangeDelAggregator::NewIterator() { new MergingRangeDelIter(icmp_.user_comparator())); if (rep_ != nullptr) { for (const auto& stripe : rep_->stripe_map_) { - iter->AddIterator(stripe.second->NewIterator()); + iter->AddIterator(stripe.second.first->NewIterator()); } } return std::move(iter); diff --git a/db/range_del_aggregator.h b/db/range_del_aggregator.h index 459de866d..8a89ec9f1 100644 --- a/db/range_del_aggregator.h +++ b/db/range_del_aggregator.h @@ -200,10 +200,15 @@ class RangeDelAggregator { private: // Maps snapshot seqnum -> map of tombstones that fall in that stripe, i.e., - // their seqnums are greater than the next smaller snapshot's seqnum. - typedef std::map> StripeMap; + // their seqnums are greater than the next smaller snapshot's seqnum, and the + // corresponding index into the list of snapshots. Each entry is lazily + // initialized. + typedef std::map, size_t>> + StripeMap; struct Rep { + std::vector snapshots_; StripeMap stripe_map_; PinnedIteratorsManager pinned_iters_mgr_; std::list pinned_slices_; @@ -215,6 +220,7 @@ class RangeDelAggregator { void InitRep(const std::vector& snapshots); std::unique_ptr NewRangeDelMap(); + RangeDelMap* GetRangeDelMapIfExists(SequenceNumber seq); RangeDelMap& GetRangeDelMap(SequenceNumber seq); SequenceNumber upper_bound_;