From c86a22ac433efb87a7c73449d7aa5ef6c4580530 Mon Sep 17 00:00:00 2001 From: Abhishek Madan Date: Mon, 10 Sep 2018 17:38:15 -0700 Subject: [PATCH] Restrict RangeDelAggregator's tombstone end-key truncation (#4356) Summary: `RangeDelAggregator::AddTombstones` contained an assertion which stated that, if a range tombstone extended past the largest key in the sstable, then `FileMetaData::largest` must have a sentinel sequence number of `kMaxSequenceNumber`, which implies that the tombstone's end key is safe to truncate. However, `largest` will not be a sentinel key when the next sstable in the level's smallest key is equal to the current sstable's largest key, which caused the assertion to fail. The assertion must hold for the truncation to be safe, so it has been moved to an additional check on end-key truncation. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4356 Differential Revision: D9760891 Pulled By: abhimadan fbshipit-source-id: 7c20c3885cd919dcd14f291f88fd27aa33defebc --- db/range_del_aggregator.cc | 23 +++++++++++------------ db/range_del_aggregator_test.cc | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/db/range_del_aggregator.cc b/db/range_del_aggregator.cc index 111f81daa..09e642bc9 100644 --- a/db/range_del_aggregator.cc +++ b/db/range_del_aggregator.cc @@ -480,18 +480,17 @@ Status RangeDelAggregator::AddTombstones( } } if (largest != nullptr) { - // This is subtly correct despite the discrepancy between - // FileMetaData::largest being inclusive while RangeTombstone::end_key_ - // is exclusive. A tombstone will only extend past the bounds of an - // sstable if its end-key is the largest key in the table. If that - // occurs, the largest key for the table is set based on the smallest - // key in the next table in the level. In that case, largest->user_key() - // is not actually a key in the current table and thus we can use it as - // the exclusive end-key for the tombstone. - if (icmp_.user_comparator()->Compare( - tombstone.end_key_, largest->user_key()) > 0) { - // The largest key should be a tombstone sentinel key. - assert(GetInternalKeySeqno(largest->Encode()) == kMaxSequenceNumber); + // To safely truncate the range tombstone's end key, it must extend past + // the largest key in the sstable (which may have been extended to the + // smallest key in the next sstable), and largest must be a tombstone + // sentinel key. A range tombstone may straddle two sstables and not be + // the tombstone sentinel key in the first sstable if a user-key also + // straddles the sstables (possible if there is a snapshot between the + // two versions of the user-key), in which case we cannot truncate the + // range tombstone. + if (icmp_.user_comparator()->Compare(tombstone.end_key_, + largest->user_key()) > 0 && + GetInternalKeySeqno(largest->Encode()) == kMaxSequenceNumber) { tombstone.end_key_ = largest->user_key(); } } diff --git a/db/range_del_aggregator_test.cc b/db/range_del_aggregator_test.cc index 6b3423c2a..ab18f6d29 100644 --- a/db/range_del_aggregator_test.cc +++ b/db/range_del_aggregator_test.cc @@ -300,6 +300,21 @@ TEST_F(RangeDelAggregatorTest, TruncateTombstones) { &smallest, &largest); } +TEST_F(RangeDelAggregatorTest, OverlappingLargestKeyTruncateTombstones) { + const InternalKey smallest("b", 1, kTypeRangeDeletion); + const InternalKey largest( + "e", 3, // could happen if "e" is in consecutive sstables + kTypeValue); + VerifyRangeDels( + {{"a", "c", 10}, {"d", "f", 10}}, + {{"a", 10, true}, // truncated + {"b", 10, false}, // not truncated + {"d", 10, false}, // not truncated + {"e", 10, false}}, // not truncated + {{"b", "c", 10}, {"d", "f", 10}}, + &smallest, &largest); +} + } // namespace rocksdb int main(int argc, char** argv) {