Handle tombstones at the same seqno in the CollapsedRangeDelMap (#4424)
Summary: The CollapsedRangeDelMap was entirely mishandling tombstones at the same sequence number when the tombstones did not have identical start and end keys. Such tombstones are common since 90fc40690, which causes tombstones to be split during compactions. For example, if the tombstone [a, c) @ 1 lies across a compaction boundary at b, it will be split into [a, b) @ 1 and [b, c) @ 1. Without this patch, the collapsed range deletion map would look like this: a -> 1 b -> 1 c -> 0 Notice how the b -> 1 entry is redundant. When the tombstones overlap, the problem is even worse. Consider tombstones [a, c) @ 1 and [b, d) @ 1, which produces this map without this patch: a -> 1 b -> 1 c -> 0 d -> 0 This map is corrupt, as a map can never contain adjacent sentinel (zero) entries. When the iterator advances from b to c, it will notice that c is a sentinel enty and skip to d--but d is also a sentinel entry! Asking what tombstone this iterator points to will trigger an assertion, as it is not pointing to a valid tombstone. /cc ajkr Pull Request resolved: https://github.com/facebook/rocksdb/pull/4424 Differential Revision: D10039248 Pulled By: abhimadan fbshipit-source-id: 6d737c1e88d60e80cf27286726627ba44463e7f4
This commit is contained in:
parent
31d46993cc
commit
17edc82a4b
@ -269,22 +269,36 @@ class CollapsedRangeDelMap : public RangeDelMap {
|
||||
// 2: c--- OR 2: c--- OR 2: c--- OR 2: c------
|
||||
// 1: A--C 1: 1: A------ 1: C------
|
||||
// ^ ^ ^ ^
|
||||
end_seq = prev_seq();
|
||||
Rep::iterator pit;
|
||||
if (it != rep_.begin() && (pit = std::prev(it)) != rep_.begin() &&
|
||||
ucmp_->Compare(pit->first, t.start_key_) == 0 && std::prev(pit)->second == t.seq_) {
|
||||
// The new tombstone starts at the end of an existing tombstone with an
|
||||
// identical seqno:
|
||||
//
|
||||
// 3:
|
||||
// 2: A--C---
|
||||
// 1:
|
||||
// ^
|
||||
// Merge the tombstones by removing the existing tombstone's end key.
|
||||
it = rep_.erase(std::prev(it));
|
||||
} else {
|
||||
// Insert a new transition at the new tombstone's start point, or raise
|
||||
// the existing transition at that point to the new tombstone's seqno.
|
||||
end_seq = prev_seq();
|
||||
rep_[t.start_key_] = t.seq_; // operator[] will overwrite existing entry
|
||||
}
|
||||
} else {
|
||||
// The new tombstone's start point is covered by an existing tombstone:
|
||||
//
|
||||
// 3: A----- OR 3: C------
|
||||
// 2: c--- 2: c------
|
||||
// ^ ^
|
||||
// 3: A----- OR 3: C------ OR
|
||||
// 2: c--- 2: c------ 2: C------
|
||||
// ^ ^ ^
|
||||
// Do nothing.
|
||||
}
|
||||
|
||||
// Look at all the existing transitions that overlap the new tombstone.
|
||||
while (it != rep_.end() && ucmp_->Compare(it->first, t.end_key_) < 0) {
|
||||
if (t.seq_ > it->second) {
|
||||
if (t.seq_ >= it->second) {
|
||||
// The transition is to an existing tombstone that the new tombstone
|
||||
// covers. Save the covered tombstone's seqno. We'll need to return to
|
||||
// it if the new tombstone ends before the existing tombstone.
|
||||
@ -328,15 +342,29 @@ class CollapsedRangeDelMap : public RangeDelMap {
|
||||
}
|
||||
|
||||
if (t.seq_ == prev_seq()) {
|
||||
// The new tombstone is unterminated in the map:
|
||||
// The new tombstone is unterminated in the map.
|
||||
if (it != rep_.end() && t.seq_ == it->second && ucmp_->Compare(it->first, t.end_key_) == 0) {
|
||||
// The new tombstone ends at the start of another tombstone with an
|
||||
// identical seqno. Merge the tombstones by removing the existing
|
||||
// tombstone's start key.
|
||||
rep_.erase(it);
|
||||
} else if (end_seq == prev_seq() || (it != rep_.end() && end_seq == it->second)) {
|
||||
// The new tombstone is implicitly ended because its end point is
|
||||
// contained within an existing tombstone with the same seqno:
|
||||
//
|
||||
// 2: ---k--N
|
||||
// ^
|
||||
} else {
|
||||
// The new tombstone needs an explicit end point.
|
||||
//
|
||||
// 3: OR 3: --G OR 3: --G K--
|
||||
// 2: C-------k 2: G---k 2: G---k
|
||||
// ^ ^ ^
|
||||
// End it now, returning to the last seqno we covered. Because end keys
|
||||
// are exclusive, if there's an existing transition at t.end_key_, it
|
||||
// takes precedence over the transition that we install here.
|
||||
// Install one that returns to the last seqno we covered. Because end
|
||||
// keys are exclusive, if there's an existing transition at t.end_key_,
|
||||
// it takes precedence over the transition that we install here.
|
||||
rep_.emplace(t.end_key_, end_seq); // emplace is a noop if existing entry
|
||||
}
|
||||
} else {
|
||||
// The new tombstone is implicitly ended because its end point is covered
|
||||
// by an existing tombstone with a higher seqno.
|
||||
|
@ -208,6 +208,30 @@ TEST_F(RangeDelAggregatorTest, GapsBetweenRanges) {
|
||||
{{"a", "b", 5}, {"c", "d", 10}, {"e", "f", 15}});
|
||||
}
|
||||
|
||||
TEST_F(RangeDelAggregatorTest, IdenticalSameSeqNo) {
|
||||
VerifyRangeDels({{"a", "b", 5}, {"a", "b", 5}},
|
||||
{{" ", 0}, {"a", 5}, {"b", 0}},
|
||||
{{"a", "b", 5}});
|
||||
}
|
||||
|
||||
TEST_F(RangeDelAggregatorTest, ContiguousSameSeqNo) {
|
||||
VerifyRangeDels({{"a", "b", 5}, {"b", "c", 5}},
|
||||
{{" ", 0}, {"a", 5}, {"b", 5}, {"c", 0}},
|
||||
{{"a", "c", 5}});
|
||||
}
|
||||
|
||||
TEST_F(RangeDelAggregatorTest, OverlappingSameSeqNo) {
|
||||
VerifyRangeDels({{"a", "c", 5}, {"b", "d", 5}},
|
||||
{{" ", 0}, {"a", 5}, {"b", 5}, {"c", 5}, {"d", 0}},
|
||||
{{"a", "d", 5}});
|
||||
}
|
||||
|
||||
TEST_F(RangeDelAggregatorTest, CoverSameSeqNo) {
|
||||
VerifyRangeDels({{"a", "d", 5}, {"b", "c", 5}},
|
||||
{{" ", 0}, {"a", 5}, {"b", 5}, {"c", 5}, {"d", 0}},
|
||||
{{"a", "d", 5}});
|
||||
}
|
||||
|
||||
// Note the Cover* tests also test cases where tombstones are inserted under a
|
||||
// larger one when VerifyRangeDels() runs them in reverse
|
||||
TEST_F(RangeDelAggregatorTest, CoverMultipleFromLeft) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user