Handle tombstones at the same seqno in the CollapsedRangeDelMap (#4424)
Summary:
The CollapsedRangeDelMap was entirely mishandling tombstones at the same
sequence number when the tombstones did not have identical start and end
keys. Such tombstones are common since 90fc40690
, which causes
tombstones to be split during compactions.
For example, if the tombstone [a, c) @ 1 lies across a compaction
boundary at b, it will be split into [a, b) @ 1 and [b, c) @ 1. Without
this patch, the collapsed range deletion map would look like this:
a -> 1
b -> 1
c -> 0
Notice how the b -> 1 entry is redundant. When the tombstones overlap,
the problem is even worse. Consider tombstones [a, c) @ 1 and [b, d) @
1, which produces this map without this patch:
a -> 1
b -> 1
c -> 0
d -> 0
This map is corrupt, as a map can never contain adjacent sentinel (zero)
entries. When the iterator advances from b to c, it will notice that c
is a sentinel enty and skip to d--but d is also a sentinel entry! Asking
what tombstone this iterator points to will trigger an assertion, as it
is not pointing to a valid tombstone.
/cc ajkr
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4424
Differential Revision: D10039248
Pulled By: abhimadan
fbshipit-source-id: 6d737c1e88d60e80cf27286726627ba44463e7f4
This commit is contained in:
parent
31d46993cc
commit
17edc82a4b
@ -269,22 +269,36 @@ class CollapsedRangeDelMap : public RangeDelMap {
|
|||||||
// 2: c--- OR 2: c--- OR 2: c--- OR 2: c------
|
// 2: c--- OR 2: c--- OR 2: c--- OR 2: c------
|
||||||
// 1: A--C 1: 1: A------ 1: C------
|
// 1: A--C 1: 1: A------ 1: C------
|
||||||
// ^ ^ ^ ^
|
// ^ ^ ^ ^
|
||||||
|
end_seq = prev_seq();
|
||||||
|
Rep::iterator pit;
|
||||||
|
if (it != rep_.begin() && (pit = std::prev(it)) != rep_.begin() &&
|
||||||
|
ucmp_->Compare(pit->first, t.start_key_) == 0 && std::prev(pit)->second == t.seq_) {
|
||||||
|
// The new tombstone starts at the end of an existing tombstone with an
|
||||||
|
// identical seqno:
|
||||||
|
//
|
||||||
|
// 3:
|
||||||
|
// 2: A--C---
|
||||||
|
// 1:
|
||||||
|
// ^
|
||||||
|
// Merge the tombstones by removing the existing tombstone's end key.
|
||||||
|
it = rep_.erase(std::prev(it));
|
||||||
|
} else {
|
||||||
// Insert a new transition at the new tombstone's start point, or raise
|
// Insert a new transition at the new tombstone's start point, or raise
|
||||||
// the existing transition at that point to the new tombstone's seqno.
|
// the existing transition at that point to the new tombstone's seqno.
|
||||||
end_seq = prev_seq();
|
|
||||||
rep_[t.start_key_] = t.seq_; // operator[] will overwrite existing entry
|
rep_[t.start_key_] = t.seq_; // operator[] will overwrite existing entry
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// The new tombstone's start point is covered by an existing tombstone:
|
// The new tombstone's start point is covered by an existing tombstone:
|
||||||
//
|
//
|
||||||
// 3: A----- OR 3: C------
|
// 3: A----- OR 3: C------ OR
|
||||||
// 2: c--- 2: c------
|
// 2: c--- 2: c------ 2: C------
|
||||||
// ^ ^
|
// ^ ^ ^
|
||||||
// Do nothing.
|
// Do nothing.
|
||||||
}
|
}
|
||||||
|
|
||||||
// Look at all the existing transitions that overlap the new tombstone.
|
// Look at all the existing transitions that overlap the new tombstone.
|
||||||
while (it != rep_.end() && ucmp_->Compare(it->first, t.end_key_) < 0) {
|
while (it != rep_.end() && ucmp_->Compare(it->first, t.end_key_) < 0) {
|
||||||
if (t.seq_ > it->second) {
|
if (t.seq_ >= it->second) {
|
||||||
// The transition is to an existing tombstone that the new tombstone
|
// The transition is to an existing tombstone that the new tombstone
|
||||||
// covers. Save the covered tombstone's seqno. We'll need to return to
|
// covers. Save the covered tombstone's seqno. We'll need to return to
|
||||||
// it if the new tombstone ends before the existing tombstone.
|
// it if the new tombstone ends before the existing tombstone.
|
||||||
@ -328,15 +342,29 @@ class CollapsedRangeDelMap : public RangeDelMap {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (t.seq_ == prev_seq()) {
|
if (t.seq_ == prev_seq()) {
|
||||||
// The new tombstone is unterminated in the map:
|
// The new tombstone is unterminated in the map.
|
||||||
|
if (it != rep_.end() && t.seq_ == it->second && ucmp_->Compare(it->first, t.end_key_) == 0) {
|
||||||
|
// The new tombstone ends at the start of another tombstone with an
|
||||||
|
// identical seqno. Merge the tombstones by removing the existing
|
||||||
|
// tombstone's start key.
|
||||||
|
rep_.erase(it);
|
||||||
|
} else if (end_seq == prev_seq() || (it != rep_.end() && end_seq == it->second)) {
|
||||||
|
// The new tombstone is implicitly ended because its end point is
|
||||||
|
// contained within an existing tombstone with the same seqno:
|
||||||
|
//
|
||||||
|
// 2: ---k--N
|
||||||
|
// ^
|
||||||
|
} else {
|
||||||
|
// The new tombstone needs an explicit end point.
|
||||||
//
|
//
|
||||||
// 3: OR 3: --G OR 3: --G K--
|
// 3: OR 3: --G OR 3: --G K--
|
||||||
// 2: C-------k 2: G---k 2: G---k
|
// 2: C-------k 2: G---k 2: G---k
|
||||||
// ^ ^ ^
|
// ^ ^ ^
|
||||||
// End it now, returning to the last seqno we covered. Because end keys
|
// Install one that returns to the last seqno we covered. Because end
|
||||||
// are exclusive, if there's an existing transition at t.end_key_, it
|
// keys are exclusive, if there's an existing transition at t.end_key_,
|
||||||
// takes precedence over the transition that we install here.
|
// it takes precedence over the transition that we install here.
|
||||||
rep_.emplace(t.end_key_, end_seq); // emplace is a noop if existing entry
|
rep_.emplace(t.end_key_, end_seq); // emplace is a noop if existing entry
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// The new tombstone is implicitly ended because its end point is covered
|
// The new tombstone is implicitly ended because its end point is covered
|
||||||
// by an existing tombstone with a higher seqno.
|
// by an existing tombstone with a higher seqno.
|
||||||
|
@ -208,6 +208,30 @@ TEST_F(RangeDelAggregatorTest, GapsBetweenRanges) {
|
|||||||
{{"a", "b", 5}, {"c", "d", 10}, {"e", "f", 15}});
|
{{"a", "b", 5}, {"c", "d", 10}, {"e", "f", 15}});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(RangeDelAggregatorTest, IdenticalSameSeqNo) {
|
||||||
|
VerifyRangeDels({{"a", "b", 5}, {"a", "b", 5}},
|
||||||
|
{{" ", 0}, {"a", 5}, {"b", 0}},
|
||||||
|
{{"a", "b", 5}});
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(RangeDelAggregatorTest, ContiguousSameSeqNo) {
|
||||||
|
VerifyRangeDels({{"a", "b", 5}, {"b", "c", 5}},
|
||||||
|
{{" ", 0}, {"a", 5}, {"b", 5}, {"c", 0}},
|
||||||
|
{{"a", "c", 5}});
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(RangeDelAggregatorTest, OverlappingSameSeqNo) {
|
||||||
|
VerifyRangeDels({{"a", "c", 5}, {"b", "d", 5}},
|
||||||
|
{{" ", 0}, {"a", 5}, {"b", 5}, {"c", 5}, {"d", 0}},
|
||||||
|
{{"a", "d", 5}});
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(RangeDelAggregatorTest, CoverSameSeqNo) {
|
||||||
|
VerifyRangeDels({{"a", "d", 5}, {"b", "c", 5}},
|
||||||
|
{{" ", 0}, {"a", 5}, {"b", 5}, {"c", 5}, {"d", 0}},
|
||||||
|
{{"a", "d", 5}});
|
||||||
|
}
|
||||||
|
|
||||||
// Note the Cover* tests also test cases where tombstones are inserted under a
|
// Note the Cover* tests also test cases where tombstones are inserted under a
|
||||||
// larger one when VerifyRangeDels() runs them in reverse
|
// larger one when VerifyRangeDels() runs them in reverse
|
||||||
TEST_F(RangeDelAggregatorTest, CoverMultipleFromLeft) {
|
TEST_F(RangeDelAggregatorTest, CoverMultipleFromLeft) {
|
||||||
|
Loading…
Reference in New Issue
Block a user