Properly set upper bound of subcompaction output (#4879) (#4898)

Summary:
Fix the output overlap bug when using subcompactions: the upper bound of the
output file was extended incorrectly.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4898

Differential Revision: D13736107

Pulled By: ajkr

fbshipit-source-id: 21dca09f81d5f07bf2766bf566f9b50dcab7d8e3
This commit is contained in:
yangzhijia 2019-02-05 10:15:33 -08:00 committed by Andrew Kryczka
parent a1774dde9a
commit b7434c29d2

View File

@ -1204,10 +1204,19 @@ Status CompactionJob::FinishCompactionOutputFile(
lower_bound = nullptr; lower_bound = nullptr;
} }
if (next_table_min_key != nullptr) { if (next_table_min_key != nullptr) {
// This isn't the last file in the subcompaction, so extend until the next // This may be the last file in the subcompaction in some cases, so we
// file starts. // need to compare the end key of subcompaction with the next file start
// key. When the end key is chosen by the subcompaction, we know that
// it must be the biggest key in output file. Therefore, it is safe to
// use the smaller key as the upper bound of the output file, to ensure
// that there is no overlapping between different output files.
upper_bound_guard = ExtractUserKey(*next_table_min_key); upper_bound_guard = ExtractUserKey(*next_table_min_key);
upper_bound = &upper_bound_guard; if (sub_compact->end != nullptr &&
ucmp->Compare(upper_bound_guard, *sub_compact->end) >= 0) {
upper_bound = sub_compact->end;
} else {
upper_bound = &upper_bound_guard;
}
} else { } else {
// This is the last file in the subcompaction, so extend until the // This is the last file in the subcompaction, so extend until the
// subcompaction ends. // subcompaction ends.
@ -1225,6 +1234,13 @@ Status CompactionJob::FinishCompactionOutputFile(
has_overlapping_endpoints = false; has_overlapping_endpoints = false;
} }
// The end key of the subcompaction must be bigger or equal to the upper
// bound. If the end of subcompaction is null or the upper bound is null,
// it means that this file is the last file in the compaction. So there
// will be no overlapping between this file and others.
assert(sub_compact->end == nullptr ||
upper_bound == nullptr ||
ucmp->Compare(*upper_bound , *sub_compact->end) <= 0);
auto it = range_del_agg->NewIterator(lower_bound, upper_bound, auto it = range_del_agg->NewIterator(lower_bound, upper_bound,
has_overlapping_endpoints); has_overlapping_endpoints);
// Position the range tombstone output iterator. There may be tombstone // Position the range tombstone output iterator. There may be tombstone