From b7434c29d2a6643de879160b179c663dd38283f6 Mon Sep 17 00:00:00 2001 From: yangzhijia Date: Tue, 5 Feb 2019 10:15:33 -0800 Subject: [PATCH] Properly set upper bound of subcompaction output (#4879) (#4898) Summary: Fix the ouput overlap bug when using subcompactions, the upper bound of output file was extended incorrectly. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4898 Differential Revision: D13736107 Pulled By: ajkr fbshipit-source-id: 21dca09f81d5f07bf2766bf566f9b50dcab7d8e3 --- db/compaction_job.cc | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/db/compaction_job.cc b/db/compaction_job.cc index 0bdf78cfc..10aaef098 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -1204,10 +1204,19 @@ Status CompactionJob::FinishCompactionOutputFile( lower_bound = nullptr; } if (next_table_min_key != nullptr) { - // This isn't the last file in the subcompaction, so extend until the next - // file starts. + // This may be the last file in the subcompaction in some cases, so we + // need to compare the end key of subcompaction with the next file start + // key. When the end key is chosen by the subcompaction, we know that + // it must be the biggest key in output file. Therefore, it is safe to + // use the smaller key as the upper bound of the output file, to ensure + // that there is no overlapping between different output files. upper_bound_guard = ExtractUserKey(*next_table_min_key); - upper_bound = &upper_bound_guard; + if (sub_compact->end != nullptr && + ucmp->Compare(upper_bound_guard, *sub_compact->end) >= 0) { + upper_bound = sub_compact->end; + } else { + upper_bound = &upper_bound_guard; + } } else { // This is the last file in the subcompaction, so extend until the // subcompaction ends. @@ -1225,6 +1234,13 @@ Status CompactionJob::FinishCompactionOutputFile( has_overlapping_endpoints = false; } + // The end key of the subcompaction must be bigger or equal to the upper + // bound. If the end of subcompaction is null or the upper bound is null, + // it means that this file is the last file in the compaction. So there + // will be no overlapping between this file and others. + assert(sub_compact->end == nullptr || + upper_bound == nullptr || + ucmp->Compare(*upper_bound , *sub_compact->end) <= 0); auto it = range_del_agg->NewIterator(lower_bound, upper_bound, has_overlapping_endpoints); // Position the range tombstone output iterator. There may be tombstone