From b47cc5851673dd12fa94ffdef2d09840f40e3edd Mon Sep 17 00:00:00 2001 From: Ari Ekmekji Date: Tue, 18 Aug 2015 14:56:31 -0700 Subject: [PATCH] Bounding Number of Subcompactions Summary: In D43239 (https://reviews.facebook.net/D43239) the number of subcompactions is set based on the number of L1 files with unique starting keys. In certain cases when this number is very large this causes issues, particularly with the overlap between files since very small output files can be generated. This diff bounds the number of subcompactions to the user option DBOption.num_subcompactions. Test Plan: ./db_test ./db_compaction_test Reviewers: sdong, igor, anthony, yhchiang Reviewed By: yhchiang Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D44883 --- db/compaction_job.cc | 28 ++++++++++++++++++++++++---- db/db_compaction_test.cc | 7 ++----- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/db/compaction_job.cc b/db/compaction_job.cc index 5d842a3e7..51295b5ea 100644 --- a/db/compaction_job.cc +++ b/db/compaction_job.cc @@ -324,9 +324,6 @@ void CompactionJob::InitializeSubCompactions(const SequenceNumber& earliest, Compaction* c = compact_->compaction; auto& bounds = sub_compaction_boundaries_; if (c->IsSubCompaction()) { - // TODO(aekmekji): take the option num_subcompactions into account - // when dividing up the key range between multiple iterators instead - // of just assigning each iterator one L1 file's key range auto* cmp = c->column_family_data()->user_comparator(); for (size_t which = 0; which < c->num_input_levels(); which++) { if (c->level(which) == 1) { @@ -334,6 +331,7 @@ void CompactionJob::InitializeSubCompactions(const SequenceNumber& earliest, size_t num_files = flevel->num_files; if (num_files > 1) { + std::vector<Slice> candidates; auto& files = flevel->files; Slice global_min = ExtractUserKey(files[0].smallest_key); Slice global_max = ExtractUserKey(files[num_files - 1].largest_key); @@ -351,9 +349,31 @@ void 
CompactionJob::InitializeSubCompactions(const SequenceNumber& earliest, if ( (i == num_files - 1 && cmp->Compare(s1, global_max) < 0) || (i < num_files - 1 && cmp->Compare(s1, s2) < 0 && cmp->Compare(s1, global_min) > 0)) { - bounds.emplace_back(s1); + candidates.emplace_back(s1); } } + + // Divide the potential L1 file boundaries (those that passed the + // checks above) into 'num_subcompactions' groups such that each have + // as close to an equal number of files in it as possible + // TODO(aekmekji): refine this later to depend on file size + size_t files_left = candidates.size(); + size_t subcompactions_left = + static_cast<size_t>(db_options_.num_subcompactions) < files_left + ? db_options_.num_subcompactions + : files_left; + + size_t num_to_include; + size_t index = 0; + + while (files_left > 1 && subcompactions_left > 1) { + // Cheaper way to do 'round(num_files / num_subcompactions)' + num_to_include = files_left / subcompactions_left; + index += num_to_include; + sub_compaction_boundaries_.emplace_back(candidates[index]); + files_left -= num_to_include; + subcompactions_left--; + } } break; } diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 0fbb4005c..a8aaea097 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -1284,11 +1284,8 @@ TEST_P(DBCompactionTestWithParam, ManualLevelCompactionOutputPathId) { compact_options.target_path_id = 1; db_->CompactRange(compact_options, handles_[1], nullptr, nullptr); - int num_files = options.num_subcompactions > 1 ? 2 : 1; - std::string files_string = options.num_subcompactions > 1 ? "0,2" : "0,1"; - - ASSERT_EQ(files_string, FilesPerLevel(1)); - ASSERT_EQ(num_files, GetSstFileCount(options.db_paths[1].path)); + ASSERT_EQ("0,1", FilesPerLevel(1)); + ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path)); ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path)); ASSERT_EQ(0, GetSstFileCount(dbname_));