Bounding Number of Subcompactions
Summary: In D43239 (https://reviews.facebook.net/D43239), the number of subcompactions is set based on the number of L1 files with unique starting keys. When this number is very large, it causes issues, particularly with the overlap between files, because very small output files can be generated. This diff bounds the number of subcompactions by the user option DBOptions::num_subcompactions.
Test Plan: ./db_test ./db_compaction_test
Reviewers: sdong, igor, anthony, yhchiang
Reviewed By: yhchiang
Subscribers: dhruba
Differential Revision: https://reviews.facebook.net/D44883
This commit is contained in:
parent
e58e1b18e7
commit
b47cc58516
@ -324,9 +324,6 @@ void CompactionJob::InitializeSubCompactions(const SequenceNumber& earliest,
|
||||
Compaction* c = compact_->compaction;
|
||||
auto& bounds = sub_compaction_boundaries_;
|
||||
if (c->IsSubCompaction()) {
|
||||
// TODO(aekmekji): take the option num_subcompactions into account
|
||||
// when dividing up the key range between multiple iterators instead
|
||||
// of just assigning each iterator one L1 file's key range
|
||||
auto* cmp = c->column_family_data()->user_comparator();
|
||||
for (size_t which = 0; which < c->num_input_levels(); which++) {
|
||||
if (c->level(which) == 1) {
|
||||
@ -334,6 +331,7 @@ void CompactionJob::InitializeSubCompactions(const SequenceNumber& earliest,
|
||||
size_t num_files = flevel->num_files;
|
||||
|
||||
if (num_files > 1) {
|
||||
std::vector<Slice> candidates;
|
||||
auto& files = flevel->files;
|
||||
Slice global_min = ExtractUserKey(files[0].smallest_key);
|
||||
Slice global_max = ExtractUserKey(files[num_files - 1].largest_key);
|
||||
@ -351,9 +349,31 @@ void CompactionJob::InitializeSubCompactions(const SequenceNumber& earliest,
|
||||
if ( (i == num_files - 1 && cmp->Compare(s1, global_max) < 0)
|
||||
|| (i < num_files - 1 && cmp->Compare(s1, s2) < 0 &&
|
||||
cmp->Compare(s1, global_min) > 0)) {
|
||||
bounds.emplace_back(s1);
|
||||
candidates.emplace_back(s1);
|
||||
}
|
||||
}
|
||||
|
||||
// Divide the potential L1 file boundaries (those that passed the
|
||||
// checks above) into 'num_subcompactions' groups such that each have
|
||||
// as close to an equal number of files in it as possible
|
||||
// TODO(aekmekji): refine this later to depend on file size
|
||||
size_t files_left = candidates.size();
|
||||
size_t subcompactions_left =
|
||||
static_cast<size_t>(db_options_.num_subcompactions) < files_left
|
||||
? db_options_.num_subcompactions
|
||||
: files_left;
|
||||
|
||||
size_t num_to_include;
|
||||
size_t index = 0;
|
||||
|
||||
while (files_left > 1 && subcompactions_left > 1) {
|
||||
// Cheaper way to do 'round(num_files / num_subcompactions)'
|
||||
num_to_include = files_left / subcompactions_left;
|
||||
index += num_to_include;
|
||||
sub_compaction_boundaries_.emplace_back(candidates[index]);
|
||||
files_left -= num_to_include;
|
||||
subcompactions_left--;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1284,11 +1284,8 @@ TEST_P(DBCompactionTestWithParam, ManualLevelCompactionOutputPathId) {
|
||||
compact_options.target_path_id = 1;
|
||||
db_->CompactRange(compact_options, handles_[1], nullptr, nullptr);
|
||||
|
||||
int num_files = options.num_subcompactions > 1 ? 2 : 1;
|
||||
std::string files_string = options.num_subcompactions > 1 ? "0,2" : "0,1";
|
||||
|
||||
ASSERT_EQ(files_string, FilesPerLevel(1));
|
||||
ASSERT_EQ(num_files, GetSstFileCount(options.db_paths[1].path));
|
||||
ASSERT_EQ("0,1", FilesPerLevel(1));
|
||||
ASSERT_EQ(1, GetSstFileCount(options.db_paths[1].path));
|
||||
ASSERT_EQ(0, GetSstFileCount(options.db_paths[0].path));
|
||||
ASSERT_EQ(0, GetSstFileCount(dbname_));
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user