Add a new compaction priority that picks file whose overlapping ratio is smallest

Summary:
Add a new compaction priority as following:
For every file, we calculate total size of files overalapping with the file in the next level, over the file's size itself. The file with smallest ratio will be picked first.
My "db_bench --fillrandom" shows about 5% less compaction than kOldestSmallestSeqFirst if --hard_pending_compaction_bytes_limit value to keep LSM tree in shape. If not limiting hard_pending_compaction_bytes_limit, improvement is only 1% or 2%.

Test Plan: Add a unit test

Reviewers: andrewkr, kradhakrishnan, anthony, IslamAbdelRahman, yhchiang

Reviewed By: yhchiang

Subscribers: MarkCallaghan, leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D54075
This commit is contained in:
sdong 2016-02-11 13:45:53 -08:00
parent 3dc3d1c144
commit 92a9ccf1a6
5 changed files with 141 additions and 2 deletions

View File

@ -1,5 +1,7 @@
# Rocksdb Change Log
## Unreleased
### New Features
* Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification.
## 4.5.0 (2/5/2016)
### Public API Changes

View File

@ -487,6 +487,87 @@ TEST_F(CompactionPickerTest, NeedsCompactionFIFO) {
}
#endif // ROCKSDB_LITE
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping1) {
NewVersionStorage(6, kCompactionStyleLevel);
mutable_cf_options_.target_file_size_base = 10000000;
mutable_cf_options_.target_file_size_multiplier = 10;
mutable_cf_options_.compaction_pri = kMinOverlappingRatio;
Add(2, 6U, "150", "179", 50000000U);
Add(2, 7U, "180", "220", 50000000U);
Add(2, 8U, "321", "400", 50000000U); // File not overlapping
Add(2, 9U, "721", "800", 50000000U);
Add(3, 26U, "150", "170", 260000000U);
Add(3, 27U, "171", "179", 260000000U);
Add(3, 28U, "191", "220", 260000000U);
Add(3, 29U, "221", "300", 260000000U);
Add(3, 30U, "750", "900", 260000000U);
UpdateVersionStorageInfo();
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
ASSERT_TRUE(compaction.get() != nullptr);
ASSERT_EQ(1U, compaction->num_input_files(0));
// Pick file 8 because it overlaps with 0 files on level 3.
ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber());
}
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping2) {
NewVersionStorage(6, kCompactionStyleLevel);
mutable_cf_options_.target_file_size_base = 10000000;
mutable_cf_options_.target_file_size_multiplier = 10;
mutable_cf_options_.compaction_pri = kMinOverlappingRatio;
Add(2, 6U, "150", "175",
60000000U); // Overlaps with file 26, 27, total size 521M
Add(2, 7U, "176", "200", 60000000U); // Overlaps with file 27, 28, total size
// 520M, the smalelst overlapping
Add(2, 8U, "201", "300",
60000000U); // Overlaps with file 28, 29, total size 521M
Add(3, 26U, "100", "110", 261000000U);
Add(3, 26U, "150", "170", 261000000U);
Add(3, 27U, "171", "179", 260000000U);
Add(3, 28U, "191", "220", 260000000U);
Add(3, 29U, "221", "300", 261000000U);
Add(3, 30U, "321", "400", 261000000U);
UpdateVersionStorageInfo();
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
ASSERT_TRUE(compaction.get() != nullptr);
ASSERT_EQ(1U, compaction->num_input_files(0));
// Picking file 7 because overlapping ratio is the biggest.
ASSERT_EQ(7U, compaction->input(0, 0)->fd.GetNumber());
}
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping3) {
NewVersionStorage(6, kCompactionStyleLevel);
mutable_cf_options_.target_file_size_base = 10000000;
mutable_cf_options_.target_file_size_multiplier = 10;
mutable_cf_options_.compaction_pri = kMinOverlappingRatio;
// file 7 and 8 over lap with the same file, but file 8 is smaller so
// it will be picked.
Add(2, 6U, "150", "175", 60000000U); // Overlaps with file 26, 27
Add(2, 7U, "176", "200", 60000000U); // Overlaps with file 27
Add(2, 8U, "201", "300", 61000000U); // Overlaps with file 27
Add(3, 26U, "160", "165", 260000000U);
Add(3, 26U, "166", "170", 260000000U);
Add(3, 27U, "180", "400", 260000000U);
Add(3, 28U, "401", "500", 260000000U);
UpdateVersionStorageInfo();
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
ASSERT_TRUE(compaction.get() != nullptr);
ASSERT_EQ(1U, compaction->num_input_files(0));
// Picking file 8 because overlapping ratio is the biggest.
ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber());
}
// This test exhibits the bug where we don't properly reset parent_index in
// PickCompaction()
TEST_F(CompactionPickerTest, ParentIndexResetBug) {

View File

@ -2507,8 +2507,12 @@ TEST_P(CompactionPriTest, Test) {
}
}
INSTANTIATE_TEST_CASE_P(CompactionPriTest, CompactionPriTest,
::testing::Values(0, 1, 2));
INSTANTIATE_TEST_CASE_P(
CompactionPriTest, CompactionPriTest,
::testing::Values(CompactionPri::kByCompensatedSize,
CompactionPri::kOldestLargestSeqFirst,
CompactionPri::kOldestSmallestSeqFirst,
CompactionPri::kMinOverlappingRatio));
#endif // !defined(ROCKSDB_LITE)
} // namespace rocksdb

View File

@ -1377,6 +1377,47 @@ void VersionStorageInfo::UpdateNumNonEmptyLevels() {
}
}
namespace {
// Sort `temp` based on ratio of overlapping size over file size
void SortFileByOverlappingRatio(
const InternalKeyComparator& icmp, const std::vector<FileMetaData*>& files,
const std::vector<FileMetaData*>& next_level_files,
std::vector<Fsize>* temp) {
std::unordered_map<uint64_t, uint64_t> file_to_order;
auto next_level_it = next_level_files.begin();
for (auto& file : files) {
uint64_t overlapping_bytes = 0;
// Skip files in next level that is smaller than current file
while (next_level_it != next_level_files.end() &&
icmp.Compare((*next_level_it)->largest, file->smallest) < 0) {
next_level_it++;
}
while (next_level_it != next_level_files.end() &&
icmp.Compare((*next_level_it)->smallest, file->largest) < 0) {
overlapping_bytes += (*next_level_it)->fd.file_size;
if (icmp.Compare((*next_level_it)->largest, file->largest) > 0) {
// next level file cross large boundary of current file.
break;
}
next_level_it++;
}
assert(file->fd.file_size != 0);
file_to_order[file->fd.GetNumber()] =
overlapping_bytes * 1024u / file->fd.file_size;
}
std::sort(temp->begin(), temp->end(),
[&](const Fsize& f1, const Fsize& f2) -> bool {
return file_to_order[f1.file->fd.GetNumber()] <
file_to_order[f2.file->fd.GetNumber()];
});
}
} // namespace
void VersionStorageInfo::UpdateFilesByCompactionPri(
const MutableCFOptions& mutable_cf_options) {
if (compaction_style_ == kCompactionStyleFIFO ||
@ -1419,6 +1460,10 @@ void VersionStorageInfo::UpdateFilesByCompactionPri(
return f1.file->smallest_seqno < f2.file->smallest_seqno;
});
break;
case kMinOverlappingRatio:
SortFileByOverlappingRatio(*internal_comparator_, files_[level],
files_[level + 1], &temp);
break;
default:
assert(false);
}

View File

@ -80,6 +80,9 @@ enum CompactionStyle : char {
kCompactionStyleNone = 0x3,
};
// In Level-based comapction, it Determines which file from a level to be
// picked to merge to the next level. We suggest people try
// kMinOverlappingRatio first when you tune your database.
enum CompactionPri : char {
// Slightly Priotize larger files by size compensated by #deletes
kByCompensatedSize = 0x0,
@ -90,6 +93,10 @@ enum CompactionPri : char {
// for the longest. If your updates are random across the key space,
// write amplification is slightly better with this option.
kOldestSmallestSeqFirst = 0x2,
// First compact files whose ratio between overlapping size in next level
// and its size is the smallest. It in many cases can optimize write
// amplification.
kMinOverlappingRatio = 0x3,
};
enum class WALRecoveryMode : char {