Add a new compaction priority that picks the file whose overlapping ratio is smallest
Summary: Add a new compaction priority as follows: for every file, we calculate the total size of the files overlapping with it in the next level, divided by the file's own size. The file with the smallest ratio is picked first. My "db_bench --fillrandom" shows about 5% less compaction than kOldestSmallestSeqFirst when --hard_pending_compaction_bytes_limit is set to a value that keeps the LSM tree in shape. Without limiting hard_pending_compaction_bytes_limit, the improvement is only 1% or 2%. Test Plan: Add a unit test Reviewers: andrewkr, kradhakrishnan, anthony, IslamAbdelRahman, yhchiang Reviewed By: yhchiang Subscribers: MarkCallaghan, leveldb, dhruba Differential Revision: https://reviews.facebook.net/D54075
This commit is contained in:
parent
3dc3d1c144
commit
92a9ccf1a6
@ -1,5 +1,7 @@
|
||||
# Rocksdb Change Log
|
||||
## Unreleased
|
||||
### New Features
|
||||
* Add CompactionPri::kMinOverlappingRatio, a compaction picking mode friendly to write amplification.
|
||||
|
||||
## 4.5.0 (2/5/2016)
|
||||
### Public API Changes
|
||||
|
@ -487,6 +487,87 @@ TEST_F(CompactionPickerTest, NeedsCompactionFIFO) {
|
||||
}
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
// Verifies that kMinOverlappingRatio picks the file with the lowest ratio of
// (bytes overlapping the next level) / (own file size). File 8 overlaps
// nothing on level 3, so its ratio is 0 and it must be picked first.
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping1) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.target_file_size_base = 10000000;
  mutable_cf_options_.target_file_size_multiplier = 10;
  mutable_cf_options_.compaction_pri = kMinOverlappingRatio;

  // Level-2 candidates (file number, key range, size).
  Add(2, 6U, "150", "179", 50000000U);  // Overlaps files 26 and 27 on level 3
  Add(2, 7U, "180", "220", 50000000U);  // Overlaps file 28 on level 3
  Add(2, 8U, "321", "400", 50000000U); // File not overlapping
  Add(2, 9U, "721", "800", 50000000U);  // Overlaps file 30 on level 3

  // Level-3 files the candidates are measured against.
  Add(3, 26U, "150", "170", 260000000U);
  Add(3, 27U, "171", "179", 260000000U);
  Add(3, 28U, "191", "220", 260000000U);
  Add(3, 29U, "221", "300", 260000000U);
  Add(3, 30U, "750", "900", 260000000U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(1U, compaction->num_input_files(0));
  // Pick file 8 because it overlaps with 0 files on level 3.
  ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber());
}
|
||||
|
||||
// Verifies that among fully-overlapping candidates, kMinOverlappingRatio picks
// the one with the smallest total overlapping size in the next level
// (all candidates here have the same size, so the ratio ordering follows the
// overlapping-bytes ordering).
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping2) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.target_file_size_base = 10000000;
  mutable_cf_options_.target_file_size_multiplier = 10;
  mutable_cf_options_.compaction_pri = kMinOverlappingRatio;

  Add(2, 6U, "150", "175",
      60000000U);  // Overlaps with file 26, 27, total size 521M
  Add(2, 7U, "176", "200", 60000000U);  // Overlaps with file 27, 28, total size
                                        // 520M, the smallest overlapping
  Add(2, 8U, "201", "300",
      60000000U);  // Overlaps with file 28, 29, total size 521M

  // Fixed: the first level-3 file is 25U; the original test reused file
  // number 26U for two different files.
  Add(3, 25U, "100", "110", 261000000U);
  Add(3, 26U, "150", "170", 261000000U);
  Add(3, 27U, "171", "179", 260000000U);
  Add(3, 28U, "191", "220", 260000000U);
  Add(3, 29U, "221", "300", 261000000U);
  Add(3, 30U, "321", "400", 261000000U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(1U, compaction->num_input_files(0));
  // Pick file 7 because its overlapping ratio is the smallest.
  ASSERT_EQ(7U, compaction->input(0, 0)->fd.GetNumber());
}
|
||||
|
||||
// Verifies the ratio (not the absolute overlapping size) is what matters:
// when two candidates overlap the same next-level bytes, the larger candidate
// has the smaller ratio and wins.
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping3) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.target_file_size_base = 10000000;
  mutable_cf_options_.target_file_size_multiplier = 10;
  mutable_cf_options_.compaction_pri = kMinOverlappingRatio;

  // Files 7 and 8 overlap the same file (27), but file 8 is larger (61M vs
  // 60M), so its overlapping ratio is smaller and it will be picked.
  Add(2, 6U, "150", "175", 60000000U);  // Overlaps with file 25, 26
  Add(2, 7U, "176", "200", 60000000U);  // Overlaps with file 27
  Add(2, 8U, "201", "300", 61000000U);  // Overlaps with file 27

  // Fixed: the first level-3 file is 25U; the original test reused file
  // number 26U for two different files.
  Add(3, 25U, "160", "165", 260000000U);
  Add(3, 26U, "166", "170", 260000000U);
  Add(3, 27U, "180", "400", 260000000U);
  Add(3, 28U, "401", "500", 260000000U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(1U, compaction->num_input_files(0));
  // Pick file 8 because its overlapping ratio is the smallest.
  ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber());
}
|
||||
|
||||
// This test exhibits the bug where we don't properly reset parent_index in
|
||||
// PickCompaction()
|
||||
TEST_F(CompactionPickerTest, ParentIndexResetBug) {
|
||||
|
@ -2507,8 +2507,12 @@ TEST_P(CompactionPriTest, Test) {
|
||||
}
|
||||
}
|
||||
|
||||
// Instantiate CompactionPriTest once for every CompactionPri value. The old
// instantiation using raw integers (::testing::Values(0, 1, 2)) is removed:
// keeping both would register the suite twice, and the enum names make the
// parameterization self-documenting and cover kMinOverlappingRatio.
INSTANTIATE_TEST_CASE_P(
    CompactionPriTest, CompactionPriTest,
    ::testing::Values(CompactionPri::kByCompensatedSize,
                      CompactionPri::kOldestLargestSeqFirst,
                      CompactionPri::kOldestSmallestSeqFirst,
                      CompactionPri::kMinOverlappingRatio));
|
||||
|
||||
#endif // !defined(ROCKSDB_LITE)
|
||||
} // namespace rocksdb
|
||||
|
@ -1377,6 +1377,47 @@ void VersionStorageInfo::UpdateNumNonEmptyLevels() {
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Sort `temp` based on ratio of overlapping size over file size
|
||||
void SortFileByOverlappingRatio(
|
||||
const InternalKeyComparator& icmp, const std::vector<FileMetaData*>& files,
|
||||
const std::vector<FileMetaData*>& next_level_files,
|
||||
std::vector<Fsize>* temp) {
|
||||
std::unordered_map<uint64_t, uint64_t> file_to_order;
|
||||
auto next_level_it = next_level_files.begin();
|
||||
|
||||
for (auto& file : files) {
|
||||
uint64_t overlapping_bytes = 0;
|
||||
// Skip files in next level that is smaller than current file
|
||||
while (next_level_it != next_level_files.end() &&
|
||||
icmp.Compare((*next_level_it)->largest, file->smallest) < 0) {
|
||||
next_level_it++;
|
||||
}
|
||||
|
||||
while (next_level_it != next_level_files.end() &&
|
||||
icmp.Compare((*next_level_it)->smallest, file->largest) < 0) {
|
||||
overlapping_bytes += (*next_level_it)->fd.file_size;
|
||||
|
||||
if (icmp.Compare((*next_level_it)->largest, file->largest) > 0) {
|
||||
// next level file cross large boundary of current file.
|
||||
break;
|
||||
}
|
||||
next_level_it++;
|
||||
}
|
||||
|
||||
assert(file->fd.file_size != 0);
|
||||
file_to_order[file->fd.GetNumber()] =
|
||||
overlapping_bytes * 1024u / file->fd.file_size;
|
||||
}
|
||||
|
||||
std::sort(temp->begin(), temp->end(),
|
||||
[&](const Fsize& f1, const Fsize& f2) -> bool {
|
||||
return file_to_order[f1.file->fd.GetNumber()] <
|
||||
file_to_order[f2.file->fd.GetNumber()];
|
||||
});
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void VersionStorageInfo::UpdateFilesByCompactionPri(
|
||||
const MutableCFOptions& mutable_cf_options) {
|
||||
if (compaction_style_ == kCompactionStyleFIFO ||
|
||||
@ -1419,6 +1460,10 @@ void VersionStorageInfo::UpdateFilesByCompactionPri(
|
||||
return f1.file->smallest_seqno < f2.file->smallest_seqno;
|
||||
});
|
||||
break;
|
||||
case kMinOverlappingRatio:
|
||||
SortFileByOverlappingRatio(*internal_comparator_, files_[level],
|
||||
files_[level + 1], &temp);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
@ -80,6 +80,9 @@ enum CompactionStyle : char {
|
||||
kCompactionStyleNone = 0x3,
|
||||
};
|
||||
|
||||
// In Level-based compaction, it determines which file from a level to be
// picked to merge to the next level. We suggest people try
// kMinOverlappingRatio first when you tune your database.
|
||||
enum CompactionPri : char {
|
||||
// Slightly prioritize larger files by size compensated by #deletes
|
||||
kByCompensatedSize = 0x0,
|
||||
@ -90,6 +93,10 @@ enum CompactionPri : char {
|
||||
// for the longest. If your updates are random across the key space,
|
||||
// write amplification is slightly better with this option.
|
||||
kOldestSmallestSeqFirst = 0x2,
|
||||
// First compact files whose ratio between overlapping size in next level
|
||||
// and its size is the smallest. It in many cases can optimize write
|
||||
// amplification.
|
||||
kMinOverlappingRatio = 0x3,
|
||||
};
|
||||
|
||||
enum class WALRecoveryMode : char {
|
||||
|
Loading…
Reference in New Issue
Block a user