CompactionPri = kMinOverlappingRatio also uses compensated file size (#4907)

Summary:
Right now, CompactionPri = kMinOverlappingRatio provides the best write amplification, but it doesn't
prioritize files with more tombstones. We combine the two good features by making kMinOverlappingRatio
also boost files with lots of tombstones.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4907

Differential Revision: D13788774

Pulled By: siying

fbshipit-source-id: 1991cbb495fb76c8b529de69896e38d81ed9d9b3
This commit is contained in:
Siying Dong 2019-01-23 13:18:13 -08:00 committed by Facebook Github Bot
parent 1eded07f00
commit 5bf941966b
3 changed files with 36 additions and 5 deletions

View File

@ -7,6 +7,7 @@
* For users of dictionary compression with ZSTD v0.7.0+, we now reuse the same digested dictionary when compressing each of an SST file's data blocks for faster compression speeds. * For users of dictionary compression with ZSTD v0.7.0+, we now reuse the same digested dictionary when compressing each of an SST file's data blocks for faster compression speeds.
### Public API Change ### Public API Change
* CompactionPri = kMinOverlappingRatio now also uses compensated file size, which boosts files with lots of tombstones so that they are compacted first.
* Transaction::GetForUpdate is extended with a do_validate parameter with default value of true. If false it skips validating the snapshot before doing the read. Similarly ::Merge, ::Put, ::Delete, and ::SingleDelete are extended with assume_tracked with default value of false. If true it indicates that call is assumed to be after a ::GetForUpdate. * Transaction::GetForUpdate is extended with a do_validate parameter with default value of true. If false it skips validating the snapshot before doing the read. Similarly ::Merge, ::Put, ::Delete, and ::SingleDelete are extended with assume_tracked with default value of false. If true it indicates that call is assumed to be after a ::GetForUpdate.
* `TableProperties::num_entries` and `TableProperties::num_deletions` now also account for number of range tombstones. * `TableProperties::num_entries` and `TableProperties::num_deletions` now also account for number of range tombstones.
* Remove geodb, spatial_db, document_db, json_document, date_tiered_db, and redis_lists. * Remove geodb, spatial_db, document_db, json_document, date_tiered_db, and redis_lists.

View File

@ -85,8 +85,8 @@ class CompactionPickerTest : public testing::Test {
void Add(int level, uint32_t file_number, const char* smallest, void Add(int level, uint32_t file_number, const char* smallest,
const char* largest, uint64_t file_size = 1, uint32_t path_id = 0, const char* largest, uint64_t file_size = 1, uint32_t path_id = 0,
SequenceNumber smallest_seq = 100, SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100,
SequenceNumber largest_seq = 100) { size_t compensated_file_size = 0) {
assert(level < vstorage_->num_levels()); assert(level < vstorage_->num_levels());
FileMetaData* f = new FileMetaData; FileMetaData* f = new FileMetaData;
f->fd = FileDescriptor(file_number, path_id, file_size); f->fd = FileDescriptor(file_number, path_id, file_size);
@ -94,7 +94,8 @@ class CompactionPickerTest : public testing::Test {
f->largest = InternalKey(largest, largest_seq, kTypeValue); f->largest = InternalKey(largest, largest_seq, kTypeValue);
f->fd.smallest_seqno = smallest_seq; f->fd.smallest_seqno = smallest_seq;
f->fd.largest_seqno = largest_seq; f->fd.largest_seqno = largest_seq;
f->compensated_file_size = file_size; f->compensated_file_size =
(compensated_file_size != 0) ? compensated_file_size : file_size;
f->refs = 0; f->refs = 0;
vstorage_->AddFile(level, f); vstorage_->AddFile(level, f);
files_.emplace_back(f); files_.emplace_back(f);
@ -617,6 +618,35 @@ TEST_F(CompactionPickerTest, CompactionPriMinOverlapping3) {
ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber()); ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber());
} }
// Checks that kMinOverlappingRatio takes a file's compensated file size into
// account: file 6 has a compensated size larger than its actual size and is
// the file expected to be picked from level 2.
TEST_F(CompactionPickerTest, CompactionPriMinOverlapping4) {
NewVersionStorage(6, kCompactionStyleLevel);
ioptions_.compaction_pri = kMinOverlappingRatio;
mutable_cf_options_.max_bytes_for_level_base = 10000000;
mutable_cf_options_.max_bytes_for_level_multiplier = 10;
// File 6 overlaps with files 26 and 27. Its compensated file size
// (180000000) is larger than its actual file size (60000000), so it is
// expected to be picked.
Add(2, 6U, "150", "167", 60000000U, 0, 100, 100, 180000000U);
Add(2, 7U, "168", "169", 60000000U); // Overlaps with file 27
Add(2, 8U, "201", "300", 61000000U); // Overlaps with file 28
Add(3, 26U, "160", "165", 60000000U);
// Boosted file size in output level is not considered.
Add(3, 27U, "166", "170", 60000000U, 0, 100, 100, 260000000U);
Add(3, 28U, "180", "400", 60000000U);
Add(3, 29U, "401", "500", 60000000U);
UpdateVersionStorageInfo();
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
ASSERT_TRUE(compaction.get() != nullptr);
ASSERT_EQ(1U, compaction->num_input_files(0));
// Expect file 6. The overlapping ratio is divided by the compensated file
// size, so the boost lowers file 6's ratio; presumably the file with the
// lowest ratio is the one picked.
ASSERT_EQ(6U, compaction->input(0, 0)->fd.GetNumber());
}
// This test exhibits the bug where we don't properly reset parent_index in // This test exhibits the bug where we don't properly reset parent_index in
// PickCompaction() // PickCompaction()
TEST_F(CompactionPickerTest, ParentIndexResetBug) { TEST_F(CompactionPickerTest, ParentIndexResetBug) {

View File

@ -1887,9 +1887,9 @@ void SortFileByOverlappingRatio(
next_level_it++; next_level_it++;
} }
assert(file->fd.file_size != 0); assert(file->compensated_file_size != 0);
file_to_order[file->fd.GetNumber()] = file_to_order[file->fd.GetNumber()] =
overlapping_bytes * 1024u / file->fd.file_size; overlapping_bytes * 1024u / file->compensated_file_size;
} }
std::sort(temp->begin(), temp->end(), std::sort(temp->begin(), temp->end(),