From 7a3577519f00c216cf15063025ec12b5c2125866 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Thu, 21 May 2015 11:40:48 -0700 Subject: [PATCH] Don't artificially inflate L0 score Summary: This turns out to be pretty bad because if we prioritize L0->L1 then L1 can grow artificially large, which makes L0->L1 more and more expensive. For example: 256MB @ L0 + 256MB @ L1 --> 512MB @ L1 256MB @ L0 + 512MB @ L1 --> 768MB @ L1 256MB @ L0 + 768MB @ L1 --> 1GB @ L1 .... 256MB @ L0 + 10GB @ L1 --> 10.2GB @ L1 At some point we need to start compacting L1->L2 to speed up L0->L1. Test Plan: The performance improvement is massive for a heavy write workload. This is the benchmark I ran: https://phabricator.fb.com/P19842671. Before this change, the benchmark took 47 minutes to complete. After, the benchmark finished in 2 minutes. You can see full results here: https://phabricator.fb.com/P19842674 Also, we ran this diff on MongoDB on RocksDB on one replica set. Before the change, our initial sync was so slow that it couldn't keep up with primary writes. 
After the change, the import finished without any issues. Reviewers: dynamike, MarkCallaghan, rven, yhchiang, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D38637 --- db/db_bench.cc | 12 +++++++++--- db/version_set.cc | 7 ------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/db/db_bench.cc b/db/db_bench.cc index e4fc1c445..e7486a24c 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -367,13 +367,19 @@ static std::vector<int> FLAGS_max_bytes_for_level_multiplier_additional_v; DEFINE_string(max_bytes_for_level_multiplier_additional, "", "A vector that specifies additional fanout per level"); -DEFINE_int32(level0_stop_writes_trigger, 12, "Number of files in level-0" +DEFINE_int32(level0_stop_writes_trigger, + rocksdb::Options().level0_stop_writes_trigger, + "Number of files in level-0" " that will trigger put stop."); -DEFINE_int32(level0_slowdown_writes_trigger, 8, "Number of files in level-0" +DEFINE_int32(level0_slowdown_writes_trigger, + rocksdb::Options().level0_slowdown_writes_trigger, + "Number of files in level-0" " that will slow down writes."); -DEFINE_int32(level0_file_num_compaction_trigger, 4, "Number of files in level-0" +DEFINE_int32(level0_file_num_compaction_trigger, + rocksdb::Options().level0_file_num_compaction_trigger, + "Number of files in level-0" " when compactions start"); static bool ValidateInt32Percent(const char* flagname, int32_t value) { diff --git a/db/version_set.cc b/db/version_set.cc index 9881c3edc..3987fcf91 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1019,13 +1019,6 @@ void VersionStorageInfo::ComputeCompactionScore( if (compaction_style_ == kCompactionStyleFIFO) { score = static_cast<double>(total_size) / compaction_options_fifo.max_table_files_size; - } else if (num_sorted_runs >= - mutable_cf_options.level0_stop_writes_trigger) { - // If we are slowing down writes, then we better compact that first - score = 1000000; - } else if (num_sorted_runs >= - 
mutable_cf_options.level0_slowdown_writes_trigger) { - score = 10000; } else { score = static_cast<double>(num_sorted_runs) / mutable_cf_options.level0_file_num_compaction_trigger;