From 6a14f7a9766ce783f7a8e6cf2bcdd99e4f70da34 Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 7 Apr 2016 17:40:42 -0700 Subject: [PATCH] Change several option defaults Summary: Changing several option defaults: options.max_open_files changes from 5000 to -1 options.base_background_compactions changes from max_background_compactions to 1 options.wal_recovery_mode changes from kTolerateCorruptedTailRecords to kPointInTimeRecovery options.compaction_pri changes from kByCompensatedSize to kByCompensatedSize Test Plan: Write unit tests to see OldDefaults() works as expected. Reviewers: IslamAbdelRahman, yhchiang, igor Reviewed By: igor Subscribers: MarkCallaghan, yiwu, kradhakrishnan, leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D56427 --- DEFAULT_OPTIONS_HISTORY.md | 19 +++++++---- db/column_family_test.cc | 7 ++++ db/compaction_picker_test.cc | 14 ++++---- db/corruption_test.cc | 1 + db/db_compaction_test.cc | 12 +++---- db/db_test_util.cc | 5 +++ include/rocksdb/options.h | 8 ++--- util/options.cc | 31 ++++++++++++------ util/options_test.cc | 63 +++++++++++++++++++++++++++--------- 9 files changed, 110 insertions(+), 50 deletions(-) diff --git a/DEFAULT_OPTIONS_HISTORY.md b/DEFAULT_OPTIONS_HISTORY.md index 532213b24..955ccfb24 100644 --- a/DEFAULT_OPTIONS_HISTORY.md +++ b/DEFAULT_OPTIONS_HISTORY.md @@ -1,9 +1,14 @@ +## Unreleased +* options.max_open_files changes from 5000 to -1. It improves performance, but users need to set file descriptor limit to be large enough and watch memory usage for index and bloom filters. +* options.base_background_compactions changes from max_background_compactions to 1. When users set higher max_background_compactions but the write throughput is not high, the writes are less spiky to disks. +* options.wal_recovery_mode changes from kTolerateCorruptedTailRecords to kPointInTimeRecovery. Avoid some false positive when file system or hardware reorder the writes for file data and metadata.
+ # RocksDB default options change log ## 4.7.0 (4/8/2016) -* options.write_buffer_size changes from 4MB to 64MB -* options.target_file_size_base changes from 2MB to 64MB -* options.max_bytes_for_level_base changes from 10MB to 256MB -* options.soft_pending_compaction_bytes_limit changes from 0 (disabled) to 64GB -* options.hard_pending_compaction_bytes_limit changes from 0 (disabled) to 256GB -* table_cache_numshardbits changes from 4 to 6 -* max_file_opening_threads changes from 1 to 16 +* options.write_buffer_size changes from 4MB to 64MB. +* options.target_file_size_base changes from 2MB to 64MB. +* options.max_bytes_for_level_base changes from 10MB to 256MB. +* options.soft_pending_compaction_bytes_limit changes from 0 (disabled) to 64GB. +* options.hard_pending_compaction_bytes_limit changes from 0 (disabled) to 256GB. +* table_cache_numshardbits changes from 4 to 6. +* max_file_opening_threads changes from 1 to 16. diff --git a/db/column_family_test.cc b/db/column_family_test.cc index 88e85bf15..f8ebec57f 100644 --- a/db/column_family_test.cc +++ b/db/column_family_test.cc @@ -1042,6 +1042,7 @@ TEST_F(ColumnFamilyTest, AutomaticAndManualCompactions) { db_options_.max_open_files = 20; // only 10 files in file cache db_options_.disableDataSync = true; db_options_.max_background_compactions = 3; + db_options_.base_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; @@ -1134,6 +1135,7 @@ TEST_F(ColumnFamilyTest, ManualAndAutomaticCompactions) { db_options_.max_open_files = 20; // only 10 files in file cache db_options_.disableDataSync = true; db_options_.max_background_compactions = 3; + db_options_.base_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; @@ -1229,6 +1231,7 @@ TEST_F(ColumnFamilyTest, SameCFManualManualCompactions) { db_options_.max_open_files = 20; // only 10 files in file cache db_options_.disableDataSync = true; 
db_options_.max_background_compactions = 3; + db_options_.base_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; @@ -1327,6 +1330,7 @@ TEST_F(ColumnFamilyTest, SameCFManualAutomaticCompactions) { db_options_.max_open_files = 20; // only 10 files in file cache db_options_.disableDataSync = true; db_options_.max_background_compactions = 3; + db_options_.base_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; @@ -1416,6 +1420,7 @@ TEST_F(ColumnFamilyTest, SameCFManualAutomaticCompactionsLevel) { db_options_.max_open_files = 20; // only 10 files in file cache db_options_.disableDataSync = true; db_options_.max_background_compactions = 3; + db_options_.base_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; @@ -1512,6 +1517,7 @@ TEST_F(ColumnFamilyTest, SameCFManualAutomaticConflict) { db_options_.max_open_files = 20; // only 10 files in file cache db_options_.disableDataSync = true; db_options_.max_background_compactions = 3; + db_options_.base_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; @@ -1631,6 +1637,7 @@ TEST_F(ColumnFamilyTest, SameCFAutomaticManualCompactions) { db_options_.max_open_files = 20; // only 10 files in file cache db_options_.disableDataSync = true; db_options_.max_background_compactions = 3; + db_options_.base_background_compactions = 3; default_cf.compaction_style = kCompactionStyleLevel; default_cf.num_levels = 3; diff --git a/db/compaction_picker_test.cc b/db/compaction_picker_test.cc index 69a21cb78..4b4ee87bd 100644 --- a/db/compaction_picker_test.cc +++ b/db/compaction_picker_test.cc @@ -79,7 +79,7 @@ class CompactionPickerTest : public testing::Test { } void Add(int level, uint32_t file_number, const char* smallest, - const char* largest, uint64_t file_size = 0, uint32_t path_id = 0, + const char* largest, 
uint64_t file_size = 1, uint32_t path_id = 0, SequenceNumber smallest_seq = 100, SequenceNumber largest_seq = 100) { assert(level < vstorage_->num_levels()); @@ -320,6 +320,7 @@ TEST_F(CompactionPickerTest, Level0TriggerDynamic4) { mutable_cf_options_.level0_file_num_compaction_trigger = 2; mutable_cf_options_.max_bytes_for_level_base = 200; mutable_cf_options_.max_bytes_for_level_multiplier = 10; + NewVersionStorage(num_levels, kCompactionStyleLevel); Add(0, 1U, "150", "200"); Add(0, 2U, "200", "250"); @@ -352,6 +353,7 @@ TEST_F(CompactionPickerTest, LevelTriggerDynamic4) { mutable_cf_options_.level0_file_num_compaction_trigger = 2; mutable_cf_options_.max_bytes_for_level_base = 200; mutable_cf_options_.max_bytes_for_level_multiplier = 10; + mutable_cf_options_.compaction_pri = kMinOverlappingRatio; NewVersionStorage(num_levels, kCompactionStyleLevel); Add(0, 1U, "150", "200"); Add(num_levels - 1, 3U, "200", "250", 300U); @@ -367,11 +369,9 @@ TEST_F(CompactionPickerTest, LevelTriggerDynamic4) { cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_)); ASSERT_TRUE(compaction.get() != nullptr); ASSERT_EQ(1U, compaction->num_input_files(0)); - ASSERT_EQ(6U, compaction->input(0, 0)->fd.GetNumber()); - ASSERT_EQ(2U, compaction->num_input_files(1)); - ASSERT_EQ(3U, compaction->input(1, 0)->fd.GetNumber()); - ASSERT_EQ(4U, compaction->input(1, 1)->fd.GetNumber()); - ASSERT_EQ(2U, compaction->num_input_levels()); + ASSERT_EQ(5U, compaction->input(0, 0)->fd.GetNumber()); + ASSERT_EQ(0, compaction->num_input_files(1)); + ASSERT_EQ(1U, compaction->num_input_levels()); ASSERT_EQ(num_levels - 1, compaction->output_level()); } @@ -599,6 +599,8 @@ TEST_F(CompactionPickerTest, ParentIndexResetBug) { // ranges (with different sequence numbers) in the input files. 
TEST_F(CompactionPickerTest, OverlappingUserKeys) { NewVersionStorage(6, kCompactionStyleLevel); + mutable_cf_options_.compaction_pri = kByCompensatedSize; + Add(1, 1U, "100", "150", 1U); // Overlapping user keys Add(1, 2U, "200", "400", 1U); diff --git a/db/corruption_test.cc b/db/corruption_test.cc index 85bfe57cb..e7d82407a 100644 --- a/db/corruption_test.cc +++ b/db/corruption_test.cc @@ -42,6 +42,7 @@ class CorruptionTest : public testing::Test { CorruptionTest() { tiny_cache_ = NewLRUCache(100); + options_.wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords; options_.env = &env_; dbname_ = test::TmpDir() + "/corruption_test"; DestroyDB(dbname_, options_); diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index c794407e9..ada936a87 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -70,8 +70,7 @@ class FlushedFileCollector : public EventListener { static const int kCDTValueSize = 1000; static const int kCDTKeysPerBuffer = 4; static const int kCDTNumLevels = 8; -Options DeletionTriggerOptions() { - Options options; +Options DeletionTriggerOptions(Options options) { options.compression = kNoCompression; options.write_buffer_size = kCDTKeysPerBuffer * (kCDTValueSize + 24); options.min_write_buffer_number_to_merge = 1; @@ -174,7 +173,7 @@ const SstFileMetaData* PickFileRandomly( TEST_P(DBCompactionTestWithParam, CompactionDeletionTrigger) { for (int tid = 0; tid < 3; ++tid) { uint64_t db_size[2]; - Options options = CurrentOptions(DeletionTriggerOptions()); + Options options = DeletionTriggerOptions(CurrentOptions()); options.max_subcompactions = max_subcompactions_; if (tid == 1) { @@ -217,8 +216,7 @@ TEST_F(DBCompactionTest, SkipStatsUpdateTest) { // the compaction behavior when there are many of deletion entries. // The test will need to be updated if the internal behavior changes. 
- Options options = DeletionTriggerOptions(); - options = CurrentOptions(options); + Options options = DeletionTriggerOptions(CurrentOptions()); options.env = env_; DestroyAndReopen(options); Random rnd(301); @@ -349,7 +347,7 @@ TEST_F(DBCompactionTest, TestTableReaderForCompaction) { TEST_P(DBCompactionTestWithParam, CompactionDeletionTriggerReopen) { for (int tid = 0; tid < 2; ++tid) { uint64_t db_size[3]; - Options options = CurrentOptions(DeletionTriggerOptions()); + Options options = DeletionTriggerOptions(CurrentOptions()); options.max_subcompactions = max_subcompactions_; if (tid == 1) { @@ -406,7 +404,7 @@ TEST_P(DBCompactionTestWithParam, CompactionDeletionTriggerReopen) { TEST_F(DBCompactionTest, DisableStatsUpdateReopen) { uint64_t db_size[3]; for (int test = 0; test < 2; ++test) { - Options options = CurrentOptions(DeletionTriggerOptions()); + Options options = DeletionTriggerOptions(CurrentOptions()); options.skip_stats_update_on_db_open = (test == 0); env_->random_read_counter_.Reset(); diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 41da85f34..8c1a5f390 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -212,6 +212,11 @@ Options DBTestBase::CurrentOptions( options.write_buffer_size = 4090 * 4096; options.target_file_size_base = 2 * 1024 * 1024; options.max_bytes_for_level_base = 10 * 1024 * 1024; + options.max_open_files = 5000; + options.base_background_compactions = -1; + options.wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords; + options.compaction_pri = CompactionPri::kByCompensatedSize; + return CurrentOptions(options, options_override); } diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index 01c3e80ac..67ab2d8f5 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -605,7 +605,7 @@ struct ColumnFamilyOptions { // If level compaction_style = kCompactionStyleLevel, for each level, // which files are prioritized to be picked to compact. 
- // Default: kCompactionPriByCompensatedSize + // Default: kByCompensatedSize CompactionPri compaction_pri; // If true, compaction will verify checksum on every read that happens @@ -893,7 +893,7 @@ struct DBOptions { // files opened are always kept open. You can estimate number of files based // on target_file_size_base and target_file_size_multiplier for level-based // compaction. For universal-style compaction, you can usually set it to -1. - // Default: 5000 or ulimit value of max open files (whichever is smaller) + // Default: -1 int max_open_files; // If max_open_files is -1, DB will open all files on DB::Open(). You can @@ -979,7 +979,7 @@ struct DBOptions { // Suggested number of concurrent background compaction jobs, submitted to // the default LOW priority thread pool. // - // Default: max_background_compactions + // Default: 1 int base_background_compactions; // Maximum number of concurrent background compaction jobs, submitted to @@ -1296,7 +1296,7 @@ struct DBOptions { bool skip_stats_update_on_db_open; // Recovery mode to control the consistency while replaying WAL - // Default: kTolerateCorruptedTailRecords + // Default: kPointInTimeRecovery WALRecoveryMode wal_recovery_mode; // A global cache for table-level rows. 
diff --git a/util/options.cc b/util/options.cc index f195cd49c..44c7604eb 100644 --- a/util/options.cc +++ b/util/options.cc @@ -220,7 +220,7 @@ DBOptions::DBOptions() #else info_log_level(DEBUG_LEVEL), #endif // NDEBUG - max_open_files(5000), + max_open_files(-1), max_file_opening_threads(16), max_total_wal_size(0), statistics(nullptr), @@ -229,7 +229,7 @@ DBOptions::DBOptions() db_log_dir(""), wal_dir(""), delete_obsolete_files_period_micros(6ULL * 60 * 60 * 1000000), - base_background_compactions(-1), + base_background_compactions(1), max_background_compactions(1), max_subcompactions(1), max_background_flushes(1), @@ -267,7 +267,7 @@ DBOptions::DBOptions() write_thread_max_yield_usec(100), write_thread_slow_yield_usec(3), skip_stats_update_on_db_open(false), - wal_recovery_mode(WALRecoveryMode::kTolerateCorruptedTailRecords), + wal_recovery_mode(WALRecoveryMode::kPointInTimeRecovery), row_cache(nullptr), #ifndef ROCKSDB_LITE wal_filter(nullptr), @@ -678,18 +678,29 @@ Options* Options::OldDefaults(int rocksdb_major_version, DBOptions* DBOptions::OldDefaults(int rocksdb_major_version, int rocksdb_minor_version) { - max_file_opening_threads = 1; - table_cache_numshardbits = 4; + if (rocksdb_major_version < 4 || + (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) { + max_file_opening_threads = 1; + table_cache_numshardbits = 4; + } + max_open_files = 5000; + base_background_compactions = -1; + wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords; return this; } ColumnFamilyOptions* ColumnFamilyOptions::OldDefaults( int rocksdb_major_version, int rocksdb_minor_version) { - write_buffer_size = 4 << 20; - target_file_size_base = 2 * 1048576; - max_bytes_for_level_base = 10 * 1048576; - soft_pending_compaction_bytes_limit = 0; - hard_pending_compaction_bytes_limit = 0; + if (rocksdb_major_version < 4 || + (rocksdb_major_version == 4 && rocksdb_minor_version < 7)) { + write_buffer_size = 4 << 20; + target_file_size_base = 2 * 1048576; + 
max_bytes_for_level_base = 10 * 1048576; + soft_pending_compaction_bytes_limit = 0; + hard_pending_compaction_bytes_limit = 0; + } + compaction_pri = CompactionPri::kByCompensatedSize; + return this; } diff --git a/util/options_test.cc b/util/options_test.cc index 879cc3c6b..4ed94d082 100644 --- a/util/options_test.cc +++ b/util/options_test.cc @@ -1267,22 +1267,53 @@ TEST_F(OptionsParserTest, DifferentDefault) { RocksDBOptionsParser parser; ASSERT_OK(parser.Parse(kOptionsFileName, env_.get())); - Options old_default_opts; - old_default_opts.OldDefaults(); - ASSERT_EQ(10 * 1048576, old_default_opts.max_bytes_for_level_base); - - Options old_default_opts46; - old_default_opts46.OldDefaults(); - ASSERT_EQ(10 * 1048576, old_default_opts46.max_bytes_for_level_base); - ASSERT_EQ(4, old_default_opts46.table_cache_numshardbits); - - ColumnFamilyOptions old_default_cf_opts; - old_default_cf_opts.OldDefaults(); - ASSERT_EQ(2 * 1048576, old_default_cf_opts.target_file_size_base); - - ColumnFamilyOptions old_default_cf_opts46; - old_default_cf_opts46.OldDefaults(); - ASSERT_EQ(2 * 1048576, old_default_cf_opts46.target_file_size_base); + { + Options old_default_opts; + old_default_opts.OldDefaults(); + ASSERT_EQ(10 * 1048576, old_default_opts.max_bytes_for_level_base); + ASSERT_EQ(5000, old_default_opts.max_open_files); + ASSERT_EQ(-1, old_default_opts.base_background_compactions); + ASSERT_EQ(WALRecoveryMode::kTolerateCorruptedTailRecords, + old_default_opts.wal_recovery_mode); + } + { + Options old_default_opts; + old_default_opts.OldDefaults(4, 6); + ASSERT_EQ(10 * 1048576, old_default_opts.max_bytes_for_level_base); + ASSERT_EQ(5000, old_default_opts.max_open_files); + } + { + Options old_default_opts; + old_default_opts.OldDefaults(4, 7); + ASSERT_NE(10 * 1048576, old_default_opts.max_bytes_for_level_base); + ASSERT_NE(4, old_default_opts.table_cache_numshardbits); + ASSERT_EQ(5000, old_default_opts.max_open_files); + } + { + ColumnFamilyOptions old_default_cf_opts; + 
old_default_cf_opts.OldDefaults(); + ASSERT_EQ(2 * 1048576, old_default_cf_opts.target_file_size_base); + ASSERT_EQ(4 << 20, old_default_cf_opts.write_buffer_size); + ASSERT_EQ(2 * 1048576, old_default_cf_opts.target_file_size_base); + ASSERT_EQ(0, old_default_cf_opts.soft_pending_compaction_bytes_limit); + ASSERT_EQ(0, old_default_cf_opts.hard_pending_compaction_bytes_limit); + ASSERT_EQ(CompactionPri::kByCompensatedSize, + old_default_cf_opts.compaction_pri); + } + { + ColumnFamilyOptions old_default_cf_opts; + old_default_cf_opts.OldDefaults(4, 6); + ASSERT_EQ(2 * 1048576, old_default_cf_opts.target_file_size_base); + ASSERT_EQ(CompactionPri::kByCompensatedSize, + old_default_cf_opts.compaction_pri); + } + { + ColumnFamilyOptions old_default_cf_opts; + old_default_cf_opts.OldDefaults(4, 7); + ASSERT_NE(2 * 1048576, old_default_cf_opts.target_file_size_base); + ASSERT_EQ(CompactionPri::kByCompensatedSize, + old_default_cf_opts.compaction_pri); + } ColumnFamilyOptions cf_small_opts; cf_small_opts.OptimizeForSmallDb();