From 416943bf28b4630adc8a119ccbf3bb348487bcce Mon Sep 17 00:00:00 2001 From: Connor1996 Date: Mon, 24 Aug 2020 22:05:10 -0700 Subject: [PATCH] Eliminates a no-op compaction upon snapshot release when disabling auto compactions (#7267) Summary: After releasing a snapshot, it checks whether it is suitable to trigger bottom compactions. When disabling auto compactions, it may still schedule compaction when releasing a snapshot. Whereas no compaction job will be actually handled, so the state of LSM is not changed and compaction will be triggered again and again every time releasing a snapshot. Too frequent compactions lead to high CPU usage and high db_mutex lock contention which affects foreground write duration finally. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7267 Test Plan: - make check - manual test Reviewed By: akankshamahajan15 Differential Revision: D23252880 Pulled By: ajkr fbshipit-source-id: 4431e071a35d9912a2a3592875db27bae521434b --- HISTORY.md | 1 + db/column_family.cc | 3 +- db/db_compaction_test.cc | 70 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 18daf4bd4..016646df0 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -7,6 +7,7 @@ * Fix a bug where a level refitting in CompactRange() might race with an automatic compaction that puts the data to the target level of the refitting. The bug has been there for years. * BackupEngine::CreateNewBackup could fail intermittently with non-OK status when backing up a read-write DB configured with a DBOptions::file_checksum_gen_factory. This issue has been worked-around such that CreateNewBackup should succeed, but (until fully fixed) BackupEngine might not see all checksums available in the DB. * Fix a bug where immutable flushed memtable is never destroyed because a memtable is not added to delete list because of refernce hold by super version and super version doesn't switch because of empty delete list. So memory usage increases beyond write_buffer_size + max_write_buffer_size_to_maintain. +* Fix useless no-op compactions scheduled upon snapshot release when options.disable-auto-compactions = true. ### New Features * A new option `std::shared_ptr file_checksum_gen_factory` is added to `BackupableDBOptions`. The default value for this option is `nullptr`. If this option is null, the default backup engine checksum function (crc32c) will be used for creating, verifying, or restoring backups. If it is not null and is set to the DB custom checksum factory, the custom checksum function used in DB will also be used for creating, verifying, or restoring backups, in addition to the default checksum function (crc32c). If it is not null and is set to a custom checksum factory different than the DB custom checksum factory (which may be null), BackupEngine will return `Status::InvalidArgument()`. diff --git a/db/column_family.cc b/db/column_family.cc index 745f5de59..c167dcf29 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -1054,7 +1054,8 @@ void ColumnFamilyData::CreateNewMemtable( } bool ColumnFamilyData::NeedsCompaction() const { - return compaction_picker_->NeedsCompaction(current_->storage_info()); + return !mutable_cf_options_.disable_auto_compactions && + compaction_picker_->NeedsCompaction(current_->storage_info()); } Compaction* ColumnFamilyData::PickCompaction( diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index f51217424..8ce04c0bd 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -3600,6 +3600,76 @@ TEST_F(DBCompactionTest, CompactBottomLevelFilesWithDeletions) { ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } +TEST_F(DBCompactionTest, NoCompactBottomLevelFilesWithDeletions) { + // bottom-level files may contain deletions due to snapshots protecting the + // deleted keys. Once the snapshot is released, we should see files with many + // such deletions undergo single-file compactions. But when disabling auto + // compactions, it shouldn't be triggered which may causing too many + // background jobs. + const int kNumKeysPerFile = 1024; + const int kNumLevelFiles = 4; + const int kValueSize = 128; + Options options = CurrentOptions(); + options.compression = kNoCompression; + options.disable_auto_compactions = true; + options.level0_file_num_compaction_trigger = kNumLevelFiles; + // inflate it a bit to account for key/metadata overhead + options.target_file_size_base = 120 * kNumKeysPerFile * kValueSize / 100; + Reopen(options); + + Random rnd(301); + const Snapshot* snapshot = nullptr; + for (int i = 0; i < kNumLevelFiles; ++i) { + for (int j = 0; j < kNumKeysPerFile; ++j) { + ASSERT_OK( + Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kValueSize))); + } + if (i == kNumLevelFiles - 1) { + snapshot = db_->GetSnapshot(); + // delete every other key after grabbing a snapshot, so these deletions + // and the keys they cover can't be dropped until after the snapshot is + // released. + for (int j = 0; j < kNumLevelFiles * kNumKeysPerFile; j += 2) { + ASSERT_OK(Delete(Key(j))); + } + } + Flush(); + if (i < kNumLevelFiles - 1) { + ASSERT_EQ(i + 1, NumTableFilesAtLevel(0)); + } + } + dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr); + ASSERT_EQ(kNumLevelFiles, NumTableFilesAtLevel(1)); + + std::vector pre_release_metadata, post_release_metadata; + db_->GetLiveFilesMetaData(&pre_release_metadata); + // just need to bump seqnum so ReleaseSnapshot knows the newest key in the SST + // files does not need to be preserved in case of a future snapshot. + ASSERT_OK(Put(Key(0), "val")); + + // release snapshot and no compaction should be triggered. + std::atomic num_compactions{0}; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( + "DBImpl::BackgroundCompaction:Start", + [&](void* /*arg*/) { num_compactions.fetch_add(1); }); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + db_->ReleaseSnapshot(snapshot); + dbfull()->TEST_WaitForCompact(); + ASSERT_EQ(0, num_compactions); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); + + db_->GetLiveFilesMetaData(&post_release_metadata); + ASSERT_EQ(pre_release_metadata.size(), post_release_metadata.size()); + for (size_t i = 0; i < pre_release_metadata.size(); ++i) { + const auto& pre_file = pre_release_metadata[i]; + const auto& post_file = post_release_metadata[i]; + ASSERT_EQ(1, pre_file.level); + ASSERT_EQ(1, post_file.level); + // each file is same as before with deletion markers/deleted keys. + ASSERT_EQ(post_file.size, pre_file.size); + } +} + TEST_F(DBCompactionTest, LevelCompactExpiredTtlFiles) { const int kNumKeysPerFile = 32; const int kNumLevelFiles = 2;