From dae3b5545c556e2479972a21d1ed8850aac0bae8 Mon Sep 17 00:00:00 2001 From: anand76 Date: Thu, 28 Mar 2019 15:13:02 -0700 Subject: [PATCH] Smooth the deletion of WAL files (#5116) Summary: WAL files are currently not subject to deletion rate limiting by DeleteScheduler. If the size of the WAL files is significant, this can cause a high delete rate on SSDs that may affect other operations. To fix it, force WAL file deletions to go through the SstFileManager. Original PR for this is #2768 Pull Request resolved: https://github.com/facebook/rocksdb/pull/5116 Differential Revision: D14669437 Pulled By: anand1976 fbshipit-source-id: c5f62d0640cebaa1574de841a1d01e4ce2faadf0 --- db/db_impl.cc | 13 +++-- db/db_impl_files.cc | 4 +- db/db_sst_test.cc | 87 ++++++++++++++++++++++++++---- db/wal_manager.cc | 8 +-- include/rocksdb/sst_file_manager.h | 3 ++ util/file_util.cc | 8 +-- util/file_util.h | 11 ++-- utilities/blob_db/blob_db_test.cc | 9 +++- 8 files changed, 108 insertions(+), 35 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 17da88b8a..becc8e8e2 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -2878,8 +2878,8 @@ Status DestroyDB(const std::string& dbname, const Options& options, std::string path_to_delete = dbname + "/" + fname; if (type == kMetaDatabase) { del = DestroyDB(path_to_delete, options); - } else if (type == kTableFile) { - del = DeleteSSTFile(&soptions, path_to_delete, dbname); + } else if (type == kTableFile || type == kLogFile) { + del = DeleteDBFile(&soptions, path_to_delete, dbname); } else { del = env->DeleteFile(path_to_delete); } @@ -2913,7 +2913,7 @@ Status DestroyDB(const std::string& dbname, const Options& options, if (ParseFileName(fname, &number, &type) && type == kTableFile) { // Lock file will be deleted at end std::string table_path = path + "/" + fname; - Status del = DeleteSSTFile(&soptions, table_path, dbname); + Status del = DeleteDBFile(&soptions, table_path, dbname); if (result.ok() && !del.ok()) { result = del; } @@ -2939,7 +2939,8 @@ Status DestroyDB(const std::string& dbname, const Options& options, // Delete archival files. for (const auto& file : archiveFiles) { if (ParseFileName(file, &number, &type) && type == kLogFile) { - Status del = env->DeleteFile(archivedir + "/" + file); + Status del = + DeleteDBFile(&soptions, archivedir + "/" + file, archivedir); if (result.ok() && !del.ok()) { result = del; } @@ -2952,7 +2953,9 @@ Status DestroyDB(const std::string& dbname, const Options& options, if (wal_dir_exists) { for (const auto& file : walDirFiles) { if (ParseFileName(file, &number, &type) && type == kLogFile) { - Status del = env->DeleteFile(LogFileName(soptions.wal_dir, number)); + Status del = + DeleteDBFile(&soptions, LogFileName(soptions.wal_dir, number), + soptions.wal_dir); if (result.ok() && !del.ok()) { result = del; } diff --git a/db/db_impl_files.cc b/db/db_impl_files.cc index 45187c4b0..90cc6a14b 100644 --- a/db/db_impl_files.cc +++ b/db/db_impl_files.cc @@ -259,9 +259,9 @@ void DBImpl::DeleteObsoleteFileImpl(int job_id, const std::string& fname, const std::string& path_to_sync, FileType type, uint64_t number) { Status file_deletion_status; - if (type == kTableFile) { + if (type == kTableFile || type == kLogFile) { file_deletion_status = - DeleteSSTFile(&immutable_db_options_, fname, path_to_sync); + DeleteDBFile(&immutable_db_options_, fname, path_to_sync); } else { file_deletion_status = env_->DeleteFile(fname); } diff --git a/db/db_sst_test.cc b/db/db_sst_test.cc index 0612842a9..cd6cde19a 100644 --- a/db/db_sst_test.cc +++ b/db/db_sst_test.cc @@ -367,14 +367,16 @@ TEST_F(DBSSTTest, RateLimitedDelete) { auto sfm = static_cast(options.sst_file_manager.get()); sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1); + WriteOptions wo; + wo.disableWAL = true; ASSERT_OK(TryReopen(options)); // Create 4 files in L0 for (char v = 'a'; v <= 'd'; v++) { - ASSERT_OK(Put("Key2", DummyString(1024, v))); - ASSERT_OK(Put("Key3", DummyString(1024, v))); - ASSERT_OK(Put("Key4", DummyString(1024, v))); - ASSERT_OK(Put("Key1", DummyString(1024, v))); - ASSERT_OK(Put("Key4", DummyString(1024, v))); + ASSERT_OK(Put("Key2", DummyString(1024, v), wo)); + ASSERT_OK(Put("Key3", DummyString(1024, v), wo)); + ASSERT_OK(Put("Key4", DummyString(1024, v), wo)); + ASSERT_OK(Put("Key1", DummyString(1024, v), wo)); + ASSERT_OK(Put("Key4", DummyString(1024, v), wo)); ASSERT_OK(Flush()); } // We created 4 sst files in L0 @@ -408,6 +410,55 @@ TEST_F(DBSSTTest, RateLimitedDelete) { rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } +TEST_F(DBSSTTest, RateLimitedWALDelete) { + Destroy(last_options_); + + std::vector penalties; + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "DeleteScheduler::BackgroundEmptyTrash:Wait", + [&](void* arg) { penalties.push_back(*(static_cast(arg))); }); + + env_->no_slowdown_ = true; + env_->time_elapse_only_sleep_ = true; + Options options = CurrentOptions(); + options.disable_auto_compactions = true; + options.env = env_; + + int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec + Status s; + options.sst_file_manager.reset( + NewSstFileManager(env_, nullptr, "", 0, false, &s, 0)); + ASSERT_OK(s); + options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec); + auto sfm = static_cast(options.sst_file_manager.get()); + sfm->delete_scheduler()->SetMaxTrashDBRatio(2.1); + + ASSERT_OK(TryReopen(options)); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + // Create 4 files in L0 + for (char v = 'a'; v <= 'd'; v++) { + ASSERT_OK(Put("Key2", DummyString(1024, v))); + ASSERT_OK(Put("Key3", DummyString(1024, v))); + ASSERT_OK(Put("Key4", DummyString(1024, v))); + ASSERT_OK(Put("Key1", DummyString(1024, v))); + ASSERT_OK(Put("Key4", DummyString(1024, v))); + ASSERT_OK(Flush()); + } + // We created 4 sst files in L0 + ASSERT_EQ("4", FilesPerLevel(0)); + + // Compaction will move the 4 files in L0 to trash and create 1 L1 file + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); + ASSERT_OK(dbfull()->TEST_WaitForCompact(true)); + ASSERT_EQ("0,1", FilesPerLevel(0)); + + sfm->WaitForEmptyTrash(); + ASSERT_EQ(penalties.size(), 8); + + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); +} + TEST_F(DBSSTTest, OpenDBWithExistingTrash) { Options options = CurrentOptions(); @@ -446,7 +497,6 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) { rocksdb::SyncPoint::GetInstance()->SetCallBack( "DeleteScheduler::DeleteFile", [&](void* /*arg*/) { bg_delete_file++; }); - rocksdb::SyncPoint::GetInstance()->EnableProcessing(); Options options = CurrentOptions(); options.disable_auto_compactions = true; @@ -464,10 +514,14 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) { auto sfm = static_cast(options.sst_file_manager.get()); DestroyAndReopen(options); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + WriteOptions wo; + wo.disableWAL = true; // Create 4 files in L0 for (int i = 0; i < 4; i++) { - ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A'))); + ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A'), wo)); ASSERT_OK(Flush()); } // We created 4 sst files in L0 @@ -483,7 +537,7 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) { // Create 4 files in L0 for (int i = 4; i < 8; i++) { - ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B'))); + ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B'), wo)); ASSERT_OK(Flush()); } ASSERT_EQ("4,1", FilesPerLevel(0)); @@ -538,14 +592,27 @@ TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) { // Close DB and destroy it using DeleteScheduler Close(); + int num_sst_files = 0; + int num_wal_files = 0; + std::vector db_files; + env_->GetChildren(dbname_, &db_files); + for (std::string f : db_files) { + if (f.substr(f.find_last_of(".") + 1) == "sst") { + num_sst_files++; + } else if (f.substr(f.find_last_of(".") + 1) == "log") { + num_wal_files++; + } + } + ASSERT_GT(num_sst_files, 0); + ASSERT_GT(num_wal_files, 0); + auto sfm = static_cast(options.sst_file_manager.get()); sfm->SetDeleteRateBytesPerSecond(1024 * 1024); sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1); ASSERT_OK(DestroyDB(dbname_, options)); sfm->WaitForEmptyTrash(); - // We have deleted the 4 sst files in the delete_scheduler - ASSERT_EQ(bg_delete_file, 4); + ASSERT_EQ(bg_delete_file, num_sst_files + num_wal_files); } TEST_F(DBSSTTest, DBWithMaxSpaceAllowed) { diff --git a/db/wal_manager.cc b/db/wal_manager.cc index b306df710..62511819e 100644 --- a/db/wal_manager.cc +++ b/db/wal_manager.cc @@ -29,6 +29,7 @@ #include "util/cast_util.h" #include "util/coding.h" #include "util/file_reader_writer.h" +#include "util/file_util.h" #include "util/filename.h" #include "util/logging.h" #include "util/mutexlock.h" @@ -190,7 +191,7 @@ void WalManager::PurgeObsoleteWALFiles() { continue; } if (now_seconds - file_m_time > db_options_.wal_ttl_seconds) { - s = env_->DeleteFile(file_path); + s = DeleteDBFile(&db_options_, file_path, archival_dir, false); if (!s.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "Can't delete file: %s: %s", file_path.c_str(), s.ToString().c_str()); @@ -216,7 +217,7 @@ void WalManager::PurgeObsoleteWALFiles() { log_file_size = std::max(log_file_size, file_size); ++log_files_num; } else { - s = env_->DeleteFile(file_path); + s = DeleteDBFile(&db_options_, file_path, archival_dir, false); if (!s.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "Unable to delete file: %s: %s", file_path.c_str(), @@ -255,7 +256,8 @@ void WalManager::PurgeObsoleteWALFiles() { for (size_t i = 0; i < files_del_num; ++i) { std::string const file_path = archived_logs[i]->PathName(); - s = env_->DeleteFile(db_options_.wal_dir + "/" + file_path); + s = DeleteDBFile(&db_options_, db_options_.wal_dir + "/" + file_path, + db_options_.wal_dir, false); if (!s.ok()) { ROCKS_LOG_WARN(db_options_.info_log, "Unable to delete file: %s: %s", file_path.c_str(), s.ToString().c_str()); diff --git a/include/rocksdb/sst_file_manager.h b/include/rocksdb/sst_file_manager.h index 1474da955..3e3ef859b 100644 --- a/include/rocksdb/sst_file_manager.h +++ b/include/rocksdb/sst_file_manager.h @@ -83,6 +83,8 @@ class SstFileManager { // Create a new SstFileManager that can be shared among multiple RocksDB // instances to track SST file and control there deletion rate. +// Even though SstFileManager don't track WAL files but it still control +// there deletion rate. // // @param env: Pointer to Env object, please see "rocksdb/env.h". // @param info_log: If not nullptr, info_log will be used to log errors. @@ -93,6 +95,7 @@ class SstFileManager { // this value is set to 1024 (1 Kb / sec) and we deleted a file of size 4 Kb // in 1 second, we will wait for another 3 seconds before we delete other // files, Set to 0 to disable deletion rate limiting. +// This option also affect the delete rate of WAL files in the DB. // @param delete_existing_trash: Deprecated, this argument have no effect, but // if user provide trash_dir we will schedule deletes for files in the dir // @param status: If not nullptr, status will contain any errors that happened diff --git a/util/file_util.cc b/util/file_util.cc index 3f730f3e8..ba1b4744b 100644 --- a/util/file_util.cc +++ b/util/file_util.cc @@ -87,16 +87,11 @@ Status CreateFile(Env* env, const std::string& destination, return dest_writer->Sync(use_fsync); } -Status DeleteSSTFile(const ImmutableDBOptions* db_options, - const std::string& fname, const std::string& dir_to_sync) { - return DeleteDBFile(db_options, fname, dir_to_sync, false); -} - Status DeleteDBFile(const ImmutableDBOptions* db_options, const std::string& fname, const std::string& dir_to_sync, const bool force_bg) { #ifndef ROCKSDB_LITE - auto sfm = + SstFileManagerImpl* sfm = static_cast(db_options->sst_file_manager.get()); if (sfm) { return sfm->ScheduleFileDeletion(fname, dir_to_sync, force_bg); @@ -107,6 +102,7 @@ Status DeleteDBFile(const ImmutableDBOptions* db_options, (void)dir_to_sync; (void)force_bg; // SstFileManager is not supported in ROCKSDB_LITE + // Delete file immediately return db_options->env->DeleteFile(fname); #endif } diff --git a/util/file_util.h b/util/file_util.h index cd054518e..c3b365c8b 100644 --- a/util/file_util.h +++ b/util/file_util.h @@ -10,6 +10,7 @@ #include "rocksdb/env.h" #include "rocksdb/status.h" #include "rocksdb/types.h" +#include "util/filename.h" namespace rocksdb { // use_fsync maps to options.use_fsync, which determines the way that @@ -21,13 +22,9 @@ extern Status CopyFile(Env* env, const std::string& source, extern Status CreateFile(Env* env, const std::string& destination, const std::string& contents, bool use_fsync); -extern Status DeleteSSTFile(const ImmutableDBOptions* db_options, - const std::string& fname, - const std::string& path_to_sync); - extern Status DeleteDBFile(const ImmutableDBOptions* db_options, - const std::string& fname, - const std::string& path_to_sync, - const bool force_bg); + const std::string& fname, + const std::string& path_to_sync, + const bool force_bg = false); } // namespace rocksdb diff --git a/utilities/blob_db/blob_db_test.cc b/utilities/blob_db/blob_db_test.cc index e5c10f0c2..afb953df9 100644 --- a/utilities/blob_db/blob_db_test.cc +++ b/utilities/blob_db/blob_db_test.cc @@ -812,7 +812,11 @@ TEST_F(BlobDBTest, SstFileManager) { Destroy(); // Make sure that DestroyBlobDB() also goes through delete scheduler. ASSERT_GE(files_scheduled_to_delete, 2); - ASSERT_EQ(0, files_deleted_directly); + // Due to a timing issue, the WAL may or may not be deleted directly. The + // blob file is first scheduled, followed by WAL. If the background trash + // thread does not wake up on time, the WAL file will be directly + // deleted as the trash size will be > DB size + ASSERT_LE(files_deleted_directly, 1); SyncPoint::GetInstance()->DisableProcessing(); sfm->WaitForEmptyTrash(); } @@ -855,7 +859,8 @@ TEST_F(BlobDBTest, SstFileManagerRestart) { // Make sure that reopening the DB rescan the existing trash files Open(bdb_options, db_options); ASSERT_GE(files_scheduled_to_delete, 3); - ASSERT_EQ(0, files_deleted_directly); + // Depending on timing, the WAL file may or may not be directly deleted + ASSERT_LE(files_deleted_directly, 1); sfm->WaitForEmptyTrash();