Smooth the deletion of WAL files (#5116)

Summary:
WAL files are currently not subject to deletion rate limiting by DeleteScheduler. If the size of the WAL files is significant, this can cause a high delete rate on SSDs that may affect other operations. To fix it, force WAL file deletions to go through the SstFileManager. Original PR for this is #2768
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5116

Differential Revision: D14669437

Pulled By: anand1976

fbshipit-source-id: c5f62d0640cebaa1574de841a1d01e4ce2faadf0
This commit is contained in:
anand76 2019-03-28 15:13:02 -07:00 committed by Facebook Github Bot
parent a98317f555
commit dae3b5545c
8 changed files with 108 additions and 35 deletions

View File

@ -2878,8 +2878,8 @@ Status DestroyDB(const std::string& dbname, const Options& options,
std::string path_to_delete = dbname + "/" + fname; std::string path_to_delete = dbname + "/" + fname;
if (type == kMetaDatabase) { if (type == kMetaDatabase) {
del = DestroyDB(path_to_delete, options); del = DestroyDB(path_to_delete, options);
} else if (type == kTableFile) { } else if (type == kTableFile || type == kLogFile) {
del = DeleteSSTFile(&soptions, path_to_delete, dbname); del = DeleteDBFile(&soptions, path_to_delete, dbname);
} else { } else {
del = env->DeleteFile(path_to_delete); del = env->DeleteFile(path_to_delete);
} }
@ -2913,7 +2913,7 @@ Status DestroyDB(const std::string& dbname, const Options& options,
if (ParseFileName(fname, &number, &type) && if (ParseFileName(fname, &number, &type) &&
type == kTableFile) { // Lock file will be deleted at end type == kTableFile) { // Lock file will be deleted at end
std::string table_path = path + "/" + fname; std::string table_path = path + "/" + fname;
Status del = DeleteSSTFile(&soptions, table_path, dbname); Status del = DeleteDBFile(&soptions, table_path, dbname);
if (result.ok() && !del.ok()) { if (result.ok() && !del.ok()) {
result = del; result = del;
} }
@ -2939,7 +2939,8 @@ Status DestroyDB(const std::string& dbname, const Options& options,
// Delete archival files. // Delete archival files.
for (const auto& file : archiveFiles) { for (const auto& file : archiveFiles) {
if (ParseFileName(file, &number, &type) && type == kLogFile) { if (ParseFileName(file, &number, &type) && type == kLogFile) {
Status del = env->DeleteFile(archivedir + "/" + file); Status del =
DeleteDBFile(&soptions, archivedir + "/" + file, archivedir);
if (result.ok() && !del.ok()) { if (result.ok() && !del.ok()) {
result = del; result = del;
} }
@ -2952,7 +2953,9 @@ Status DestroyDB(const std::string& dbname, const Options& options,
if (wal_dir_exists) { if (wal_dir_exists) {
for (const auto& file : walDirFiles) { for (const auto& file : walDirFiles) {
if (ParseFileName(file, &number, &type) && type == kLogFile) { if (ParseFileName(file, &number, &type) && type == kLogFile) {
Status del = env->DeleteFile(LogFileName(soptions.wal_dir, number)); Status del =
DeleteDBFile(&soptions, LogFileName(soptions.wal_dir, number),
soptions.wal_dir);
if (result.ok() && !del.ok()) { if (result.ok() && !del.ok()) {
result = del; result = del;
} }

View File

@ -259,9 +259,9 @@ void DBImpl::DeleteObsoleteFileImpl(int job_id, const std::string& fname,
const std::string& path_to_sync, const std::string& path_to_sync,
FileType type, uint64_t number) { FileType type, uint64_t number) {
Status file_deletion_status; Status file_deletion_status;
if (type == kTableFile) { if (type == kTableFile || type == kLogFile) {
file_deletion_status = file_deletion_status =
DeleteSSTFile(&immutable_db_options_, fname, path_to_sync); DeleteDBFile(&immutable_db_options_, fname, path_to_sync);
} else { } else {
file_deletion_status = env_->DeleteFile(fname); file_deletion_status = env_->DeleteFile(fname);
} }

View File

@ -367,14 +367,16 @@ TEST_F(DBSSTTest, RateLimitedDelete) {
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get()); auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1); sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1);
WriteOptions wo;
wo.disableWAL = true;
ASSERT_OK(TryReopen(options)); ASSERT_OK(TryReopen(options));
// Create 4 files in L0 // Create 4 files in L0
for (char v = 'a'; v <= 'd'; v++) { for (char v = 'a'; v <= 'd'; v++) {
ASSERT_OK(Put("Key2", DummyString(1024, v))); ASSERT_OK(Put("Key2", DummyString(1024, v), wo));
ASSERT_OK(Put("Key3", DummyString(1024, v))); ASSERT_OK(Put("Key3", DummyString(1024, v), wo));
ASSERT_OK(Put("Key4", DummyString(1024, v))); ASSERT_OK(Put("Key4", DummyString(1024, v), wo));
ASSERT_OK(Put("Key1", DummyString(1024, v))); ASSERT_OK(Put("Key1", DummyString(1024, v), wo));
ASSERT_OK(Put("Key4", DummyString(1024, v))); ASSERT_OK(Put("Key4", DummyString(1024, v), wo));
ASSERT_OK(Flush()); ASSERT_OK(Flush());
} }
// We created 4 sst files in L0 // We created 4 sst files in L0
@ -408,6 +410,55 @@ TEST_F(DBSSTTest, RateLimitedDelete) {
rocksdb::SyncPoint::GetInstance()->DisableProcessing(); rocksdb::SyncPoint::GetInstance()->DisableProcessing();
} }
TEST_F(DBSSTTest, RateLimitedWALDelete) {
Destroy(last_options_);
std::vector<uint64_t> penalties;
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DeleteScheduler::BackgroundEmptyTrash:Wait",
[&](void* arg) { penalties.push_back(*(static_cast<uint64_t*>(arg))); });
env_->no_slowdown_ = true;
env_->time_elapse_only_sleep_ = true;
Options options = CurrentOptions();
options.disable_auto_compactions = true;
options.env = env_;
int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec
Status s;
options.sst_file_manager.reset(
NewSstFileManager(env_, nullptr, "", 0, false, &s, 0));
ASSERT_OK(s);
options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec);
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
sfm->delete_scheduler()->SetMaxTrashDBRatio(2.1);
ASSERT_OK(TryReopen(options));
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
// Create 4 files in L0
for (char v = 'a'; v <= 'd'; v++) {
ASSERT_OK(Put("Key2", DummyString(1024, v)));
ASSERT_OK(Put("Key3", DummyString(1024, v)));
ASSERT_OK(Put("Key4", DummyString(1024, v)));
ASSERT_OK(Put("Key1", DummyString(1024, v)));
ASSERT_OK(Put("Key4", DummyString(1024, v)));
ASSERT_OK(Flush());
}
// We created 4 sst files in L0
ASSERT_EQ("4", FilesPerLevel(0));
// Compaction will move the 4 files in L0 to trash and create 1 L1 file
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
ASSERT_EQ("0,1", FilesPerLevel(0));
sfm->WaitForEmptyTrash();
ASSERT_EQ(penalties.size(), 8);
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
TEST_F(DBSSTTest, OpenDBWithExistingTrash) { TEST_F(DBSSTTest, OpenDBWithExistingTrash) {
Options options = CurrentOptions(); Options options = CurrentOptions();
@ -446,7 +497,6 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
rocksdb::SyncPoint::GetInstance()->SetCallBack( rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DeleteScheduler::DeleteFile", "DeleteScheduler::DeleteFile",
[&](void* /*arg*/) { bg_delete_file++; }); [&](void* /*arg*/) { bg_delete_file++; });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
Options options = CurrentOptions(); Options options = CurrentOptions();
options.disable_auto_compactions = true; options.disable_auto_compactions = true;
@ -464,10 +514,14 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get()); auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
DestroyAndReopen(options); DestroyAndReopen(options);
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
WriteOptions wo;
wo.disableWAL = true;
// Create 4 files in L0 // Create 4 files in L0
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A'))); ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A'), wo));
ASSERT_OK(Flush()); ASSERT_OK(Flush());
} }
// We created 4 sst files in L0 // We created 4 sst files in L0
@ -483,7 +537,7 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
// Create 4 files in L0 // Create 4 files in L0
for (int i = 4; i < 8; i++) { for (int i = 4; i < 8; i++) {
ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B'))); ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B'), wo));
ASSERT_OK(Flush()); ASSERT_OK(Flush());
} }
ASSERT_EQ("4,1", FilesPerLevel(0)); ASSERT_EQ("4,1", FilesPerLevel(0));
@ -538,14 +592,27 @@ TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) {
// Close DB and destroy it using DeleteScheduler // Close DB and destroy it using DeleteScheduler
Close(); Close();
int num_sst_files = 0;
int num_wal_files = 0;
std::vector<std::string> db_files;
env_->GetChildren(dbname_, &db_files);
for (std::string f : db_files) {
if (f.substr(f.find_last_of(".") + 1) == "sst") {
num_sst_files++;
} else if (f.substr(f.find_last_of(".") + 1) == "log") {
num_wal_files++;
}
}
ASSERT_GT(num_sst_files, 0);
ASSERT_GT(num_wal_files, 0);
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get()); auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
sfm->SetDeleteRateBytesPerSecond(1024 * 1024); sfm->SetDeleteRateBytesPerSecond(1024 * 1024);
sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1); sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1);
ASSERT_OK(DestroyDB(dbname_, options)); ASSERT_OK(DestroyDB(dbname_, options));
sfm->WaitForEmptyTrash(); sfm->WaitForEmptyTrash();
// We have deleted the 4 sst files in the delete_scheduler ASSERT_EQ(bg_delete_file, num_sst_files + num_wal_files);
ASSERT_EQ(bg_delete_file, 4);
} }
TEST_F(DBSSTTest, DBWithMaxSpaceAllowed) { TEST_F(DBSSTTest, DBWithMaxSpaceAllowed) {

View File

@ -29,6 +29,7 @@
#include "util/cast_util.h" #include "util/cast_util.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/file_reader_writer.h" #include "util/file_reader_writer.h"
#include "util/file_util.h"
#include "util/filename.h" #include "util/filename.h"
#include "util/logging.h" #include "util/logging.h"
#include "util/mutexlock.h" #include "util/mutexlock.h"
@ -190,7 +191,7 @@ void WalManager::PurgeObsoleteWALFiles() {
continue; continue;
} }
if (now_seconds - file_m_time > db_options_.wal_ttl_seconds) { if (now_seconds - file_m_time > db_options_.wal_ttl_seconds) {
s = env_->DeleteFile(file_path); s = DeleteDBFile(&db_options_, file_path, archival_dir, false);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_WARN(db_options_.info_log, "Can't delete file: %s: %s", ROCKS_LOG_WARN(db_options_.info_log, "Can't delete file: %s: %s",
file_path.c_str(), s.ToString().c_str()); file_path.c_str(), s.ToString().c_str());
@ -216,7 +217,7 @@ void WalManager::PurgeObsoleteWALFiles() {
log_file_size = std::max(log_file_size, file_size); log_file_size = std::max(log_file_size, file_size);
++log_files_num; ++log_files_num;
} else { } else {
s = env_->DeleteFile(file_path); s = DeleteDBFile(&db_options_, file_path, archival_dir, false);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_WARN(db_options_.info_log, ROCKS_LOG_WARN(db_options_.info_log,
"Unable to delete file: %s: %s", file_path.c_str(), "Unable to delete file: %s: %s", file_path.c_str(),
@ -255,7 +256,8 @@ void WalManager::PurgeObsoleteWALFiles() {
for (size_t i = 0; i < files_del_num; ++i) { for (size_t i = 0; i < files_del_num; ++i) {
std::string const file_path = archived_logs[i]->PathName(); std::string const file_path = archived_logs[i]->PathName();
s = env_->DeleteFile(db_options_.wal_dir + "/" + file_path); s = DeleteDBFile(&db_options_, db_options_.wal_dir + "/" + file_path,
db_options_.wal_dir, false);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_WARN(db_options_.info_log, "Unable to delete file: %s: %s", ROCKS_LOG_WARN(db_options_.info_log, "Unable to delete file: %s: %s",
file_path.c_str(), s.ToString().c_str()); file_path.c_str(), s.ToString().c_str());

View File

@ -83,6 +83,8 @@ class SstFileManager {
// Create a new SstFileManager that can be shared among multiple RocksDB // Create a new SstFileManager that can be shared among multiple RocksDB
// instances to track SST file and control there deletion rate. // instances to track SST file and control there deletion rate.
// Even though SstFileManager don't track WAL files but it still control
// there deletion rate.
// //
// @param env: Pointer to Env object, please see "rocksdb/env.h". // @param env: Pointer to Env object, please see "rocksdb/env.h".
// @param info_log: If not nullptr, info_log will be used to log errors. // @param info_log: If not nullptr, info_log will be used to log errors.
@ -93,6 +95,7 @@ class SstFileManager {
// this value is set to 1024 (1 Kb / sec) and we deleted a file of size 4 Kb // this value is set to 1024 (1 Kb / sec) and we deleted a file of size 4 Kb
// in 1 second, we will wait for another 3 seconds before we delete other // in 1 second, we will wait for another 3 seconds before we delete other
// files, Set to 0 to disable deletion rate limiting. // files, Set to 0 to disable deletion rate limiting.
// This option also affect the delete rate of WAL files in the DB.
// @param delete_existing_trash: Deprecated, this argument have no effect, but // @param delete_existing_trash: Deprecated, this argument have no effect, but
// if user provide trash_dir we will schedule deletes for files in the dir // if user provide trash_dir we will schedule deletes for files in the dir
// @param status: If not nullptr, status will contain any errors that happened // @param status: If not nullptr, status will contain any errors that happened

View File

@ -87,16 +87,11 @@ Status CreateFile(Env* env, const std::string& destination,
return dest_writer->Sync(use_fsync); return dest_writer->Sync(use_fsync);
} }
Status DeleteSSTFile(const ImmutableDBOptions* db_options,
const std::string& fname, const std::string& dir_to_sync) {
return DeleteDBFile(db_options, fname, dir_to_sync, false);
}
Status DeleteDBFile(const ImmutableDBOptions* db_options, Status DeleteDBFile(const ImmutableDBOptions* db_options,
const std::string& fname, const std::string& dir_to_sync, const std::string& fname, const std::string& dir_to_sync,
const bool force_bg) { const bool force_bg) {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
auto sfm = SstFileManagerImpl* sfm =
static_cast<SstFileManagerImpl*>(db_options->sst_file_manager.get()); static_cast<SstFileManagerImpl*>(db_options->sst_file_manager.get());
if (sfm) { if (sfm) {
return sfm->ScheduleFileDeletion(fname, dir_to_sync, force_bg); return sfm->ScheduleFileDeletion(fname, dir_to_sync, force_bg);
@ -107,6 +102,7 @@ Status DeleteDBFile(const ImmutableDBOptions* db_options,
(void)dir_to_sync; (void)dir_to_sync;
(void)force_bg; (void)force_bg;
// SstFileManager is not supported in ROCKSDB_LITE // SstFileManager is not supported in ROCKSDB_LITE
// Delete file immediately
return db_options->env->DeleteFile(fname); return db_options->env->DeleteFile(fname);
#endif #endif
} }

View File

@ -10,6 +10,7 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/status.h" #include "rocksdb/status.h"
#include "rocksdb/types.h" #include "rocksdb/types.h"
#include "util/filename.h"
namespace rocksdb { namespace rocksdb {
// use_fsync maps to options.use_fsync, which determines the way that // use_fsync maps to options.use_fsync, which determines the way that
@ -21,13 +22,9 @@ extern Status CopyFile(Env* env, const std::string& source,
extern Status CreateFile(Env* env, const std::string& destination, extern Status CreateFile(Env* env, const std::string& destination,
const std::string& contents, bool use_fsync); const std::string& contents, bool use_fsync);
extern Status DeleteSSTFile(const ImmutableDBOptions* db_options,
const std::string& fname,
const std::string& path_to_sync);
extern Status DeleteDBFile(const ImmutableDBOptions* db_options, extern Status DeleteDBFile(const ImmutableDBOptions* db_options,
const std::string& fname, const std::string& fname,
const std::string& path_to_sync, const std::string& path_to_sync,
const bool force_bg); const bool force_bg = false);
} // namespace rocksdb } // namespace rocksdb

View File

@ -812,7 +812,11 @@ TEST_F(BlobDBTest, SstFileManager) {
Destroy(); Destroy();
// Make sure that DestroyBlobDB() also goes through delete scheduler. // Make sure that DestroyBlobDB() also goes through delete scheduler.
ASSERT_GE(files_scheduled_to_delete, 2); ASSERT_GE(files_scheduled_to_delete, 2);
ASSERT_EQ(0, files_deleted_directly); // Due to a timing issue, the WAL may or may not be deleted directly. The
// blob file is first scheduled, followed by WAL. If the background trash
// thread does not wake up on time, the WAL file will be directly
// deleted as the trash size will be > DB size
ASSERT_LE(files_deleted_directly, 1);
SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->DisableProcessing();
sfm->WaitForEmptyTrash(); sfm->WaitForEmptyTrash();
} }
@ -855,7 +859,8 @@ TEST_F(BlobDBTest, SstFileManagerRestart) {
// Make sure that reopening the DB rescan the existing trash files // Make sure that reopening the DB rescan the existing trash files
Open(bdb_options, db_options); Open(bdb_options, db_options);
ASSERT_GE(files_scheduled_to_delete, 3); ASSERT_GE(files_scheduled_to_delete, 3);
ASSERT_EQ(0, files_deleted_directly); // Depending on timing, the WAL file may or may not be directly deleted
ASSERT_LE(files_deleted_directly, 1);
sfm->WaitForEmptyTrash(); sfm->WaitForEmptyTrash();