SstFileManager: add bytes_max_delete_chunk
Summary: Add `bytes_max_delete_chunk` in SstFileManager so that we can drop a large file in multiple batches. Closes https://github.com/facebook/rocksdb/pull/3640 Differential Revision: D7358679 Pulled By: siying fbshipit-source-id: ef17f0da2f5723dbece2669485a9b91b3edc0bb7
This commit is contained in:
parent
88c3e26cc0
commit
118058ba69
@ -17,6 +17,7 @@
|
|||||||
* Avoid unnecessarily flushing in `CompactRange()` when the range specified by the user does not overlap unflushed memtables.
|
* Avoid unnecessarily flushing in `CompactRange()` when the range specified by the user does not overlap unflushed memtables.
|
||||||
* If `ColumnFamilyOptions::max_subcompactions` is set greater than one, we now parallelize large manual level-based compactions.
|
* If `ColumnFamilyOptions::max_subcompactions` is set greater than one, we now parallelize large manual level-based compactions.
|
||||||
* Add "rocksdb.live-sst-files-size" DB property to return total bytes of all SST files belong to the latest LSM tree.
|
* Add "rocksdb.live-sst-files-size" DB property to return total bytes of all SST files belong to the latest LSM tree.
|
||||||
|
* NewSstFileManager to add an argument bytes_max_delete_chunk with default 64MB. With this argument, a file larger than 64MB will be ftruncated multiple times based on this size.
|
||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
* Fix a leak in prepared_section_completed_ where the zeroed entries would not removed from the map.
|
* Fix a leak in prepared_section_completed_ where the zeroed entries would not removed from the map.
|
||||||
|
@ -96,10 +96,14 @@ class SstFileManager {
|
|||||||
// @param max_trash_db_ratio: If the trash size constitutes for more than this
|
// @param max_trash_db_ratio: If the trash size constitutes for more than this
|
||||||
// fraction of the total DB size we will start deleting new files passed to
|
// fraction of the total DB size we will start deleting new files passed to
|
||||||
// DeleteScheduler immediately
|
// DeleteScheduler immediately
|
||||||
|
// @param bytes_max_delete_chunk: if a single file is larger than delete chunk,
|
||||||
|
// ftruncate the file by this size each time, rather than dropping the whole
|
||||||
|
// file. 0 means to always delete the whole file.
|
||||||
extern SstFileManager* NewSstFileManager(
|
extern SstFileManager* NewSstFileManager(
|
||||||
Env* env, std::shared_ptr<Logger> info_log = nullptr,
|
Env* env, std::shared_ptr<Logger> info_log = nullptr,
|
||||||
std::string trash_dir = "", int64_t rate_bytes_per_sec = 0,
|
std::string trash_dir = "", int64_t rate_bytes_per_sec = 0,
|
||||||
bool delete_existing_trash = true, Status* status = nullptr,
|
bool delete_existing_trash = true, Status* status = nullptr,
|
||||||
double max_trash_db_ratio = 0.25);
|
double max_trash_db_ratio = 0.25,
|
||||||
|
uint64_t bytes_max_delete_chunk = 64 * 1024 * 1024);
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -22,11 +22,13 @@ namespace rocksdb {
|
|||||||
DeleteScheduler::DeleteScheduler(Env* env, int64_t rate_bytes_per_sec,
|
DeleteScheduler::DeleteScheduler(Env* env, int64_t rate_bytes_per_sec,
|
||||||
Logger* info_log,
|
Logger* info_log,
|
||||||
SstFileManagerImpl* sst_file_manager,
|
SstFileManagerImpl* sst_file_manager,
|
||||||
double max_trash_db_ratio)
|
double max_trash_db_ratio,
|
||||||
|
uint64_t bytes_max_delete_chunk)
|
||||||
: env_(env),
|
: env_(env),
|
||||||
total_trash_size_(0),
|
total_trash_size_(0),
|
||||||
rate_bytes_per_sec_(rate_bytes_per_sec),
|
rate_bytes_per_sec_(rate_bytes_per_sec),
|
||||||
pending_files_(0),
|
pending_files_(0),
|
||||||
|
bytes_max_delete_chunk_(bytes_max_delete_chunk),
|
||||||
closing_(false),
|
closing_(false),
|
||||||
cv_(&mu_),
|
cv_(&mu_),
|
||||||
info_log_(info_log),
|
info_log_(info_log),
|
||||||
@ -208,15 +210,18 @@ void DeleteScheduler::BackgroundEmptyTrash() {
|
|||||||
|
|
||||||
// Get new file to delete
|
// Get new file to delete
|
||||||
std::string path_in_trash = queue_.front();
|
std::string path_in_trash = queue_.front();
|
||||||
queue_.pop();
|
|
||||||
|
|
||||||
// We dont need to hold the lock while deleting the file
|
// We dont need to hold the lock while deleting the file
|
||||||
mu_.Unlock();
|
mu_.Unlock();
|
||||||
uint64_t deleted_bytes = 0;
|
uint64_t deleted_bytes = 0;
|
||||||
|
bool is_complete = true;
|
||||||
// Delete file from trash and update total_penlty value
|
// Delete file from trash and update total_penlty value
|
||||||
Status s = DeleteTrashFile(path_in_trash, &deleted_bytes);
|
Status s = DeleteTrashFile(path_in_trash, &deleted_bytes, &is_complete);
|
||||||
total_deleted_bytes += deleted_bytes;
|
total_deleted_bytes += deleted_bytes;
|
||||||
mu_.Lock();
|
mu_.Lock();
|
||||||
|
if (is_complete) {
|
||||||
|
queue_.pop();
|
||||||
|
}
|
||||||
|
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
bg_errors_[path_in_trash] = s;
|
bg_errors_[path_in_trash] = s;
|
||||||
@ -236,7 +241,9 @@ void DeleteScheduler::BackgroundEmptyTrash() {
|
|||||||
TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait",
|
TEST_SYNC_POINT_CALLBACK("DeleteScheduler::BackgroundEmptyTrash:Wait",
|
||||||
&total_penlty);
|
&total_penlty);
|
||||||
|
|
||||||
pending_files_--;
|
if (is_complete) {
|
||||||
|
pending_files_--;
|
||||||
|
}
|
||||||
if (pending_files_ == 0) {
|
if (pending_files_ == 0) {
|
||||||
// Unblock WaitForEmptyTrash since there are no more files waiting
|
// Unblock WaitForEmptyTrash since there are no more files waiting
|
||||||
// to be deleted
|
// to be deleted
|
||||||
@ -247,23 +254,49 @@ void DeleteScheduler::BackgroundEmptyTrash() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
|
Status DeleteScheduler::DeleteTrashFile(const std::string& path_in_trash,
|
||||||
uint64_t* deleted_bytes) {
|
uint64_t* deleted_bytes,
|
||||||
|
bool* is_complete) {
|
||||||
uint64_t file_size;
|
uint64_t file_size;
|
||||||
Status s = env_->GetFileSize(path_in_trash, &file_size);
|
Status s = env_->GetFileSize(path_in_trash, &file_size);
|
||||||
|
*is_complete = true;
|
||||||
|
TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile");
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:DeleteFile");
|
bool need_full_delete = true;
|
||||||
s = env_->DeleteFile(path_in_trash);
|
if (bytes_max_delete_chunk_ != 0 && file_size > bytes_max_delete_chunk_) {
|
||||||
}
|
unique_ptr<WritableFile> wf;
|
||||||
|
Status my_status =
|
||||||
|
env_->ReopenWritableFile(path_in_trash, &wf, EnvOptions());
|
||||||
|
if (my_status.ok()) {
|
||||||
|
my_status = wf->Truncate(file_size - bytes_max_delete_chunk_);
|
||||||
|
if (my_status.ok()) {
|
||||||
|
TEST_SYNC_POINT("DeleteScheduler::DeleteTrashFile:Fsync");
|
||||||
|
my_status = wf->Fsync();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (my_status.ok()) {
|
||||||
|
*deleted_bytes = bytes_max_delete_chunk_;
|
||||||
|
need_full_delete = false;
|
||||||
|
*is_complete = false;
|
||||||
|
} else {
|
||||||
|
ROCKS_LOG_WARN(info_log_,
|
||||||
|
"Failed to partially delete %s from trash -- %s",
|
||||||
|
path_in_trash.c_str(), my_status.ToString().c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (need_full_delete) {
|
||||||
|
s = env_->DeleteFile(path_in_trash);
|
||||||
|
*deleted_bytes = file_size;
|
||||||
|
sst_file_manager_->OnDeleteFile(path_in_trash);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
// Error while getting file size or while deleting
|
// Error while getting file size or while deleting
|
||||||
ROCKS_LOG_ERROR(info_log_, "Failed to delete %s from trash -- %s",
|
ROCKS_LOG_ERROR(info_log_, "Failed to delete %s from trash -- %s",
|
||||||
path_in_trash.c_str(), s.ToString().c_str());
|
path_in_trash.c_str(), s.ToString().c_str());
|
||||||
*deleted_bytes = 0;
|
*deleted_bytes = 0;
|
||||||
} else {
|
} else {
|
||||||
*deleted_bytes = file_size;
|
total_trash_size_.fetch_sub(*deleted_bytes);
|
||||||
total_trash_size_.fetch_sub(file_size);
|
|
||||||
sst_file_manager_->OnDeleteFile(path_in_trash);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return s;
|
return s;
|
||||||
|
@ -34,7 +34,7 @@ class DeleteScheduler {
|
|||||||
public:
|
public:
|
||||||
DeleteScheduler(Env* env, int64_t rate_bytes_per_sec, Logger* info_log,
|
DeleteScheduler(Env* env, int64_t rate_bytes_per_sec, Logger* info_log,
|
||||||
SstFileManagerImpl* sst_file_manager,
|
SstFileManagerImpl* sst_file_manager,
|
||||||
double max_trash_db_ratio);
|
double max_trash_db_ratio, uint64_t bytes_max_delete_chunk);
|
||||||
|
|
||||||
~DeleteScheduler();
|
~DeleteScheduler();
|
||||||
|
|
||||||
@ -82,7 +82,7 @@ class DeleteScheduler {
|
|||||||
Status MarkAsTrash(const std::string& file_path, std::string* path_in_trash);
|
Status MarkAsTrash(const std::string& file_path, std::string* path_in_trash);
|
||||||
|
|
||||||
Status DeleteTrashFile(const std::string& path_in_trash,
|
Status DeleteTrashFile(const std::string& path_in_trash,
|
||||||
uint64_t* deleted_bytes);
|
uint64_t* deleted_bytes, bool* is_complete);
|
||||||
|
|
||||||
void BackgroundEmptyTrash();
|
void BackgroundEmptyTrash();
|
||||||
|
|
||||||
@ -97,6 +97,7 @@ class DeleteScheduler {
|
|||||||
std::queue<std::string> queue_;
|
std::queue<std::string> queue_;
|
||||||
// Number of trash files that are waiting to be deleted
|
// Number of trash files that are waiting to be deleted
|
||||||
int32_t pending_files_;
|
int32_t pending_files_;
|
||||||
|
uint64_t bytes_max_delete_chunk_;
|
||||||
// Errors that happened in BackgroundEmptyTrash (file_path => error)
|
// Errors that happened in BackgroundEmptyTrash (file_path => error)
|
||||||
std::map<std::string, Status> bg_errors_;
|
std::map<std::string, Status> bg_errors_;
|
||||||
// Set to true in ~DeleteScheduler() to force BackgroundEmptyTrash to stop
|
// Set to true in ~DeleteScheduler() to force BackgroundEmptyTrash to stop
|
||||||
|
@ -99,7 +99,7 @@ class DeleteSchedulerTest : public testing::Test {
|
|||||||
// 25%)
|
// 25%)
|
||||||
sst_file_mgr_.reset(
|
sst_file_mgr_.reset(
|
||||||
new SstFileManagerImpl(env_, nullptr, rate_bytes_per_sec_,
|
new SstFileManagerImpl(env_, nullptr, rate_bytes_per_sec_,
|
||||||
/* max_trash_db_ratio= */ 1.1));
|
/* max_trash_db_ratio= */ 1.1, 128 * 1024));
|
||||||
delete_scheduler_ = sst_file_mgr_->delete_scheduler();
|
delete_scheduler_ = sst_file_mgr_->delete_scheduler();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -436,6 +436,34 @@ TEST_F(DeleteSchedulerTest, StartBGEmptyTrashMultipleTimes) {
|
|||||||
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(DeleteSchedulerTest, DeletePartialFile) {
|
||||||
|
int bg_delete_file = 0;
|
||||||
|
int bg_fsync = 0;
|
||||||
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"DeleteScheduler::DeleteTrashFile:DeleteFile",
|
||||||
|
[&](void*) { bg_delete_file++; });
|
||||||
|
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
||||||
|
"DeleteScheduler::DeleteTrashFile:Fsync", [&](void*) { bg_fsync++; });
|
||||||
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||||
|
|
||||||
|
rate_bytes_per_sec_ = 1024 * 1024; // 1 MB / sec
|
||||||
|
NewDeleteScheduler();
|
||||||
|
|
||||||
|
// Should delete in 4 batch
|
||||||
|
ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile("data_1", 500 * 1024)));
|
||||||
|
ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile("data_2", 100 * 1024)));
|
||||||
|
// Should delete in 2 batch
|
||||||
|
ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile("data_2", 200 * 1024)));
|
||||||
|
|
||||||
|
delete_scheduler_->WaitForEmptyTrash();
|
||||||
|
|
||||||
|
auto bg_errors = delete_scheduler_->GetBackgroundErrors();
|
||||||
|
ASSERT_EQ(bg_errors.size(), 0);
|
||||||
|
ASSERT_EQ(7, bg_delete_file);
|
||||||
|
ASSERT_EQ(4, bg_fsync);
|
||||||
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||||
|
}
|
||||||
|
|
||||||
// 1- Create a DeleteScheduler with very slow rate limit (1 Byte / sec)
|
// 1- Create a DeleteScheduler with very slow rate limit (1 Byte / sec)
|
||||||
// 2- Delete 100 files using DeleteScheduler
|
// 2- Delete 100 files using DeleteScheduler
|
||||||
// 3- Delete the DeleteScheduler (call the destructor while queue is not empty)
|
// 3- Delete the DeleteScheduler (call the destructor while queue is not empty)
|
||||||
|
@ -18,7 +18,8 @@ namespace rocksdb {
|
|||||||
#ifndef ROCKSDB_LITE
|
#ifndef ROCKSDB_LITE
|
||||||
SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger,
|
SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger,
|
||||||
int64_t rate_bytes_per_sec,
|
int64_t rate_bytes_per_sec,
|
||||||
double max_trash_db_ratio)
|
double max_trash_db_ratio,
|
||||||
|
uint64_t bytes_max_delete_chunk)
|
||||||
: env_(env),
|
: env_(env),
|
||||||
logger_(logger),
|
logger_(logger),
|
||||||
total_files_size_(0),
|
total_files_size_(0),
|
||||||
@ -26,7 +27,7 @@ SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger,
|
|||||||
cur_compactions_reserved_size_(0),
|
cur_compactions_reserved_size_(0),
|
||||||
max_allowed_space_(0),
|
max_allowed_space_(0),
|
||||||
delete_scheduler_(env, rate_bytes_per_sec, logger.get(), this,
|
delete_scheduler_(env, rate_bytes_per_sec, logger.get(), this,
|
||||||
max_trash_db_ratio) {}
|
max_trash_db_ratio, bytes_max_delete_chunk) {}
|
||||||
|
|
||||||
SstFileManagerImpl::~SstFileManagerImpl() {}
|
SstFileManagerImpl::~SstFileManagerImpl() {}
|
||||||
|
|
||||||
@ -196,10 +197,11 @@ SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log,
|
|||||||
std::string trash_dir,
|
std::string trash_dir,
|
||||||
int64_t rate_bytes_per_sec,
|
int64_t rate_bytes_per_sec,
|
||||||
bool delete_existing_trash, Status* status,
|
bool delete_existing_trash, Status* status,
|
||||||
double max_trash_db_ratio) {
|
double max_trash_db_ratio,
|
||||||
|
uint64_t bytes_max_delete_chunk) {
|
||||||
SstFileManagerImpl* res =
|
SstFileManagerImpl* res =
|
||||||
new SstFileManagerImpl(env, info_log, rate_bytes_per_sec,
|
new SstFileManagerImpl(env, info_log, rate_bytes_per_sec,
|
||||||
max_trash_db_ratio);
|
max_trash_db_ratio, bytes_max_delete_chunk);
|
||||||
|
|
||||||
// trash_dir is deprecated and not needed anymore, but if user passed it
|
// trash_dir is deprecated and not needed anymore, but if user passed it
|
||||||
// we will still remove files in it.
|
// we will still remove files in it.
|
||||||
@ -236,7 +238,8 @@ SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log,
|
|||||||
std::string trash_dir,
|
std::string trash_dir,
|
||||||
int64_t rate_bytes_per_sec,
|
int64_t rate_bytes_per_sec,
|
||||||
bool delete_existing_trash, Status* status,
|
bool delete_existing_trash, Status* status,
|
||||||
double max_trash_db_ratio) {
|
double max_trash_db_ratio,
|
||||||
|
uint64_t bytes_max_delete_chunk) {
|
||||||
if (status) {
|
if (status) {
|
||||||
*status =
|
*status =
|
||||||
Status::NotSupported("SstFileManager is not supported in ROCKSDB_LITE");
|
Status::NotSupported("SstFileManager is not supported in ROCKSDB_LITE");
|
||||||
|
@ -27,7 +27,8 @@ class SstFileManagerImpl : public SstFileManager {
|
|||||||
public:
|
public:
|
||||||
explicit SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger,
|
explicit SstFileManagerImpl(Env* env, std::shared_ptr<Logger> logger,
|
||||||
int64_t rate_bytes_per_sec,
|
int64_t rate_bytes_per_sec,
|
||||||
double max_trash_db_ratio);
|
double max_trash_db_ratio,
|
||||||
|
uint64_t bytes_max_delete_chunk);
|
||||||
|
|
||||||
~SstFileManagerImpl();
|
~SstFileManagerImpl();
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user