Auto-GarbageCollect on PurgeOldBackups and DeleteBackup (#6015)

Summary:
Only if there is a crash, power failure, or I/O error in
DeleteBackup, shared or private files from the backup might be left
behind that are not cleaned up by PurgeOldBackups or DeleteBackup-- only
by GarbageCollect. This makes the BackupEngine API "leaky by default."
Even if it means a modest performance hit, I think we should make
Delete and Purge do as they say, with ongoing best effort: i.e. future
calls will attempt to finish any incomplete work from earlier calls.

This change does that by having DeleteBackup and PurgeOldBackups do a
GarbageCollect, unless (to minimize performance hit) this BackupEngine
has already done a GarbageCollect and there have been no
deletion-related I/O errors in that GarbageCollect or since then.

Rejected alternative 1: remove meta file last instead of first. This would in theory turn partially deleted backups into corrupted backups, but code changes would be needed to allow the missing files and consider it acceptably corrupt, rather than failing to open the BackupEngine. This might be a reasonable choice, but I mostly rejected it because it doesn't solve the legacy problem of cleaning up existing lingering files.

Rejected alternative 2: use a deletion marker file. If deletion started with creating a file that marks a backup as flagged for deletion, then we could reliably detect partially deleted backups and efficiently finish removing them. In addition to not solving the legacy problem, this could be precarious if there's a disk full situation, and we try to create a new file in order to delete some files. Ugh.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6015

Test Plan: Updated unit tests

Differential Revision: D18401333

Pulled By: pdillinger

fbshipit-source-id: 12944e372ce6809f3f5a4c416c3b321a8927d925
This commit is contained in:
Peter Dillinger 2019-11-08 19:13:41 -08:00
parent cb1dc29655
commit a6d418384d
4 changed files with 207 additions and 73 deletions

View File

@ -3,6 +3,7 @@
### Bug Fixes
* Fix a assertion failure in MultiGe4t() when BlockBasedTableOptions::no_block_cache is true and there is no compressed block cache
* Fix a buffer overrun problem in BlockBasedTable::MultiGet() when compression is enabled and no compressed block cache is configured.
* If a call to BackupEngine::PurgeOldBackups or BackupEngine::DeleteBackup suffered a crash, power failure, or I/O error, files could be left over from old backups that could only be purged with a call to GarbageCollect. Any call to PurgeOldBackups, DeleteBackup, or GarbageCollect should now suffice to purge such files.
## 6.5.1 (10/16/2019)
### Bug Fixes

View File

@ -325,7 +325,10 @@ class BackupEngine {
// Will delete all the files we don't need anymore
// It will do the full scan of the files/ directory and delete all the
// files that are not referenced.
// files that are not referenced. PurgeOldBackups() and DeleteBackup()
// will do a similar operation as needed to clean up from any incomplete
// deletions, so this function is not really needed if calling one of
// those.
virtual Status GarbageCollect() = 0;
};

View File

@ -9,24 +9,10 @@
#ifndef ROCKSDB_LITE
#include "rocksdb/utilities/backupable_db.h"
#include "file/filename.h"
#include "logging/logging.h"
#include "port/port.h"
#include "rocksdb/rate_limiter.h"
#include "rocksdb/transaction_log.h"
#include "test_util/sync_point.h"
#include "util/channel.h"
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/file_reader_writer.h"
#include "util/string_util.h"
#include "utilities/checkpoint/checkpoint_impl.h"
#include <cinttypes>
#include <stdlib.h>
#include <algorithm>
#include <atomic>
#include <cinttypes>
#include <functional>
#include <future>
#include <limits>
@ -39,6 +25,20 @@
#include <unordered_set>
#include <vector>
#include "file/filename.h"
#include "logging/logging.h"
#include "port/port.h"
#include "rocksdb/rate_limiter.h"
#include "rocksdb/transaction_log.h"
#include "rocksdb/utilities/backupable_db.h"
#include "test_util/sync_point.h"
#include "util/channel.h"
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/file_reader_writer.h"
#include "util/string_util.h"
#include "utilities/checkpoint/checkpoint_impl.h"
namespace rocksdb {
void BackupStatistics::IncrementNumberSuccessBackup() {
@ -120,6 +120,7 @@ class BackupEngineImpl : public BackupEngine {
private:
void DeleteChildren(const std::string& dir, uint32_t file_type_filter = 0);
Status DeleteBackupInternal(BackupID backup_id);
// Extends the "result" map with pathname->size mappings for the contents of
// "dir" in "env". Pathnames are prefixed with "dir".
@ -456,6 +457,10 @@ class BackupEngineImpl : public BackupEngine {
std::mutex byte_report_mutex_;
channel<CopyOrCreateWorkItem> files_to_copy_or_create_;
std::vector<port::Thread> threads_;
// Certain operations like PurgeOldBackups and DeleteBackup will trigger
// automatic GarbageCollect (true) unless we've already done one in this
// session and have not failed to delete backup files since then (false).
bool might_need_garbage_collect_ = true;
// Adds a file to the backup work queue to be copied or created if it doesn't
// already exist.
@ -559,6 +564,9 @@ Status BackupEngineImpl::Initialize() {
options_.Dump(options_.info_log);
if (!read_only_) {
// we might need to clean up from previous crash or I/O errors
might_need_garbage_collect_ = true;
// gather the list of directories that we need to create
std::vector<std::pair<std::string, std::unique_ptr<Directory>*>>
directories;
@ -928,8 +936,8 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
ROCKS_LOG_INFO(options_.info_log, "Backup Statistics %s\n",
backup_statistics_.ToString().c_str());
// delete files that we might have already written
might_need_garbage_collect_ = true;
DeleteBackup(new_backup_id);
GarbageCollect();
return s;
}
@ -957,6 +965,10 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
Status BackupEngineImpl::PurgeOldBackups(uint32_t num_backups_to_keep) {
assert(initialized_);
assert(!read_only_);
// Best effort deletion even with errors
Status overall_status = Status::OK();
ROCKS_LOG_INFO(options_.info_log, "Purging old backups, keeping %u",
num_backups_to_keep);
std::vector<BackupID> to_delete;
@ -966,17 +978,44 @@ Status BackupEngineImpl::PurgeOldBackups(uint32_t num_backups_to_keep) {
itr++;
}
for (auto backup_id : to_delete) {
auto s = DeleteBackup(backup_id);
auto s = DeleteBackupInternal(backup_id);
if (!s.ok()) {
return s;
overall_status = s;
}
}
return Status::OK();
// Clean up after any incomplete backup deletion, potentially from
// earlier session.
if (might_need_garbage_collect_) {
auto s = GarbageCollect();
if (!s.ok() && overall_status.ok()) {
overall_status = s;
}
}
return overall_status;
}
Status BackupEngineImpl::DeleteBackup(BackupID backup_id) {
auto s1 = DeleteBackupInternal(backup_id);
auto s2 = Status::OK();
// Clean up after any incomplete backup deletion, potentially from
// earlier session.
if (might_need_garbage_collect_) {
s2 = GarbageCollect();
}
if (!s1.ok()) {
return s1;
} else {
return s2;
}
}
// Does not auto-GarbageCollect
Status BackupEngineImpl::DeleteBackupInternal(BackupID backup_id) {
assert(initialized_);
assert(!read_only_);
ROCKS_LOG_INFO(options_.info_log, "Deleting backup %u", backup_id);
auto backup = backups_.find(backup_id);
if (backup != backups_.end()) {
@ -997,6 +1036,10 @@ Status BackupEngineImpl::DeleteBackup(BackupID backup_id) {
corrupt_backups_.erase(corrupt);
}
// After removing meta file, best effort deletion even with errors.
// (Don't delete other files if we can't delete the meta file right
// now.)
if (options_.max_valid_backups_to_open == port::kMaxInt32) {
std::vector<std::string> to_delete;
for (auto& itr : backuped_file_infos_) {
@ -1005,6 +1048,10 @@ Status BackupEngineImpl::DeleteBackup(BackupID backup_id) {
ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s",
itr.first.c_str(), s.ToString().c_str());
to_delete.push_back(itr.first);
if (!s.ok()) {
// Trying again later might work
might_need_garbage_collect_ = true;
}
}
}
for (auto& td : to_delete) {
@ -1023,6 +1070,10 @@ Status BackupEngineImpl::DeleteBackup(BackupID backup_id) {
Status s = backup_env_->DeleteDir(GetAbsolutePath(private_dir));
ROCKS_LOG_INFO(options_.info_log, "Deleting private dir %s -- %s",
private_dir.c_str(), s.ToString().c_str());
if (!s.ok()) {
// Full gc or trying again later might work
might_need_garbage_collect_ = true;
}
return Status::OK();
}
@ -1505,8 +1556,15 @@ Status BackupEngineImpl::InsertPathnameToSizeBytes(
Status BackupEngineImpl::GarbageCollect() {
assert(!read_only_);
// We will make a best effort to remove all garbage even in the presence
// of inconsistencies or I/O failures that inhibit finding garbage.
Status overall_status = Status::OK();
// If all goes well, we don't need another auto-GC this session
might_need_garbage_collect_ = false;
ROCKS_LOG_INFO(options_.info_log, "Starting garbage collection");
if (options_.max_valid_backups_to_open == port::kMaxInt32) {
if (options_.max_valid_backups_to_open != port::kMaxInt32) {
ROCKS_LOG_WARN(
options_.info_log,
"Garbage collection is limited since `max_valid_backups_to_open` "
@ -1533,7 +1591,9 @@ Status BackupEngineImpl::GarbageCollect() {
s = Status::OK();
}
if (!s.ok()) {
return s;
overall_status = s;
// Trying again later might work
might_need_garbage_collect_ = true;
}
}
for (auto& child : shared_children) {
@ -1553,6 +1613,10 @@ Status BackupEngineImpl::GarbageCollect() {
ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s",
rel_fname.c_str(), s.ToString().c_str());
backuped_file_infos_.erase(rel_fname);
if (!s.ok()) {
// Trying again later might work
might_need_garbage_collect_ = true;
}
}
}
}
@ -1563,7 +1627,9 @@ Status BackupEngineImpl::GarbageCollect() {
auto s = backup_env_->GetChildren(GetAbsolutePath(GetPrivateDirRel()),
&private_children);
if (!s.ok()) {
return s;
overall_status = s;
// Trying again later might work
might_need_garbage_collect_ = true;
}
}
for (auto& child : private_children) {
@ -1587,14 +1653,23 @@ Status BackupEngineImpl::GarbageCollect() {
ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s",
(full_private_path + subchild).c_str(),
s.ToString().c_str());
if (!s.ok()) {
// Trying again later might work
might_need_garbage_collect_ = true;
}
}
// finally delete the private dir
Status s = backup_env_->DeleteDir(full_private_path);
ROCKS_LOG_INFO(options_.info_log, "Deleting dir %s -- %s",
full_private_path.c_str(), s.ToString().c_str());
if (!s.ok()) {
// Trying again later might work
might_need_garbage_collect_ = true;
}
}
return Status::OK();
assert(overall_status.ok() || might_need_garbage_collect_);
return overall_status;
}
// ------- BackupMeta class --------

View File

@ -839,7 +839,7 @@ INSTANTIATE_TEST_CASE_P(BackupableDBTestWithParam, BackupableDBTestWithParam,
::testing::Bool());
// this will make sure that backup does not copy the same file twice
TEST_F(BackupableDBTest, NoDoubleCopy) {
TEST_F(BackupableDBTest, NoDoubleCopy_And_AutoGC) {
OpenDBAndBackupEngine(true, true);
// should write 5 DB files + one meta file
@ -857,23 +857,30 @@ TEST_F(BackupableDBTest, NoDoubleCopy) {
AppendPath(backupdir_, should_have_written);
test_backup_env_->AssertWrittenFiles(should_have_written);
char db_number = '1';
for (std::string other_sst : {"00015.sst", "00017.sst", "00019.sst"}) {
// should write 4 new DB files + one meta file
// should not write/copy 00010.sst, since it's already there!
test_backup_env_->SetLimitWrittenFiles(6);
test_backup_env_->ClearWrittenFiles();
dummy_db_->live_files_ = {"/00010.sst", "/00015.sst", "/CURRENT",
dummy_db_->live_files_ = {"/00010.sst", "/" + other_sst, "/CURRENT",
"/MANIFEST-01"};
dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}};
test_db_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false));
// should not open 00010.sst - it's already there
should_have_written = {"/shared/.00015.sst.tmp", "/private/2/CURRENT",
"/private/2/MANIFEST-01", "/private/2/00011.log",
"/meta/.2.tmp"};
++db_number;
std::string private_dir = std::string("/private/") + db_number;
should_have_written = {
"/shared/." + other_sst + ".tmp", private_dir + "/CURRENT",
private_dir + "/MANIFEST-01", private_dir + "/00011.log",
std::string("/meta/.") + db_number + ".tmp"};
AppendPath(backupdir_, should_have_written);
test_backup_env_->AssertWrittenFiles(should_have_written);
}
ASSERT_OK(backup_engine_->DeleteBackup(1));
ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00010.sst"));
@ -890,6 +897,42 @@ TEST_F(BackupableDBTest, NoDoubleCopy) {
test_backup_env_->GetFileSize(backupdir_ + "/shared/00015.sst", &size);
ASSERT_EQ(200UL, size);
CloseBackupEngine();
//
// Now simulate incomplete delete by removing just meta
//
ASSERT_OK(test_backup_env_->DeleteFile(backupdir_ + "/meta/2"));
OpenBackupEngine();
// 1 appears to be removed, so
// 2 non-corrupt and 0 corrupt seen
std::vector<BackupInfo> backup_info;
std::vector<BackupID> corrupt_backup_ids;
backup_engine_->GetBackupInfo(&backup_info);
backup_engine_->GetCorruptedBackups(&corrupt_backup_ids);
ASSERT_EQ(2UL, backup_info.size());
ASSERT_EQ(0UL, corrupt_backup_ids.size());
// Keep the two we see, but this should suffice to purge unreferenced
// shared files from incomplete delete.
ASSERT_OK(backup_engine_->PurgeOldBackups(2));
// Make sure dangling sst file has been removed (somewhere along this
// process). GarbageCollect should not be needed.
ASSERT_EQ(Status::NotFound(),
test_backup_env_->FileExists(backupdir_ + "/shared/00015.sst"));
ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00017.sst"));
ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00019.sst"));
// Now actually purge a good one
ASSERT_OK(backup_engine_->PurgeOldBackups(1));
ASSERT_EQ(Status::NotFound(),
test_backup_env_->FileExists(backupdir_ + "/shared/00017.sst"));
ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00019.sst"));
CloseDBAndBackupEngine();
}
@ -972,7 +1015,8 @@ TEST_F(BackupableDBTest, CorruptionsTest) {
ASSERT_OK(backup_engine_->DeleteBackup(4));
ASSERT_OK(backup_engine_->DeleteBackup(3));
ASSERT_OK(backup_engine_->DeleteBackup(2));
(void)backup_engine_->GarbageCollect();
// Should not be needed anymore with auto-GC on DeleteBackup
//(void)backup_engine_->GarbageCollect();
ASSERT_EQ(Status::NotFound(),
file_manager_->FileExists(backupdir_ + "/meta/5"));
ASSERT_EQ(Status::NotFound(),
@ -1222,14 +1266,11 @@ TEST_F(BackupableDBTest, ShareTableFilesWithChecksumsTransition) {
}
TEST_F(BackupableDBTest, DeleteTmpFiles) {
for (int cleanup_fn : {1, 2, 3}) {
for (bool shared_checksum : {false, true}) {
if (shared_checksum) {
OpenDBAndBackupEngineShareWithChecksum(
false /* destroy_old_data */, false /* dummy */,
true /* share_table_files */, true /* share_with_checksums */);
} else {
OpenDBAndBackupEngine();
}
true /* share_table_files */, shared_checksum);
CloseDBAndBackupEngine();
std::string shared_tmp = backupdir_;
if (shared_checksum) {
@ -1251,13 +1292,27 @@ TEST_F(BackupableDBTest, DeleteTmpFiles) {
} else {
OpenDBAndBackupEngine();
}
// Need to call this explicitly to delete tmp files
// Need to call one of these explicitly to delete tmp files
switch (cleanup_fn) {
case 1:
(void)backup_engine_->GarbageCollect();
break;
case 2:
(void)backup_engine_->DeleteBackup(1);
break;
case 3:
(void)backup_engine_->PurgeOldBackups(1);
break;
default:
assert(false);
}
CloseDBAndBackupEngine();
ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(shared_tmp));
ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(private_tmp_file));
ASSERT_EQ(Status::NotFound(),
file_manager_->FileExists(private_tmp_file));
ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(private_tmp_dir));
}
}
}
TEST_F(BackupableDBTest, KeepLogFiles) {