From a6d418384d58212f57052fe21362e0f748e552ff Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 8 Nov 2019 19:13:41 -0800 Subject: [PATCH] Auto-GarbageCollect on PurgeOldBackups and DeleteBackup (#6015) Summary: Only if there is a crash, power failure, or I/O error in DeleteBackup, shared or private files from the backup might be left behind that are not cleaned up by PurgeOldBackups or DeleteBackup-- only by GarbageCollect. This makes the BackupEngine API "leaky by default." Even if it means a modest performance hit, I think we should make Delete and Purge do as they say, with ongoing best effort: i.e. future calls will attempt to finish any incomplete work from earlier calls. This change does that by having DeleteBackup and PurgeOldBackups do a GarbageCollect, unless (to minimize performance hit) this BackupEngine has already done a GarbageCollect and there have been no deletion-related I/O errors in that GarbageCollect or since then. Rejected alternative 1: remove meta file last instead of first. This would in theory turn partially deleted backups into corrupted backups, but code changes would be needed to allow the missing files and consider it acceptably corrupt, rather than failing to open the BackupEngine. This might be a reasonable choice, but I mostly rejected it because it doesn't solve the legacy problem of cleaning up existing lingering files. Rejected alternative 2: use a deletion marker file. If deletion started with creating a file that marks a backup as flagged for deletion, then we could reliably detect partially deleted backups and efficiently finish removing them. In addition to not solving the legacy problem, this could be precarious if there's a disk full situation, and we try to create a new file in order to delete some files. Ugh. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6015 Test Plan: Updated unit tests Differential Revision: D18401333 Pulled By: pdillinger fbshipit-source-id: 12944e372ce6809f3f5a4c416c3b321a8927d925 --- HISTORY.md | 1 + include/rocksdb/utilities/backupable_db.h | 5 +- utilities/backupable/backupable_db.cc | 121 ++++++++++++---- utilities/backupable/backupable_db_test.cc | 153 ++++++++++++++------- 4 files changed, 207 insertions(+), 73 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index ac8a9f825..34fc09f19 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,7 @@ ### Bug Fixes * Fix a assertion failure in MultiGe4t() when BlockBasedTableOptions::no_block_cache is true and there is no compressed block cache * Fix a buffer overrun problem in BlockBasedTable::MultiGet() when compression is enabled and no compressed block cache is configured. +* If a call to BackupEngine::PurgeOldBackups or BackupEngine::DeleteBackup suffered a crash, power failure, or I/O error, files could be left over from old backups that could only be purged with a call to GarbageCollect. Any call to PurgeOldBackups, DeleteBackup, or GarbageCollect should now suffice to purge such files. ## 6.5.1 (10/16/2019) ### Bug Fixes diff --git a/include/rocksdb/utilities/backupable_db.h b/include/rocksdb/utilities/backupable_db.h index 1ca4fc9a6..1c810d7af 100644 --- a/include/rocksdb/utilities/backupable_db.h +++ b/include/rocksdb/utilities/backupable_db.h @@ -325,7 +325,10 @@ class BackupEngine { // Will delete all the files we don't need anymore // It will do the full scan of the files/ directory and delete all the - // files that are not referenced. + // files that are not referenced. PurgeOldBackups() and DeleteBackup() + // will do a similar operation as needed to clean up from any incomplete + // deletions, so this function is not really needed if calling one of + // those. virtual Status GarbageCollect() = 0; }; diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index b7592a0ce..5ff5af255 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -9,24 +9,10 @@ #ifndef ROCKSDB_LITE -#include "rocksdb/utilities/backupable_db.h" -#include "file/filename.h" -#include "logging/logging.h" -#include "port/port.h" -#include "rocksdb/rate_limiter.h" -#include "rocksdb/transaction_log.h" -#include "test_util/sync_point.h" -#include "util/channel.h" -#include "util/coding.h" -#include "util/crc32c.h" -#include "util/file_reader_writer.h" -#include "util/string_util.h" -#include "utilities/checkpoint/checkpoint_impl.h" - -#include #include #include #include +#include #include #include #include @@ -39,6 +25,20 @@ #include #include +#include "file/filename.h" +#include "logging/logging.h" +#include "port/port.h" +#include "rocksdb/rate_limiter.h" +#include "rocksdb/transaction_log.h" +#include "rocksdb/utilities/backupable_db.h" +#include "test_util/sync_point.h" +#include "util/channel.h" +#include "util/coding.h" +#include "util/crc32c.h" +#include "util/file_reader_writer.h" +#include "util/string_util.h" +#include "utilities/checkpoint/checkpoint_impl.h" + namespace rocksdb { void BackupStatistics::IncrementNumberSuccessBackup() { @@ -120,6 +120,7 @@ class BackupEngineImpl : public BackupEngine { private: void DeleteChildren(const std::string& dir, uint32_t file_type_filter = 0); + Status DeleteBackupInternal(BackupID backup_id); // Extends the "result" map with pathname->size mappings for the contents of // "dir" in "env". Pathnames are prefixed with "dir". @@ -456,6 +457,10 @@ class BackupEngineImpl : public BackupEngine { std::mutex byte_report_mutex_; channel files_to_copy_or_create_; std::vector threads_; + // Certain operations like PurgeOldBackups and DeleteBackup will trigger + // automatic GarbageCollect (true) unless we've already done one in this + // session and have not failed to delete backup files since then (false). + bool might_need_garbage_collect_ = true; // Adds a file to the backup work queue to be copied or created if it doesn't // already exist. @@ -559,6 +564,9 @@ Status BackupEngineImpl::Initialize() { options_.Dump(options_.info_log); if (!read_only_) { + // we might need to clean up from previous crash or I/O errors + might_need_garbage_collect_ = true; + // gather the list of directories that we need to create std::vector*>> directories; @@ -928,8 +936,8 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( ROCKS_LOG_INFO(options_.info_log, "Backup Statistics %s\n", backup_statistics_.ToString().c_str()); // delete files that we might have already written + might_need_garbage_collect_ = true; DeleteBackup(new_backup_id); - GarbageCollect(); return s; } @@ -957,6 +965,10 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata( Status BackupEngineImpl::PurgeOldBackups(uint32_t num_backups_to_keep) { assert(initialized_); assert(!read_only_); + + // Best effort deletion even with errors + Status overall_status = Status::OK(); + ROCKS_LOG_INFO(options_.info_log, "Purging old backups, keeping %u", num_backups_to_keep); std::vector to_delete; @@ -966,17 +978,44 @@ Status BackupEngineImpl::PurgeOldBackups(uint32_t num_backups_to_keep) { itr++; } for (auto backup_id : to_delete) { - auto s = DeleteBackup(backup_id); + auto s = DeleteBackupInternal(backup_id); if (!s.ok()) { - return s; + overall_status = s; } } - return Status::OK(); + // Clean up after any incomplete backup deletion, potentially from + // earlier session. + if (might_need_garbage_collect_) { + auto s = GarbageCollect(); + if (!s.ok() && overall_status.ok()) { + overall_status = s; + } + } + return overall_status; } Status BackupEngineImpl::DeleteBackup(BackupID backup_id) { + auto s1 = DeleteBackupInternal(backup_id); + auto s2 = Status::OK(); + + // Clean up after any incomplete backup deletion, potentially from + // earlier session. + if (might_need_garbage_collect_) { + s2 = GarbageCollect(); + } + + if (!s1.ok()) { + return s1; + } else { + return s2; + } +} + +// Does not auto-GarbageCollect +Status BackupEngineImpl::DeleteBackupInternal(BackupID backup_id) { assert(initialized_); assert(!read_only_); + ROCKS_LOG_INFO(options_.info_log, "Deleting backup %u", backup_id); auto backup = backups_.find(backup_id); if (backup != backups_.end()) { @@ -997,6 +1036,10 @@ Status BackupEngineImpl::DeleteBackup(BackupID backup_id) { corrupt_backups_.erase(corrupt); } + // After removing meta file, best effort deletion even with errors. + // (Don't delete other files if we can't delete the meta file right + // now.) + if (options_.max_valid_backups_to_open == port::kMaxInt32) { std::vector to_delete; for (auto& itr : backuped_file_infos_) { @@ -1005,6 +1048,10 @@ Status BackupEngineImpl::DeleteBackup(BackupID backup_id) { ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s", itr.first.c_str(), s.ToString().c_str()); to_delete.push_back(itr.first); + if (!s.ok()) { + // Trying again later might work + might_need_garbage_collect_ = true; + } } } for (auto& td : to_delete) { @@ -1023,6 +1070,10 @@ Status BackupEngineImpl::DeleteBackup(BackupID backup_id) { Status s = backup_env_->DeleteDir(GetAbsolutePath(private_dir)); ROCKS_LOG_INFO(options_.info_log, "Deleting private dir %s -- %s", private_dir.c_str(), s.ToString().c_str()); + if (!s.ok()) { + // Full gc or trying again later might work + might_need_garbage_collect_ = true; + } return Status::OK(); } @@ -1505,8 +1556,15 @@ Status BackupEngineImpl::InsertPathnameToSizeBytes( Status BackupEngineImpl::GarbageCollect() { assert(!read_only_); + + // We will make a best effort to remove all garbage even in the presence + // of inconsistencies or I/O failures that inhibit finding garbage. + Status overall_status = Status::OK(); + // If all goes well, we don't need another auto-GC this session + might_need_garbage_collect_ = false; + ROCKS_LOG_INFO(options_.info_log, "Starting garbage collection"); - if (options_.max_valid_backups_to_open == port::kMaxInt32) { + if (options_.max_valid_backups_to_open != port::kMaxInt32) { ROCKS_LOG_WARN( options_.info_log, "Garbage collection is limited since `max_valid_backups_to_open` " @@ -1533,7 +1591,9 @@ Status BackupEngineImpl::GarbageCollect() { s = Status::OK(); } if (!s.ok()) { - return s; + overall_status = s; + // Trying again later might work + might_need_garbage_collect_ = true; } } for (auto& child : shared_children) { @@ -1553,6 +1613,10 @@ Status BackupEngineImpl::GarbageCollect() { ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s", rel_fname.c_str(), s.ToString().c_str()); backuped_file_infos_.erase(rel_fname); + if (!s.ok()) { + // Trying again later might work + might_need_garbage_collect_ = true; + } } } } @@ -1563,7 +1627,9 @@ Status BackupEngineImpl::GarbageCollect() { auto s = backup_env_->GetChildren(GetAbsolutePath(GetPrivateDirRel()), &private_children); if (!s.ok()) { - return s; + overall_status = s; + // Trying again later might work + might_need_garbage_collect_ = true; } } for (auto& child : private_children) { @@ -1587,14 +1653,23 @@ Status BackupEngineImpl::GarbageCollect() { ROCKS_LOG_INFO(options_.info_log, "Deleting %s -- %s", (full_private_path + subchild).c_str(), s.ToString().c_str()); + if (!s.ok()) { + // Trying again later might work + might_need_garbage_collect_ = true; + } } // finally delete the private dir Status s = backup_env_->DeleteDir(full_private_path); ROCKS_LOG_INFO(options_.info_log, "Deleting dir %s -- %s", full_private_path.c_str(), s.ToString().c_str()); + if (!s.ok()) { + // Trying again later might work + might_need_garbage_collect_ = true; + } } - return Status::OK(); + assert(overall_status.ok() || might_need_garbage_collect_); + return overall_status; } // ------- BackupMeta class -------- diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index 37d9e4cd1..7fcc87863 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -839,7 +839,7 @@ INSTANTIATE_TEST_CASE_P(BackupableDBTestWithParam, BackupableDBTestWithParam, ::testing::Bool()); // this will make sure that backup does not copy the same file twice -TEST_F(BackupableDBTest, NoDoubleCopy) { +TEST_F(BackupableDBTest, NoDoubleCopy_And_AutoGC) { OpenDBAndBackupEngine(true, true); // should write 5 DB files + one meta file @@ -857,23 +857,30 @@ TEST_F(BackupableDBTest, NoDoubleCopy) { AppendPath(backupdir_, should_have_written); test_backup_env_->AssertWrittenFiles(should_have_written); - // should write 4 new DB files + one meta file - // should not write/copy 00010.sst, since it's already there! - test_backup_env_->SetLimitWrittenFiles(6); - test_backup_env_->ClearWrittenFiles(); + char db_number = '1'; - dummy_db_->live_files_ = {"/00010.sst", "/00015.sst", "/CURRENT", - "/MANIFEST-01"}; - dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}}; - test_db_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_); - ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false)); - // should not open 00010.sst - it's already there + for (std::string other_sst : {"00015.sst", "00017.sst", "00019.sst"}) { + // should write 4 new DB files + one meta file + // should not write/copy 00010.sst, since it's already there! + test_backup_env_->SetLimitWrittenFiles(6); + test_backup_env_->ClearWrittenFiles(); - should_have_written = {"/shared/.00015.sst.tmp", "/private/2/CURRENT", - "/private/2/MANIFEST-01", "/private/2/00011.log", - "/meta/.2.tmp"}; - AppendPath(backupdir_, should_have_written); - test_backup_env_->AssertWrittenFiles(should_have_written); + dummy_db_->live_files_ = {"/00010.sst", "/" + other_sst, "/CURRENT", + "/MANIFEST-01"}; + dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}}; + test_db_env_->SetFilenamesForMockedAttrs(dummy_db_->live_files_); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), false)); + // should not open 00010.sst - it's already there + + ++db_number; + std::string private_dir = std::string("/private/") + db_number; + should_have_written = { + "/shared/." + other_sst + ".tmp", private_dir + "/CURRENT", + private_dir + "/MANIFEST-01", private_dir + "/00011.log", + std::string("/meta/.") + db_number + ".tmp"}; + AppendPath(backupdir_, should_have_written); + test_backup_env_->AssertWrittenFiles(should_have_written); + } ASSERT_OK(backup_engine_->DeleteBackup(1)); ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00010.sst")); @@ -890,6 +897,42 @@ TEST_F(BackupableDBTest, NoDoubleCopy) { test_backup_env_->GetFileSize(backupdir_ + "/shared/00015.sst", &size); ASSERT_EQ(200UL, size); + CloseBackupEngine(); + + // + // Now simulate incomplete delete by removing just meta + // + ASSERT_OK(test_backup_env_->DeleteFile(backupdir_ + "/meta/2")); + + OpenBackupEngine(); + + // 1 appears to be removed, so + // 2 non-corrupt and 0 corrupt seen + std::vector backup_info; + std::vector corrupt_backup_ids; + backup_engine_->GetBackupInfo(&backup_info); + backup_engine_->GetCorruptedBackups(&corrupt_backup_ids); + ASSERT_EQ(2UL, backup_info.size()); + ASSERT_EQ(0UL, corrupt_backup_ids.size()); + + // Keep the two we see, but this should suffice to purge unreferenced + // shared files from incomplete delete. + ASSERT_OK(backup_engine_->PurgeOldBackups(2)); + + // Make sure dangling sst file has been removed (somewhere along this + // process). GarbageCollect should not be needed. + ASSERT_EQ(Status::NotFound(), + test_backup_env_->FileExists(backupdir_ + "/shared/00015.sst")); + ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00017.sst")); + ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00019.sst")); + + // Now actually purge a good one + ASSERT_OK(backup_engine_->PurgeOldBackups(1)); + + ASSERT_EQ(Status::NotFound(), + test_backup_env_->FileExists(backupdir_ + "/shared/00017.sst")); + ASSERT_OK(test_backup_env_->FileExists(backupdir_ + "/shared/00019.sst")); + CloseDBAndBackupEngine(); } @@ -972,7 +1015,8 @@ TEST_F(BackupableDBTest, CorruptionsTest) { ASSERT_OK(backup_engine_->DeleteBackup(4)); ASSERT_OK(backup_engine_->DeleteBackup(3)); ASSERT_OK(backup_engine_->DeleteBackup(2)); - (void)backup_engine_->GarbageCollect(); + // Should not be needed anymore with auto-GC on DeleteBackup + //(void)backup_engine_->GarbageCollect(); ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(backupdir_ + "/meta/5")); ASSERT_EQ(Status::NotFound(), @@ -1222,41 +1266,52 @@ TEST_F(BackupableDBTest, ShareTableFilesWithChecksumsTransition) { } TEST_F(BackupableDBTest, DeleteTmpFiles) { - for (bool shared_checksum : {false, true}) { - if (shared_checksum) { + for (int cleanup_fn : {1, 2, 3}) { + for (bool shared_checksum : {false, true}) { OpenDBAndBackupEngineShareWithChecksum( false /* destroy_old_data */, false /* dummy */, - true /* share_table_files */, true /* share_with_checksums */); - } else { - OpenDBAndBackupEngine(); + true /* share_table_files */, shared_checksum); + CloseDBAndBackupEngine(); + std::string shared_tmp = backupdir_; + if (shared_checksum) { + shared_tmp += "/shared_checksum"; + } else { + shared_tmp += "/shared"; + } + shared_tmp += "/.00006.sst.tmp"; + std::string private_tmp_dir = backupdir_ + "/private/10"; + std::string private_tmp_file = private_tmp_dir + "/00003.sst"; + file_manager_->WriteToFile(shared_tmp, "tmp"); + file_manager_->CreateDir(private_tmp_dir); + file_manager_->WriteToFile(private_tmp_file, "tmp"); + ASSERT_OK(file_manager_->FileExists(private_tmp_dir)); + if (shared_checksum) { + OpenDBAndBackupEngineShareWithChecksum( + false /* destroy_old_data */, false /* dummy */, + true /* share_table_files */, true /* share_with_checksums */); + } else { + OpenDBAndBackupEngine(); + } + // Need to call one of these explicitly to delete tmp files + switch (cleanup_fn) { + case 1: + (void)backup_engine_->GarbageCollect(); + break; + case 2: + (void)backup_engine_->DeleteBackup(1); + break; + case 3: + (void)backup_engine_->PurgeOldBackups(1); + break; + default: + assert(false); + } + CloseDBAndBackupEngine(); + ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(shared_tmp)); + ASSERT_EQ(Status::NotFound(), + file_manager_->FileExists(private_tmp_file)); + ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(private_tmp_dir)); } - CloseDBAndBackupEngine(); - std::string shared_tmp = backupdir_; - if (shared_checksum) { - shared_tmp += "/shared_checksum"; - } else { - shared_tmp += "/shared"; - } - shared_tmp += "/.00006.sst.tmp"; - std::string private_tmp_dir = backupdir_ + "/private/10"; - std::string private_tmp_file = private_tmp_dir + "/00003.sst"; - file_manager_->WriteToFile(shared_tmp, "tmp"); - file_manager_->CreateDir(private_tmp_dir); - file_manager_->WriteToFile(private_tmp_file, "tmp"); - ASSERT_OK(file_manager_->FileExists(private_tmp_dir)); - if (shared_checksum) { - OpenDBAndBackupEngineShareWithChecksum( - false /* destroy_old_data */, false /* dummy */, - true /* share_table_files */, true /* share_with_checksums */); - } else { - OpenDBAndBackupEngine(); - } - // Need to call this explicitly to delete tmp files - (void)backup_engine_->GarbageCollect(); - CloseDBAndBackupEngine(); - ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(shared_tmp)); - ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(private_tmp_file)); - ASSERT_EQ(Status::NotFound(), file_manager_->FileExists(private_tmp_dir)); } }