Improve Backup Engine.

Summary:
Improve the backup engine by not deleting the corrupted
backup when it is detected; instead leaving it to the client
to delete the corrupted backup.

Also add a BackupEngine::Open() call.

Test Plan:
Add check to CorruptionTest inside backupable_db_test
to check that the corrupt backups are not deleted. The previous
version of the code failed this test as backups were deleted,
but after the changes in this commit, this test passes.

Run make check to ensure that no other tests fail.

Reviewers: sdong, benj, sanketh, sumeet, igor

Reviewed By: igor

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D28521
This commit is contained in:
Hasnain Lakhani 2014-11-13 09:51:41 -08:00
parent 1033db29f9
commit 31b02dc21d
4 changed files with 202 additions and 111 deletions

View File

@ -2,6 +2,9 @@
## Unreleased ## Unreleased
* BackupEngine::NewBackupEngine() was deprecated; please use BackupEngine::Open() from now on.
* BackupableDB/RestoreBackupableDB have new GarbageCollect() methods, which will clean up files from corrupt and obsolete backups.
* BackupableDB/RestoreBackupableDB have new GetCorruptedBackups() methods which list corrupt backups.
## 3.7.0 (11/6/2014) ## 3.7.0 (11/6/2014)
### Public API changes ### Public API changes

View File

@ -177,6 +177,8 @@ class BackupEngineReadOnly {
// You can GetBackupInfo safely, even with other BackupEngine performing // You can GetBackupInfo safely, even with other BackupEngine performing
// backups on the same directory // backups on the same directory
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0; virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0;
virtual void GetCorruptedBackups(
std::vector<BackupID>* corrupt_backup_ids) = 0;
// Restoring DB from backup is NOT safe when there is another BackupEngine // Restoring DB from backup is NOT safe when there is another BackupEngine
// running that might call DeleteBackup() or PurgeOldBackups(). It is caller's // running that might call DeleteBackup() or PurgeOldBackups(). It is caller's
@ -196,7 +198,12 @@ class BackupEngine {
virtual ~BackupEngine() {} virtual ~BackupEngine() {}
static BackupEngine* NewBackupEngine(Env* db_env, static BackupEngine* NewBackupEngine(Env* db_env,
const BackupableDBOptions& options); const BackupableDBOptions& options)
__attribute__((deprecated("Please use Open() instead")));
static Status Open(Env* db_env,
const BackupableDBOptions& options,
BackupEngine** backup_engine_ptr);
virtual Status CreateNewBackup(DB* db, bool flush_before_backup = false) = 0; virtual Status CreateNewBackup(DB* db, bool flush_before_backup = false) = 0;
virtual Status PurgeOldBackups(uint32_t num_backups_to_keep) = 0; virtual Status PurgeOldBackups(uint32_t num_backups_to_keep) = 0;
@ -204,12 +211,16 @@ class BackupEngine {
virtual void StopBackup() = 0; virtual void StopBackup() = 0;
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0; virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0;
virtual void GetCorruptedBackups(
std::vector<BackupID>* corrupt_backup_ids) = 0;
virtual Status RestoreDBFromBackup( virtual Status RestoreDBFromBackup(
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir, BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options = RestoreOptions()) = 0; const RestoreOptions& restore_options = RestoreOptions()) = 0;
virtual Status RestoreDBFromLatestBackup( virtual Status RestoreDBFromLatestBackup(
const std::string& db_dir, const std::string& wal_dir, const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options = RestoreOptions()) = 0; const RestoreOptions& restore_options = RestoreOptions()) = 0;
virtual Status GarbageCollect() = 0;
}; };
// Stack your DB with BackupableDB to be able to backup the DB // Stack your DB with BackupableDB to be able to backup the DB
@ -228,6 +239,8 @@ class BackupableDB : public StackableDB {
Status CreateNewBackup(bool flush_before_backup = false); Status CreateNewBackup(bool flush_before_backup = false);
// Returns info about backups in backup_info // Returns info about backups in backup_info
void GetBackupInfo(std::vector<BackupInfo>* backup_info); void GetBackupInfo(std::vector<BackupInfo>* backup_info);
// Returns info about corrupt backups in corrupt_backups
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids);
// deletes old backups, keeping latest num_backups_to_keep alive // deletes old backups, keeping latest num_backups_to_keep alive
Status PurgeOldBackups(uint32_t num_backups_to_keep); Status PurgeOldBackups(uint32_t num_backups_to_keep);
// deletes a specific backup // deletes a specific backup
@ -241,6 +254,11 @@ class BackupableDB : public StackableDB {
// next time you create BackupableDB or RestoreBackupableDB. // next time you create BackupableDB or RestoreBackupableDB.
void StopBackup(); void StopBackup();
// Will delete all the files we don't need anymore
// It will do the full scan of the files/ directory and delete all the
// files that are not referenced.
Status GarbageCollect();
private: private:
BackupEngine* backup_engine_; BackupEngine* backup_engine_;
}; };
@ -253,6 +271,8 @@ class RestoreBackupableDB {
// Returns info about backups in backup_info // Returns info about backups in backup_info
void GetBackupInfo(std::vector<BackupInfo>* backup_info); void GetBackupInfo(std::vector<BackupInfo>* backup_info);
// Returns info about corrupt backups in corrupt_backups
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids);
// restore from backup with backup_id // restore from backup with backup_id
// IMPORTANT -- if options_.share_table_files == true and you restore DB // IMPORTANT -- if options_.share_table_files == true and you restore DB
@ -279,6 +299,11 @@ class RestoreBackupableDB {
// deletes a specific backup // deletes a specific backup
Status DeleteBackup(BackupID backup_id); Status DeleteBackup(BackupID backup_id);
// Will delete all the files we don't need anymore
// It will do the full scan of the files/ directory and delete all the
// files that are not referenced.
Status GarbageCollect();
private: private:
BackupEngine* backup_engine_; BackupEngine* backup_engine_;
}; };

View File

@ -121,8 +121,10 @@ class BackupEngineImpl : public BackupEngine {
void StopBackup() { void StopBackup() {
stop_backup_.store(true, std::memory_order_release); stop_backup_.store(true, std::memory_order_release);
} }
Status GarbageCollect();
void GetBackupInfo(std::vector<BackupInfo>* backup_info); void GetBackupInfo(std::vector<BackupInfo>* backup_info);
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids);
Status RestoreDBFromBackup(BackupID backup_id, const std::string& db_dir, Status RestoreDBFromBackup(BackupID backup_id, const std::string& db_dir,
const std::string& wal_dir, const std::string& wal_dir,
const RestoreOptions& restore_options = const RestoreOptions& restore_options =
@ -285,16 +287,11 @@ class BackupEngineImpl : public BackupEngine {
uint64_t size_limit, uint64_t size_limit,
uint32_t* checksum_value); uint32_t* checksum_value);
// Will delete all the files we don't need anymore
// If full_scan == true, it will do the full scan of files/ directory
// and delete all the files that are not referenced from backuped_file_infos__
void GarbageCollection(bool full_scan);
// backup state data // backup state data
BackupID latest_backup_id_; BackupID latest_backup_id_;
std::map<BackupID, BackupMeta> backups_; std::map<BackupID, BackupMeta> backups_;
std::map<BackupID, std::pair<Status, BackupMeta> > corrupt_backups_;
std::unordered_map<std::string, FileInfo> backuped_file_infos_; std::unordered_map<std::string, FileInfo> backuped_file_infos_;
std::vector<BackupID> obsolete_backups_;
std::atomic<bool> stop_backup_; std::atomic<bool> stop_backup_;
// options data // options data
@ -319,6 +316,13 @@ BackupEngine* BackupEngine::NewBackupEngine(
return new BackupEngineImpl(db_env, options); return new BackupEngineImpl(db_env, options);
} }
Status BackupEngine::Open(Env* env,
const BackupableDBOptions& options,
BackupEngine** backup_engine_ptr) {
*backup_engine_ptr = new BackupEngineImpl(env, options);
return Status::OK();
}
BackupEngineImpl::BackupEngineImpl(Env* db_env, BackupEngineImpl::BackupEngineImpl(Env* db_env,
const BackupableDBOptions& options, const BackupableDBOptions& options,
bool read_only) bool read_only)
@ -377,14 +381,10 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
if (options_.destroy_old_data) { // Destory old data if (options_.destroy_old_data) { // Destory old data
assert(!read_only_); assert(!read_only_);
for (auto& backup : backups_) { PurgeOldBackups(0);
backup.second.Delete(); (void) GarbageCollect();
obsolete_backups_.push_back(backup.first);
}
backups_.clear();
// start from beginning // start from beginning
latest_backup_id_ = 0; latest_backup_id_ = 0;
// GarbageCollection() will do the actual deletion
} else { // Load data from storage } else { // Load data from storage
// load the backups if any // load the backups if any
for (auto& backup : backups_) { for (auto& backup : backups_) {
@ -392,16 +392,13 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
if (!s.ok()) { if (!s.ok()) {
Log(options_.info_log, "Backup %u corrupted -- %s", backup.first, Log(options_.info_log, "Backup %u corrupted -- %s", backup.first,
s.ToString().c_str()); s.ToString().c_str());
if (!read_only_) { corrupt_backups_.insert(std::make_pair(
Log(options_.info_log, "-> Deleting backup %u", backup.first); backup.first, std::make_pair(s, backup.second)));
}
backup.second.Delete(!read_only_);
obsolete_backups_.push_back(backup.first);
} }
} }
// delete obsolete backups from the structure
for (auto ob : obsolete_backups_) { for (auto corrupt : corrupt_backups_) {
backups_.erase(ob); backups_.erase(backups_.find(corrupt.first));
} }
Status s = GetLatestBackupFileContents(&latest_backup_id_); Status s = GetLatestBackupFileContents(&latest_backup_id_);
@ -417,16 +414,17 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
} }
// delete any backups that claim to be later than latest // delete any backups that claim to be later than latest
for (auto itr = backups_.upper_bound(latest_backup_id_); std::vector<BackupID> later_ids;
itr != backups_.end();) { for (auto itr = backups_.lower_bound(latest_backup_id_ + 1);
itr->second.Delete(); itr != backups_.end(); itr++) {
obsolete_backups_.push_back(itr->first); later_ids.push_back(itr->first);
itr = backups_.erase(itr); }
for (auto id : later_ids) {
DeleteBackup(id);
} }
if (!read_only_) { if (!read_only_) {
PutLatestBackupFileContents(latest_backup_id_); // Ignore errors PutLatestBackupFileContents(latest_backup_id_); // Ignore errors
GarbageCollection(true);
} }
Log(options_.info_log, "Initialized BackupEngine, the latest backup is %u.", Log(options_.info_log, "Initialized BackupEngine, the latest backup is %u.",
latest_backup_id_); latest_backup_id_);
@ -575,7 +573,7 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
Log(options_.info_log, "Backup Statistics %s\n", Log(options_.info_log, "Backup Statistics %s\n",
backup_statistics_.ToString().c_str()); backup_statistics_.ToString().c_str());
backups_.erase(new_backup_id); backups_.erase(new_backup_id);
GarbageCollection(true); (void) GarbageCollect();
return s; return s;
} }
@ -601,13 +599,15 @@ Status BackupEngineImpl::PurgeOldBackups(uint32_t num_backups_to_keep) {
assert(!read_only_); assert(!read_only_);
Log(options_.info_log, "Purging old backups, keeping %u", Log(options_.info_log, "Purging old backups, keeping %u",
num_backups_to_keep); num_backups_to_keep);
while (num_backups_to_keep < backups_.size()) { std::vector<BackupID> to_delete;
Log(options_.info_log, "Deleting backup %u", backups_.begin()->first); auto itr = backups_.begin();
backups_.begin()->second.Delete(); while ((backups_.size() - to_delete.size()) > num_backups_to_keep) {
obsolete_backups_.push_back(backups_.begin()->first); to_delete.push_back(itr->first);
backups_.erase(backups_.begin()); itr++;
}
for (auto backup_id : to_delete) {
DeleteBackup(backup_id);
} }
GarbageCollection(false);
return Status::OK(); return Status::OK();
} }
@ -615,13 +615,37 @@ Status BackupEngineImpl::DeleteBackup(BackupID backup_id) {
assert(!read_only_); assert(!read_only_);
Log(options_.info_log, "Deleting backup %u", backup_id); Log(options_.info_log, "Deleting backup %u", backup_id);
auto backup = backups_.find(backup_id); auto backup = backups_.find(backup_id);
if (backup == backups_.end()) { if (backup != backups_.end()) {
backup->second.Delete();
backups_.erase(backup);
} else {
auto corrupt = corrupt_backups_.find(backup_id);
if (corrupt == corrupt_backups_.end()) {
return Status::NotFound("Backup not found"); return Status::NotFound("Backup not found");
} }
backup->second.Delete(); corrupt->second.second.Delete();
obsolete_backups_.push_back(backup_id); corrupt_backups_.erase(corrupt);
backups_.erase(backup); }
GarbageCollection(false);
std::vector<std::string> to_delete;
for (auto& itr : backuped_file_infos_) {
if (itr.second.refs == 0) {
Status s = backup_env_->DeleteFile(GetAbsolutePath(itr.first));
Log(options_.info_log, "Deleting %s -- %s", itr.first.c_str(),
s.ToString().c_str());
to_delete.push_back(itr.first);
}
}
for (auto& td : to_delete) {
backuped_file_infos_.erase(td);
}
// take care of private dirs -- GarbageCollect() will take care of them
// if they are not empty
std::string private_dir = GetPrivateFileRel(backup_id);
Status s = backup_env_->DeleteDir(GetAbsolutePath(private_dir));
Log(options_.info_log, "Deleting private dir %s -- %s",
private_dir.c_str(), s.ToString().c_str());
return Status::OK(); return Status::OK();
} }
@ -636,9 +660,22 @@ void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
} }
} }
void
BackupEngineImpl::GetCorruptedBackups(
std::vector<BackupID>* corrupt_backup_ids) {
corrupt_backup_ids->reserve(corrupt_backups_.size());
for (auto& backup : corrupt_backups_) {
corrupt_backup_ids->push_back(backup.first);
}
}
Status BackupEngineImpl::RestoreDBFromBackup( Status BackupEngineImpl::RestoreDBFromBackup(
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir, BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options) { const RestoreOptions& restore_options) {
auto corrupt_itr = corrupt_backups_.find(backup_id);
if (corrupt_itr != corrupt_backups_.end()) {
return corrupt_itr->second.first;
}
auto backup_itr = backups_.find(backup_id); auto backup_itr = backups_.find(backup_id);
if (backup_itr == backups_.end()) { if (backup_itr == backups_.end()) {
return Status::NotFound("Backup not found"); return Status::NotFound("Backup not found");
@ -1005,35 +1042,10 @@ void BackupEngineImpl::DeleteChildren(const std::string& dir,
} }
} }
void BackupEngineImpl::GarbageCollection(bool full_scan) { Status BackupEngineImpl::GarbageCollect() {
assert(!read_only_); assert(!read_only_);
Log(options_.info_log, "Starting garbage collection"); Log(options_.info_log, "Starting garbage collection");
std::vector<std::string> to_delete;
for (auto& itr : backuped_file_infos_) {
if (itr.second.refs == 0) {
Status s = backup_env_->DeleteFile(GetAbsolutePath(itr.first));
Log(options_.info_log, "Deleting %s -- %s", itr.first.c_str(),
s.ToString().c_str());
to_delete.push_back(itr.first);
}
}
for (auto& td : to_delete) {
backuped_file_infos_.erase(td);
}
if (!full_scan) {
// take care of private dirs -- if full_scan == true, then full_scan will
// take care of them
for (auto backup_id : obsolete_backups_) {
std::string private_dir = GetPrivateFileRel(backup_id);
Status s = backup_env_->DeleteDir(GetAbsolutePath(private_dir));
Log(options_.info_log, "Deleting private dir %s -- %s",
private_dir.c_str(), s.ToString().c_str());
}
}
obsolete_backups_.clear();
if (full_scan) {
Log(options_.info_log, "Starting full scan garbage collection");
// delete obsolete shared files // delete obsolete shared files
std::vector<std::string> shared_children; std::vector<std::string> shared_children;
backup_env_->GetChildren(GetAbsolutePath(GetSharedFileRel()), backup_env_->GetChildren(GetAbsolutePath(GetSharedFileRel()),
@ -1081,7 +1093,8 @@ void BackupEngineImpl::GarbageCollection(bool full_scan) {
Log(options_.info_log, "Deleted dir %s -- %s", full_private_path.c_str(), Log(options_.info_log, "Deleted dir %s -- %s", full_private_path.c_str(),
s.ToString().c_str()); s.ToString().c_str());
} }
}
return Status::OK();
} }
// ------- BackupMeta class -------- // ------- BackupMeta class --------
@ -1257,6 +1270,10 @@ class BackupEngineReadOnlyImpl : public BackupEngineReadOnly {
backup_engine_->GetBackupInfo(backup_info); backup_engine_->GetBackupInfo(backup_info);
} }
virtual void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) {
backup_engine_->GetCorruptedBackups(corrupt_backup_ids);
}
virtual Status RestoreDBFromBackup( virtual Status RestoreDBFromBackup(
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir, BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options = RestoreOptions()) { const RestoreOptions& restore_options = RestoreOptions()) {
@ -1302,6 +1319,11 @@ void BackupableDB::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
backup_engine_->GetBackupInfo(backup_info); backup_engine_->GetBackupInfo(backup_info);
} }
void
BackupableDB::GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) {
backup_engine_->GetCorruptedBackups(corrupt_backup_ids);
}
Status BackupableDB::PurgeOldBackups(uint32_t num_backups_to_keep) { Status BackupableDB::PurgeOldBackups(uint32_t num_backups_to_keep) {
return backup_engine_->PurgeOldBackups(num_backups_to_keep); return backup_engine_->PurgeOldBackups(num_backups_to_keep);
} }
@ -1314,6 +1336,10 @@ void BackupableDB::StopBackup() {
backup_engine_->StopBackup(); backup_engine_->StopBackup();
} }
Status BackupableDB::GarbageCollect() {
return backup_engine_->GarbageCollect();
}
// --- RestoreBackupableDB methods ------ // --- RestoreBackupableDB methods ------
RestoreBackupableDB::RestoreBackupableDB(Env* db_env, RestoreBackupableDB::RestoreBackupableDB(Env* db_env,
@ -1329,6 +1355,11 @@ RestoreBackupableDB::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
backup_engine_->GetBackupInfo(backup_info); backup_engine_->GetBackupInfo(backup_info);
} }
void RestoreBackupableDB::GetCorruptedBackups(
std::vector<BackupID>* corrupt_backup_ids) {
backup_engine_->GetCorruptedBackups(corrupt_backup_ids);
}
Status RestoreBackupableDB::RestoreDBFromBackup( Status RestoreBackupableDB::RestoreDBFromBackup(
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir, BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options) { const RestoreOptions& restore_options) {
@ -1351,6 +1382,10 @@ Status RestoreBackupableDB::DeleteBackup(BackupID backup_id) {
return backup_engine_->DeleteBackup(backup_id); return backup_engine_->DeleteBackup(backup_id);
} }
Status RestoreBackupableDB::GarbageCollect() {
return backup_engine_->GarbageCollect();
}
} // namespace rocksdb } // namespace rocksdb
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE

View File

@ -646,6 +646,32 @@ TEST(BackupableDBTest, CorruptionsTest) {
ASSERT_OK(db_->CreateNewBackup(!!(rnd.Next() % 2))); ASSERT_OK(db_->CreateNewBackup(!!(rnd.Next() % 2)));
CloseBackupableDB(); CloseBackupableDB();
AssertBackupConsistency(2, 0, keys_iteration * 2, keys_iteration * 5); AssertBackupConsistency(2, 0, keys_iteration * 2, keys_iteration * 5);
// make sure that no corrupt backups have actually been deleted!
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/1"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/2"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/3"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/4"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/5"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/1"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/2"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/3"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/4"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/5"));
// delete the corrupt backups and then make sure they're actually deleted
OpenBackupableDB();
ASSERT_OK(db_->DeleteBackup(5));
ASSERT_OK(db_->DeleteBackup(4));
ASSERT_OK(db_->DeleteBackup(3));
(void) db_->GarbageCollect();
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/5") == false);
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/5") == false);
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/4") == false);
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/4") == false);
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/3") == false);
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/3") == false);
CloseBackupableDB();
} }
// open DB, write, close DB, backup, restore, repeat // open DB, write, close DB, backup, restore, repeat
@ -867,6 +893,8 @@ TEST(BackupableDBTest, DeleteTmpFiles) {
file_manager_->WriteToFile(private_tmp_file, "tmp"); file_manager_->WriteToFile(private_tmp_file, "tmp");
ASSERT_EQ(true, file_manager_->FileExists(private_tmp_dir)); ASSERT_EQ(true, file_manager_->FileExists(private_tmp_dir));
OpenBackupableDB(); OpenBackupableDB();
// Need to call this explicitly to delete tmp files
(void) db_->GarbageCollect();
CloseBackupableDB(); CloseBackupableDB();
ASSERT_EQ(false, file_manager_->FileExists(shared_tmp)); ASSERT_EQ(false, file_manager_->FileExists(shared_tmp));
ASSERT_EQ(false, file_manager_->FileExists(private_tmp_file)); ASSERT_EQ(false, file_manager_->FileExists(private_tmp_file));