Improve Backup Engine.
Summary: Improve the backup engine by not deleting the corrupted backup when it is detected; instead leaving it to the client to delete the corrupted backup. Also add a BackupEngine::Open() call. Test Plan: Add check to CorruptionTest inside backupable_db_test to check that the corrupt backups are not deleted. The previous version of the code failed this test as backups were deleted, but after the changes in this commit, this test passes. Run make check to ensure that no other tests fail. Reviewers: sdong, benj, sanketh, sumeet, igor Reviewed By: igor Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D28521
This commit is contained in:
parent
1033db29f9
commit
31b02dc21d
@ -2,6 +2,9 @@
|
||||
|
||||
## Unreleased
|
||||
|
||||
* BackupEngine::NewBackupEngine() was deprecated; please use BackupEngine::Open() from now on.
|
||||
* BackupableDB/RestoreBackupableDB have new GarbageCollect() methods, which will clean up files from corrupt and obsolete backups.
|
||||
* BackupableDB/RestoreBackupableDB have new GetCorruptedBackups() methods which list corrupt backups.
|
||||
|
||||
## 3.7.0 (11/6/2014)
|
||||
### Public API changes
|
||||
|
@ -177,6 +177,8 @@ class BackupEngineReadOnly {
|
||||
// You can GetBackupInfo safely, even with other BackupEngine performing
|
||||
// backups on the same directory
|
||||
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0;
|
||||
virtual void GetCorruptedBackups(
|
||||
std::vector<BackupID>* corrupt_backup_ids) = 0;
|
||||
|
||||
// Restoring DB from backup is NOT safe when there is another BackupEngine
|
||||
// running that might call DeleteBackup() or PurgeOldBackups(). It is caller's
|
||||
@ -196,7 +198,12 @@ class BackupEngine {
|
||||
virtual ~BackupEngine() {}
|
||||
|
||||
static BackupEngine* NewBackupEngine(Env* db_env,
|
||||
const BackupableDBOptions& options);
|
||||
const BackupableDBOptions& options)
|
||||
__attribute__((deprecated("Please use Open() instead")));
|
||||
|
||||
static Status Open(Env* db_env,
|
||||
const BackupableDBOptions& options,
|
||||
BackupEngine** backup_engine_ptr);
|
||||
|
||||
virtual Status CreateNewBackup(DB* db, bool flush_before_backup = false) = 0;
|
||||
virtual Status PurgeOldBackups(uint32_t num_backups_to_keep) = 0;
|
||||
@ -204,12 +211,16 @@ class BackupEngine {
|
||||
virtual void StopBackup() = 0;
|
||||
|
||||
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0;
|
||||
virtual void GetCorruptedBackups(
|
||||
std::vector<BackupID>* corrupt_backup_ids) = 0;
|
||||
virtual Status RestoreDBFromBackup(
|
||||
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
|
||||
const RestoreOptions& restore_options = RestoreOptions()) = 0;
|
||||
virtual Status RestoreDBFromLatestBackup(
|
||||
const std::string& db_dir, const std::string& wal_dir,
|
||||
const RestoreOptions& restore_options = RestoreOptions()) = 0;
|
||||
|
||||
virtual Status GarbageCollect() = 0;
|
||||
};
|
||||
|
||||
// Stack your DB with BackupableDB to be able to backup the DB
|
||||
@ -228,6 +239,8 @@ class BackupableDB : public StackableDB {
|
||||
Status CreateNewBackup(bool flush_before_backup = false);
|
||||
// Returns info about backups in backup_info
|
||||
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
|
||||
// Returns info about corrupt backups in corrupt_backups
|
||||
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids);
|
||||
// deletes old backups, keeping latest num_backups_to_keep alive
|
||||
Status PurgeOldBackups(uint32_t num_backups_to_keep);
|
||||
// deletes a specific backup
|
||||
@ -241,6 +254,11 @@ class BackupableDB : public StackableDB {
|
||||
// next time you create BackupableDB or RestoreBackupableDB.
|
||||
void StopBackup();
|
||||
|
||||
// Will delete all the files we don't need anymore
|
||||
// It will do the full scan of the files/ directory and delete all the
|
||||
// files that are not referenced.
|
||||
Status GarbageCollect();
|
||||
|
||||
private:
|
||||
BackupEngine* backup_engine_;
|
||||
};
|
||||
@ -253,6 +271,8 @@ class RestoreBackupableDB {
|
||||
|
||||
// Returns info about backups in backup_info
|
||||
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
|
||||
// Returns info about corrupt backups in corrupt_backups
|
||||
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids);
|
||||
|
||||
// restore from backup with backup_id
|
||||
// IMPORTANT -- if options_.share_table_files == true and you restore DB
|
||||
@ -279,6 +299,11 @@ class RestoreBackupableDB {
|
||||
// deletes a specific backup
|
||||
Status DeleteBackup(BackupID backup_id);
|
||||
|
||||
// Will delete all the files we don't need anymore
|
||||
// It will do the full scan of the files/ directory and delete all the
|
||||
// files that are not referenced.
|
||||
Status GarbageCollect();
|
||||
|
||||
private:
|
||||
BackupEngine* backup_engine_;
|
||||
};
|
||||
|
@ -121,8 +121,10 @@ class BackupEngineImpl : public BackupEngine {
|
||||
void StopBackup() {
|
||||
stop_backup_.store(true, std::memory_order_release);
|
||||
}
|
||||
Status GarbageCollect();
|
||||
|
||||
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
|
||||
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids);
|
||||
Status RestoreDBFromBackup(BackupID backup_id, const std::string& db_dir,
|
||||
const std::string& wal_dir,
|
||||
const RestoreOptions& restore_options =
|
||||
@ -285,16 +287,11 @@ class BackupEngineImpl : public BackupEngine {
|
||||
uint64_t size_limit,
|
||||
uint32_t* checksum_value);
|
||||
|
||||
// Will delete all the files we don't need anymore
|
||||
// If full_scan == true, it will do the full scan of files/ directory
|
||||
// and delete all the files that are not referenced from backuped_file_infos__
|
||||
void GarbageCollection(bool full_scan);
|
||||
|
||||
// backup state data
|
||||
BackupID latest_backup_id_;
|
||||
std::map<BackupID, BackupMeta> backups_;
|
||||
std::map<BackupID, std::pair<Status, BackupMeta> > corrupt_backups_;
|
||||
std::unordered_map<std::string, FileInfo> backuped_file_infos_;
|
||||
std::vector<BackupID> obsolete_backups_;
|
||||
std::atomic<bool> stop_backup_;
|
||||
|
||||
// options data
|
||||
@ -319,6 +316,13 @@ BackupEngine* BackupEngine::NewBackupEngine(
|
||||
return new BackupEngineImpl(db_env, options);
|
||||
}
|
||||
|
||||
Status BackupEngine::Open(Env* env,
|
||||
const BackupableDBOptions& options,
|
||||
BackupEngine** backup_engine_ptr) {
|
||||
*backup_engine_ptr = new BackupEngineImpl(env, options);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
BackupEngineImpl::BackupEngineImpl(Env* db_env,
|
||||
const BackupableDBOptions& options,
|
||||
bool read_only)
|
||||
@ -377,14 +381,10 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
|
||||
|
||||
if (options_.destroy_old_data) { // Destory old data
|
||||
assert(!read_only_);
|
||||
for (auto& backup : backups_) {
|
||||
backup.second.Delete();
|
||||
obsolete_backups_.push_back(backup.first);
|
||||
}
|
||||
backups_.clear();
|
||||
PurgeOldBackups(0);
|
||||
(void) GarbageCollect();
|
||||
// start from beginning
|
||||
latest_backup_id_ = 0;
|
||||
// GarbageCollection() will do the actual deletion
|
||||
} else { // Load data from storage
|
||||
// load the backups if any
|
||||
for (auto& backup : backups_) {
|
||||
@ -392,16 +392,13 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
|
||||
if (!s.ok()) {
|
||||
Log(options_.info_log, "Backup %u corrupted -- %s", backup.first,
|
||||
s.ToString().c_str());
|
||||
if (!read_only_) {
|
||||
Log(options_.info_log, "-> Deleting backup %u", backup.first);
|
||||
}
|
||||
backup.second.Delete(!read_only_);
|
||||
obsolete_backups_.push_back(backup.first);
|
||||
corrupt_backups_.insert(std::make_pair(
|
||||
backup.first, std::make_pair(s, backup.second)));
|
||||
}
|
||||
}
|
||||
// delete obsolete backups from the structure
|
||||
for (auto ob : obsolete_backups_) {
|
||||
backups_.erase(ob);
|
||||
|
||||
for (auto corrupt : corrupt_backups_) {
|
||||
backups_.erase(backups_.find(corrupt.first));
|
||||
}
|
||||
|
||||
Status s = GetLatestBackupFileContents(&latest_backup_id_);
|
||||
@ -417,16 +414,17 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
|
||||
}
|
||||
|
||||
// delete any backups that claim to be later than latest
|
||||
for (auto itr = backups_.upper_bound(latest_backup_id_);
|
||||
itr != backups_.end();) {
|
||||
itr->second.Delete();
|
||||
obsolete_backups_.push_back(itr->first);
|
||||
itr = backups_.erase(itr);
|
||||
std::vector<BackupID> later_ids;
|
||||
for (auto itr = backups_.lower_bound(latest_backup_id_ + 1);
|
||||
itr != backups_.end(); itr++) {
|
||||
later_ids.push_back(itr->first);
|
||||
}
|
||||
for (auto id : later_ids) {
|
||||
DeleteBackup(id);
|
||||
}
|
||||
|
||||
if (!read_only_) {
|
||||
PutLatestBackupFileContents(latest_backup_id_); // Ignore errors
|
||||
GarbageCollection(true);
|
||||
}
|
||||
Log(options_.info_log, "Initialized BackupEngine, the latest backup is %u.",
|
||||
latest_backup_id_);
|
||||
@ -575,7 +573,7 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
|
||||
Log(options_.info_log, "Backup Statistics %s\n",
|
||||
backup_statistics_.ToString().c_str());
|
||||
backups_.erase(new_backup_id);
|
||||
GarbageCollection(true);
|
||||
(void) GarbageCollect();
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -601,13 +599,15 @@ Status BackupEngineImpl::PurgeOldBackups(uint32_t num_backups_to_keep) {
|
||||
assert(!read_only_);
|
||||
Log(options_.info_log, "Purging old backups, keeping %u",
|
||||
num_backups_to_keep);
|
||||
while (num_backups_to_keep < backups_.size()) {
|
||||
Log(options_.info_log, "Deleting backup %u", backups_.begin()->first);
|
||||
backups_.begin()->second.Delete();
|
||||
obsolete_backups_.push_back(backups_.begin()->first);
|
||||
backups_.erase(backups_.begin());
|
||||
std::vector<BackupID> to_delete;
|
||||
auto itr = backups_.begin();
|
||||
while ((backups_.size() - to_delete.size()) > num_backups_to_keep) {
|
||||
to_delete.push_back(itr->first);
|
||||
itr++;
|
||||
}
|
||||
for (auto backup_id : to_delete) {
|
||||
DeleteBackup(backup_id);
|
||||
}
|
||||
GarbageCollection(false);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -615,13 +615,37 @@ Status BackupEngineImpl::DeleteBackup(BackupID backup_id) {
|
||||
assert(!read_only_);
|
||||
Log(options_.info_log, "Deleting backup %u", backup_id);
|
||||
auto backup = backups_.find(backup_id);
|
||||
if (backup == backups_.end()) {
|
||||
if (backup != backups_.end()) {
|
||||
backup->second.Delete();
|
||||
backups_.erase(backup);
|
||||
} else {
|
||||
auto corrupt = corrupt_backups_.find(backup_id);
|
||||
if (corrupt == corrupt_backups_.end()) {
|
||||
return Status::NotFound("Backup not found");
|
||||
}
|
||||
backup->second.Delete();
|
||||
obsolete_backups_.push_back(backup_id);
|
||||
backups_.erase(backup);
|
||||
GarbageCollection(false);
|
||||
corrupt->second.second.Delete();
|
||||
corrupt_backups_.erase(corrupt);
|
||||
}
|
||||
|
||||
std::vector<std::string> to_delete;
|
||||
for (auto& itr : backuped_file_infos_) {
|
||||
if (itr.second.refs == 0) {
|
||||
Status s = backup_env_->DeleteFile(GetAbsolutePath(itr.first));
|
||||
Log(options_.info_log, "Deleting %s -- %s", itr.first.c_str(),
|
||||
s.ToString().c_str());
|
||||
to_delete.push_back(itr.first);
|
||||
}
|
||||
}
|
||||
for (auto& td : to_delete) {
|
||||
backuped_file_infos_.erase(td);
|
||||
}
|
||||
|
||||
// take care of private dirs -- GarbageCollect() will take care of them
|
||||
// if they are not empty
|
||||
std::string private_dir = GetPrivateFileRel(backup_id);
|
||||
Status s = backup_env_->DeleteDir(GetAbsolutePath(private_dir));
|
||||
Log(options_.info_log, "Deleting private dir %s -- %s",
|
||||
private_dir.c_str(), s.ToString().c_str());
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -636,9 +660,22 @@ void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
BackupEngineImpl::GetCorruptedBackups(
|
||||
std::vector<BackupID>* corrupt_backup_ids) {
|
||||
corrupt_backup_ids->reserve(corrupt_backups_.size());
|
||||
for (auto& backup : corrupt_backups_) {
|
||||
corrupt_backup_ids->push_back(backup.first);
|
||||
}
|
||||
}
|
||||
|
||||
Status BackupEngineImpl::RestoreDBFromBackup(
|
||||
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
|
||||
const RestoreOptions& restore_options) {
|
||||
auto corrupt_itr = corrupt_backups_.find(backup_id);
|
||||
if (corrupt_itr != corrupt_backups_.end()) {
|
||||
return corrupt_itr->second.first;
|
||||
}
|
||||
auto backup_itr = backups_.find(backup_id);
|
||||
if (backup_itr == backups_.end()) {
|
||||
return Status::NotFound("Backup not found");
|
||||
@ -1005,35 +1042,10 @@ void BackupEngineImpl::DeleteChildren(const std::string& dir,
|
||||
}
|
||||
}
|
||||
|
||||
void BackupEngineImpl::GarbageCollection(bool full_scan) {
|
||||
Status BackupEngineImpl::GarbageCollect() {
|
||||
assert(!read_only_);
|
||||
Log(options_.info_log, "Starting garbage collection");
|
||||
std::vector<std::string> to_delete;
|
||||
for (auto& itr : backuped_file_infos_) {
|
||||
if (itr.second.refs == 0) {
|
||||
Status s = backup_env_->DeleteFile(GetAbsolutePath(itr.first));
|
||||
Log(options_.info_log, "Deleting %s -- %s", itr.first.c_str(),
|
||||
s.ToString().c_str());
|
||||
to_delete.push_back(itr.first);
|
||||
}
|
||||
}
|
||||
for (auto& td : to_delete) {
|
||||
backuped_file_infos_.erase(td);
|
||||
}
|
||||
if (!full_scan) {
|
||||
// take care of private dirs -- if full_scan == true, then full_scan will
|
||||
// take care of them
|
||||
for (auto backup_id : obsolete_backups_) {
|
||||
std::string private_dir = GetPrivateFileRel(backup_id);
|
||||
Status s = backup_env_->DeleteDir(GetAbsolutePath(private_dir));
|
||||
Log(options_.info_log, "Deleting private dir %s -- %s",
|
||||
private_dir.c_str(), s.ToString().c_str());
|
||||
}
|
||||
}
|
||||
obsolete_backups_.clear();
|
||||
|
||||
if (full_scan) {
|
||||
Log(options_.info_log, "Starting full scan garbage collection");
|
||||
// delete obsolete shared files
|
||||
std::vector<std::string> shared_children;
|
||||
backup_env_->GetChildren(GetAbsolutePath(GetSharedFileRel()),
|
||||
@ -1081,7 +1093,8 @@ void BackupEngineImpl::GarbageCollection(bool full_scan) {
|
||||
Log(options_.info_log, "Deleted dir %s -- %s", full_private_path.c_str(),
|
||||
s.ToString().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// ------- BackupMeta class --------
|
||||
@ -1257,6 +1270,10 @@ class BackupEngineReadOnlyImpl : public BackupEngineReadOnly {
|
||||
backup_engine_->GetBackupInfo(backup_info);
|
||||
}
|
||||
|
||||
virtual void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) {
|
||||
backup_engine_->GetCorruptedBackups(corrupt_backup_ids);
|
||||
}
|
||||
|
||||
virtual Status RestoreDBFromBackup(
|
||||
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
|
||||
const RestoreOptions& restore_options = RestoreOptions()) {
|
||||
@ -1302,6 +1319,11 @@ void BackupableDB::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
|
||||
backup_engine_->GetBackupInfo(backup_info);
|
||||
}
|
||||
|
||||
void
|
||||
BackupableDB::GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) {
|
||||
backup_engine_->GetCorruptedBackups(corrupt_backup_ids);
|
||||
}
|
||||
|
||||
Status BackupableDB::PurgeOldBackups(uint32_t num_backups_to_keep) {
|
||||
return backup_engine_->PurgeOldBackups(num_backups_to_keep);
|
||||
}
|
||||
@ -1314,6 +1336,10 @@ void BackupableDB::StopBackup() {
|
||||
backup_engine_->StopBackup();
|
||||
}
|
||||
|
||||
Status BackupableDB::GarbageCollect() {
|
||||
return backup_engine_->GarbageCollect();
|
||||
}
|
||||
|
||||
// --- RestoreBackupableDB methods ------
|
||||
|
||||
RestoreBackupableDB::RestoreBackupableDB(Env* db_env,
|
||||
@ -1329,6 +1355,11 @@ RestoreBackupableDB::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
|
||||
backup_engine_->GetBackupInfo(backup_info);
|
||||
}
|
||||
|
||||
void RestoreBackupableDB::GetCorruptedBackups(
|
||||
std::vector<BackupID>* corrupt_backup_ids) {
|
||||
backup_engine_->GetCorruptedBackups(corrupt_backup_ids);
|
||||
}
|
||||
|
||||
Status RestoreBackupableDB::RestoreDBFromBackup(
|
||||
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
|
||||
const RestoreOptions& restore_options) {
|
||||
@ -1351,6 +1382,10 @@ Status RestoreBackupableDB::DeleteBackup(BackupID backup_id) {
|
||||
return backup_engine_->DeleteBackup(backup_id);
|
||||
}
|
||||
|
||||
Status RestoreBackupableDB::GarbageCollect() {
|
||||
return backup_engine_->GarbageCollect();
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
#endif // ROCKSDB_LITE
|
||||
|
@ -646,6 +646,32 @@ TEST(BackupableDBTest, CorruptionsTest) {
|
||||
ASSERT_OK(db_->CreateNewBackup(!!(rnd.Next() % 2)));
|
||||
CloseBackupableDB();
|
||||
AssertBackupConsistency(2, 0, keys_iteration * 2, keys_iteration * 5);
|
||||
|
||||
// make sure that no corrupt backups have actually been deleted!
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/1"));
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/2"));
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/3"));
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/4"));
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/5"));
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/1"));
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/2"));
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/3"));
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/4"));
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/5"));
|
||||
|
||||
// delete the corrupt backups and then make sure they're actually deleted
|
||||
OpenBackupableDB();
|
||||
ASSERT_OK(db_->DeleteBackup(5));
|
||||
ASSERT_OK(db_->DeleteBackup(4));
|
||||
ASSERT_OK(db_->DeleteBackup(3));
|
||||
(void) db_->GarbageCollect();
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/5") == false);
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/5") == false);
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/4") == false);
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/4") == false);
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/3") == false);
|
||||
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/3") == false);
|
||||
CloseBackupableDB();
|
||||
}
|
||||
|
||||
// open DB, write, close DB, backup, restore, repeat
|
||||
@ -867,6 +893,8 @@ TEST(BackupableDBTest, DeleteTmpFiles) {
|
||||
file_manager_->WriteToFile(private_tmp_file, "tmp");
|
||||
ASSERT_EQ(true, file_manager_->FileExists(private_tmp_dir));
|
||||
OpenBackupableDB();
|
||||
// Need to call this explicitly to delete tmp files
|
||||
(void) db_->GarbageCollect();
|
||||
CloseBackupableDB();
|
||||
ASSERT_EQ(false, file_manager_->FileExists(shared_tmp));
|
||||
ASSERT_EQ(false, file_manager_->FileExists(private_tmp_file));
|
||||
|
Loading…
Reference in New Issue
Block a user