Improve Backup Engine.

Summary:
Improve the backup engine by not deleting the corrupted
backup when it is detected; instead leaving it to the client
to delete the corrupted backup.

Also add a BackupEngine::Open() call.

Test Plan:
Add check to CorruptionTest inside backupable_db_test
to check that the corrupt backups are not deleted. The previous
version of the code failed this test as backups were deleted,
but after the changes in this commit, this test passes.

Run make check to ensure that no other tests fail.

Reviewers: sdong, benj, sanketh, sumeet, igor

Reviewed By: igor

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D28521
This commit is contained in:
Hasnain Lakhani 2014-11-13 09:51:41 -08:00
parent 1033db29f9
commit 31b02dc21d
4 changed files with 202 additions and 111 deletions

View File

@ -2,6 +2,9 @@
## Unreleased
* BackupEngine::NewBackupEngine() was deprecated; please use BackupEngine::Open() from now on.
* BackupableDB/RestoreBackupableDB have new GarbageCollect() methods, which will clean up files from corrupt and obsolete backups.
* BackupableDB/RestoreBackupableDB have new GetCorruptedBackups() methods which list corrupt backups.
## 3.7.0 (11/6/2014)
### Public API changes

View File

@ -177,6 +177,8 @@ class BackupEngineReadOnly {
// You can GetBackupInfo safely, even with other BackupEngine performing
// backups on the same directory
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0;
virtual void GetCorruptedBackups(
std::vector<BackupID>* corrupt_backup_ids) = 0;
// Restoring DB from backup is NOT safe when there is another BackupEngine
// running that might call DeleteBackup() or PurgeOldBackups(). It is caller's
@ -196,7 +198,12 @@ class BackupEngine {
virtual ~BackupEngine() {}
static BackupEngine* NewBackupEngine(Env* db_env,
const BackupableDBOptions& options);
const BackupableDBOptions& options)
__attribute__((deprecated("Please use Open() instead")));
static Status Open(Env* db_env,
const BackupableDBOptions& options,
BackupEngine** backup_engine_ptr);
virtual Status CreateNewBackup(DB* db, bool flush_before_backup = false) = 0;
virtual Status PurgeOldBackups(uint32_t num_backups_to_keep) = 0;
@ -204,12 +211,16 @@ class BackupEngine {
virtual void StopBackup() = 0;
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0;
virtual void GetCorruptedBackups(
std::vector<BackupID>* corrupt_backup_ids) = 0;
virtual Status RestoreDBFromBackup(
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options = RestoreOptions()) = 0;
virtual Status RestoreDBFromLatestBackup(
const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options = RestoreOptions()) = 0;
virtual Status GarbageCollect() = 0;
};
// Stack your DB with BackupableDB to be able to backup the DB
@ -228,6 +239,8 @@ class BackupableDB : public StackableDB {
Status CreateNewBackup(bool flush_before_backup = false);
// Returns info about backups in backup_info
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
// Returns info about corrupt backups in corrupt_backups
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids);
// deletes old backups, keeping latest num_backups_to_keep alive
Status PurgeOldBackups(uint32_t num_backups_to_keep);
// deletes a specific backup
@ -241,6 +254,11 @@ class BackupableDB : public StackableDB {
// next time you create BackupableDB or RestoreBackupableDB.
void StopBackup();
// Will delete all the files we don't need anymore
// It will do the full scan of the files/ directory and delete all the
// files that are not referenced.
Status GarbageCollect();
private:
BackupEngine* backup_engine_;
};
@ -253,6 +271,8 @@ class RestoreBackupableDB {
// Returns info about backups in backup_info
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
// Returns info about corrupt backups in corrupt_backups
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids);
// restore from backup with backup_id
// IMPORTANT -- if options_.share_table_files == true and you restore DB
@ -279,6 +299,11 @@ class RestoreBackupableDB {
// deletes a specific backup
Status DeleteBackup(BackupID backup_id);
// Will delete all the files we don't need anymore
// It will do the full scan of the files/ directory and delete all the
// files that are not referenced.
Status GarbageCollect();
private:
BackupEngine* backup_engine_;
};

View File

@ -121,8 +121,10 @@ class BackupEngineImpl : public BackupEngine {
void StopBackup() {
stop_backup_.store(true, std::memory_order_release);
}
Status GarbageCollect();
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids);
Status RestoreDBFromBackup(BackupID backup_id, const std::string& db_dir,
const std::string& wal_dir,
const RestoreOptions& restore_options =
@ -285,16 +287,11 @@ class BackupEngineImpl : public BackupEngine {
uint64_t size_limit,
uint32_t* checksum_value);
// Will delete all the files we don't need anymore
// If full_scan == true, it will do the full scan of files/ directory
// and delete all the files that are not referenced from backuped_file_infos__
void GarbageCollection(bool full_scan);
// backup state data
BackupID latest_backup_id_;
std::map<BackupID, BackupMeta> backups_;
std::map<BackupID, std::pair<Status, BackupMeta> > corrupt_backups_;
std::unordered_map<std::string, FileInfo> backuped_file_infos_;
std::vector<BackupID> obsolete_backups_;
std::atomic<bool> stop_backup_;
// options data
@ -319,6 +316,13 @@ BackupEngine* BackupEngine::NewBackupEngine(
return new BackupEngineImpl(db_env, options);
}
Status BackupEngine::Open(Env* env,
const BackupableDBOptions& options,
BackupEngine** backup_engine_ptr) {
*backup_engine_ptr = new BackupEngineImpl(env, options);
return Status::OK();
}
BackupEngineImpl::BackupEngineImpl(Env* db_env,
const BackupableDBOptions& options,
bool read_only)
@ -377,14 +381,10 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
if (options_.destroy_old_data) { // Destory old data
assert(!read_only_);
for (auto& backup : backups_) {
backup.second.Delete();
obsolete_backups_.push_back(backup.first);
}
backups_.clear();
PurgeOldBackups(0);
(void) GarbageCollect();
// start from beginning
latest_backup_id_ = 0;
// GarbageCollection() will do the actual deletion
} else { // Load data from storage
// load the backups if any
for (auto& backup : backups_) {
@ -392,16 +392,13 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
if (!s.ok()) {
Log(options_.info_log, "Backup %u corrupted -- %s", backup.first,
s.ToString().c_str());
if (!read_only_) {
Log(options_.info_log, "-> Deleting backup %u", backup.first);
}
backup.second.Delete(!read_only_);
obsolete_backups_.push_back(backup.first);
corrupt_backups_.insert(std::make_pair(
backup.first, std::make_pair(s, backup.second)));
}
}
// delete obsolete backups from the structure
for (auto ob : obsolete_backups_) {
backups_.erase(ob);
for (auto corrupt : corrupt_backups_) {
backups_.erase(backups_.find(corrupt.first));
}
Status s = GetLatestBackupFileContents(&latest_backup_id_);
@ -417,16 +414,17 @@ BackupEngineImpl::BackupEngineImpl(Env* db_env,
}
// delete any backups that claim to be later than latest
for (auto itr = backups_.upper_bound(latest_backup_id_);
itr != backups_.end();) {
itr->second.Delete();
obsolete_backups_.push_back(itr->first);
itr = backups_.erase(itr);
std::vector<BackupID> later_ids;
for (auto itr = backups_.lower_bound(latest_backup_id_ + 1);
itr != backups_.end(); itr++) {
later_ids.push_back(itr->first);
}
for (auto id : later_ids) {
DeleteBackup(id);
}
if (!read_only_) {
PutLatestBackupFileContents(latest_backup_id_); // Ignore errors
GarbageCollection(true);
}
Log(options_.info_log, "Initialized BackupEngine, the latest backup is %u.",
latest_backup_id_);
@ -575,7 +573,7 @@ Status BackupEngineImpl::CreateNewBackup(DB* db, bool flush_before_backup) {
Log(options_.info_log, "Backup Statistics %s\n",
backup_statistics_.ToString().c_str());
backups_.erase(new_backup_id);
GarbageCollection(true);
(void) GarbageCollect();
return s;
}
@ -601,13 +599,15 @@ Status BackupEngineImpl::PurgeOldBackups(uint32_t num_backups_to_keep) {
assert(!read_only_);
Log(options_.info_log, "Purging old backups, keeping %u",
num_backups_to_keep);
while (num_backups_to_keep < backups_.size()) {
Log(options_.info_log, "Deleting backup %u", backups_.begin()->first);
backups_.begin()->second.Delete();
obsolete_backups_.push_back(backups_.begin()->first);
backups_.erase(backups_.begin());
std::vector<BackupID> to_delete;
auto itr = backups_.begin();
while ((backups_.size() - to_delete.size()) > num_backups_to_keep) {
to_delete.push_back(itr->first);
itr++;
}
for (auto backup_id : to_delete) {
DeleteBackup(backup_id);
}
GarbageCollection(false);
return Status::OK();
}
@ -615,13 +615,37 @@ Status BackupEngineImpl::DeleteBackup(BackupID backup_id) {
assert(!read_only_);
Log(options_.info_log, "Deleting backup %u", backup_id);
auto backup = backups_.find(backup_id);
if (backup == backups_.end()) {
return Status::NotFound("Backup not found");
if (backup != backups_.end()) {
backup->second.Delete();
backups_.erase(backup);
} else {
auto corrupt = corrupt_backups_.find(backup_id);
if (corrupt == corrupt_backups_.end()) {
return Status::NotFound("Backup not found");
}
corrupt->second.second.Delete();
corrupt_backups_.erase(corrupt);
}
backup->second.Delete();
obsolete_backups_.push_back(backup_id);
backups_.erase(backup);
GarbageCollection(false);
std::vector<std::string> to_delete;
for (auto& itr : backuped_file_infos_) {
if (itr.second.refs == 0) {
Status s = backup_env_->DeleteFile(GetAbsolutePath(itr.first));
Log(options_.info_log, "Deleting %s -- %s", itr.first.c_str(),
s.ToString().c_str());
to_delete.push_back(itr.first);
}
}
for (auto& td : to_delete) {
backuped_file_infos_.erase(td);
}
// take care of private dirs -- GarbageCollect() will take care of them
// if they are not empty
std::string private_dir = GetPrivateFileRel(backup_id);
Status s = backup_env_->DeleteDir(GetAbsolutePath(private_dir));
Log(options_.info_log, "Deleting private dir %s -- %s",
private_dir.c_str(), s.ToString().c_str());
return Status::OK();
}
@ -636,9 +660,22 @@ void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
}
}
void
BackupEngineImpl::GetCorruptedBackups(
std::vector<BackupID>* corrupt_backup_ids) {
corrupt_backup_ids->reserve(corrupt_backups_.size());
for (auto& backup : corrupt_backups_) {
corrupt_backup_ids->push_back(backup.first);
}
}
Status BackupEngineImpl::RestoreDBFromBackup(
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options) {
auto corrupt_itr = corrupt_backups_.find(backup_id);
if (corrupt_itr != corrupt_backups_.end()) {
return corrupt_itr->second.first;
}
auto backup_itr = backups_.find(backup_id);
if (backup_itr == backups_.end()) {
return Status::NotFound("Backup not found");
@ -1005,83 +1042,59 @@ void BackupEngineImpl::DeleteChildren(const std::string& dir,
}
}
void BackupEngineImpl::GarbageCollection(bool full_scan) {
Status BackupEngineImpl::GarbageCollect() {
assert(!read_only_);
Log(options_.info_log, "Starting garbage collection");
std::vector<std::string> to_delete;
for (auto& itr : backuped_file_infos_) {
if (itr.second.refs == 0) {
Status s = backup_env_->DeleteFile(GetAbsolutePath(itr.first));
Log(options_.info_log, "Deleting %s -- %s", itr.first.c_str(),
s.ToString().c_str());
to_delete.push_back(itr.first);
}
}
for (auto& td : to_delete) {
backuped_file_infos_.erase(td);
}
if (!full_scan) {
// take care of private dirs -- if full_scan == true, then full_scan will
// take care of them
for (auto backup_id : obsolete_backups_) {
std::string private_dir = GetPrivateFileRel(backup_id);
Status s = backup_env_->DeleteDir(GetAbsolutePath(private_dir));
Log(options_.info_log, "Deleting private dir %s -- %s",
private_dir.c_str(), s.ToString().c_str());
}
}
obsolete_backups_.clear();
if (full_scan) {
Log(options_.info_log, "Starting full scan garbage collection");
// delete obsolete shared files
std::vector<std::string> shared_children;
backup_env_->GetChildren(GetAbsolutePath(GetSharedFileRel()),
&shared_children);
for (auto& child : shared_children) {
std::string rel_fname = GetSharedFileRel(child);
// if it's not refcounted, delete it
if (backuped_file_infos_.find(rel_fname) == backuped_file_infos_.end()) {
// this might be a directory, but DeleteFile will just fail in that
// case, so we're good
Status s = backup_env_->DeleteFile(GetAbsolutePath(rel_fname));
if (s.ok()) {
Log(options_.info_log, "Deleted %s", rel_fname.c_str());
}
// delete obsolete shared files
std::vector<std::string> shared_children;
backup_env_->GetChildren(GetAbsolutePath(GetSharedFileRel()),
&shared_children);
for (auto& child : shared_children) {
std::string rel_fname = GetSharedFileRel(child);
// if it's not refcounted, delete it
if (backuped_file_infos_.find(rel_fname) == backuped_file_infos_.end()) {
// this might be a directory, but DeleteFile will just fail in that
// case, so we're good
Status s = backup_env_->DeleteFile(GetAbsolutePath(rel_fname));
if (s.ok()) {
Log(options_.info_log, "Deleted %s", rel_fname.c_str());
}
}
// delete obsolete private files
std::vector<std::string> private_children;
backup_env_->GetChildren(GetAbsolutePath(GetPrivateDirRel()),
&private_children);
for (auto& child : private_children) {
BackupID backup_id = 0;
bool tmp_dir = child.find(".tmp") != std::string::npos;
sscanf(child.c_str(), "%u", &backup_id);
if (!tmp_dir && // if it's tmp_dir, delete it
(backup_id == 0 || backups_.find(backup_id) != backups_.end())) {
// it's either not a number or it's still alive. continue
continue;
}
// here we have to delete the dir and all its children
std::string full_private_path =
GetAbsolutePath(GetPrivateFileRel(backup_id, tmp_dir));
std::vector<std::string> subchildren;
backup_env_->GetChildren(full_private_path, &subchildren);
for (auto& subchild : subchildren) {
Status s = backup_env_->DeleteFile(full_private_path + subchild);
if (s.ok()) {
Log(options_.info_log, "Deleted %s",
(full_private_path + subchild).c_str());
}
}
// finally delete the private dir
Status s = backup_env_->DeleteDir(full_private_path);
Log(options_.info_log, "Deleted dir %s -- %s", full_private_path.c_str(),
s.ToString().c_str());
}
}
// delete obsolete private files
std::vector<std::string> private_children;
backup_env_->GetChildren(GetAbsolutePath(GetPrivateDirRel()),
&private_children);
for (auto& child : private_children) {
BackupID backup_id = 0;
bool tmp_dir = child.find(".tmp") != std::string::npos;
sscanf(child.c_str(), "%u", &backup_id);
if (!tmp_dir && // if it's tmp_dir, delete it
(backup_id == 0 || backups_.find(backup_id) != backups_.end())) {
// it's either not a number or it's still alive. continue
continue;
}
// here we have to delete the dir and all its children
std::string full_private_path =
GetAbsolutePath(GetPrivateFileRel(backup_id, tmp_dir));
std::vector<std::string> subchildren;
backup_env_->GetChildren(full_private_path, &subchildren);
for (auto& subchild : subchildren) {
Status s = backup_env_->DeleteFile(full_private_path + subchild);
if (s.ok()) {
Log(options_.info_log, "Deleted %s",
(full_private_path + subchild).c_str());
}
}
// finally delete the private dir
Status s = backup_env_->DeleteDir(full_private_path);
Log(options_.info_log, "Deleted dir %s -- %s", full_private_path.c_str(),
s.ToString().c_str());
}
return Status::OK();
}
// ------- BackupMeta class --------
@ -1257,6 +1270,10 @@ class BackupEngineReadOnlyImpl : public BackupEngineReadOnly {
backup_engine_->GetBackupInfo(backup_info);
}
virtual void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) {
backup_engine_->GetCorruptedBackups(corrupt_backup_ids);
}
virtual Status RestoreDBFromBackup(
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options = RestoreOptions()) {
@ -1302,6 +1319,11 @@ void BackupableDB::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
backup_engine_->GetBackupInfo(backup_info);
}
void
BackupableDB::GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) {
backup_engine_->GetCorruptedBackups(corrupt_backup_ids);
}
Status BackupableDB::PurgeOldBackups(uint32_t num_backups_to_keep) {
return backup_engine_->PurgeOldBackups(num_backups_to_keep);
}
@ -1314,6 +1336,10 @@ void BackupableDB::StopBackup() {
backup_engine_->StopBackup();
}
Status BackupableDB::GarbageCollect() {
return backup_engine_->GarbageCollect();
}
// --- RestoreBackupableDB methods ------
RestoreBackupableDB::RestoreBackupableDB(Env* db_env,
@ -1329,6 +1355,11 @@ RestoreBackupableDB::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
backup_engine_->GetBackupInfo(backup_info);
}
void RestoreBackupableDB::GetCorruptedBackups(
std::vector<BackupID>* corrupt_backup_ids) {
backup_engine_->GetCorruptedBackups(corrupt_backup_ids);
}
Status RestoreBackupableDB::RestoreDBFromBackup(
BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
const RestoreOptions& restore_options) {
@ -1351,6 +1382,10 @@ Status RestoreBackupableDB::DeleteBackup(BackupID backup_id) {
return backup_engine_->DeleteBackup(backup_id);
}
Status RestoreBackupableDB::GarbageCollect() {
return backup_engine_->GarbageCollect();
}
} // namespace rocksdb
#endif // ROCKSDB_LITE

View File

@ -646,6 +646,32 @@ TEST(BackupableDBTest, CorruptionsTest) {
ASSERT_OK(db_->CreateNewBackup(!!(rnd.Next() % 2)));
CloseBackupableDB();
AssertBackupConsistency(2, 0, keys_iteration * 2, keys_iteration * 5);
// make sure that no corrupt backups have actually been deleted!
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/1"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/2"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/3"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/4"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/5"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/1"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/2"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/3"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/4"));
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/5"));
// delete the corrupt backups and then make sure they're actually deleted
OpenBackupableDB();
ASSERT_OK(db_->DeleteBackup(5));
ASSERT_OK(db_->DeleteBackup(4));
ASSERT_OK(db_->DeleteBackup(3));
(void) db_->GarbageCollect();
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/5") == false);
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/5") == false);
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/4") == false);
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/4") == false);
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/3") == false);
ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/3") == false);
CloseBackupableDB();
}
// open DB, write, close DB, backup, restore, repeat
@ -867,6 +893,8 @@ TEST(BackupableDBTest, DeleteTmpFiles) {
file_manager_->WriteToFile(private_tmp_file, "tmp");
ASSERT_EQ(true, file_manager_->FileExists(private_tmp_dir));
OpenBackupableDB();
// Need to call this explicitly to delete tmp files
(void) db_->GarbageCollect();
CloseBackupableDB();
ASSERT_EQ(false, file_manager_->FileExists(shared_tmp));
ASSERT_EQ(false, file_manager_->FileExists(private_tmp_file));