BackupableDB: delete backups with newer seq number

Summary: We now delete backups with a newer sequence number, so clients don't have to handle confusing situations when they restore from a backup.

Test Plan: added a unit test

Reviewers: dhruba

Reviewed By: dhruba

CC: leveldb

Differential Revision: https://reviews.facebook.net/D14547
Igor Canadi 2013-12-10 20:49:28 -08:00
parent 204bb9cffd
commit 5e4ab767cf
3 changed files with 110 additions and 24 deletions
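For context on the behavior described in the summary, a minimal client-side sketch of the flow this change enables is shown below. Directory paths are hypothetical, error handling is omitted, and the RestoreBackupableDB constructor arguments (Env*, const BackupableDBOptions&) are an assumption; BackupableDB, RestoreDBFromBackup and CreateNewBackup are as declared in the header diff below.

  #include "utilities/backupable_db.h"
  using namespace rocksdb;

  // Say backups 1..5 exist and we restore backup 3.
  RestoreBackupableDB restore(Env::Default(), BackupableDBOptions("/tmp/backups"));
  restore.RestoreDBFromBackup(3, "/tmp/db", "/tmp/db");

  // Re-opening the restored DB through BackupableDB now deletes backups 4 and 5,
  // whose sequence numbers are newer than the restored state.
  DB* db;
  DB::Open(Options(), "/tmp/db", &db);
  BackupableDB backupable_db(db, BackupableDBOptions("/tmp/backups"));

  // The next backup is created as backup 4 again, consistent with the restored data.
  backupable_db.CreateNewBackup(true /* flush_before_backup */);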


@ -78,6 +78,9 @@ class BackupableDB : public StackableDB {
public:
// BackupableDBOptions have to be the same as the ones used in a previous
// incarnation of the DB
//
// BackupableDB owns the pointer `DB* db` now. You should not delete it or
// use it after the invocation of BackupableDB
BackupableDB(DB* db, const BackupableDBOptions& options);
virtual ~BackupableDB();
@ -106,10 +109,12 @@ class RestoreBackupableDB {
// restore from backup with backup_id
// IMPORTANT -- if you restore from some backup that is not the latest,
// you HAVE to delete all the newer backups immediately, before creating
// new backup on the restored database. Otherwise, your new backups
// will be corrupted.
// TODO should we enforce this somehow?
// and you start creating new backups from the new DB, all the backups
// that were newer than the backup you restored from will be deleted
//
// Example: Let's say you have backups 1, 2, 3, 4, 5 and you restore 3.
// If you try creating a new backup now, old backups 4 and 5 will be deleted
// and new backup with ID 4 will be created.
Status RestoreDBFromBackup(BackupID backup_id, const std::string& db_dir,
const std::string& wal_dir);


@ -40,6 +40,8 @@ class BackupEngine {
return RestoreDBFromBackup(latest_backup_id_, db_dir, wal_dir);
}
void DeleteBackupsNewerThan(uint64_t sequence_number);
private:
class BackupMeta {
public:
@ -59,6 +61,12 @@ class BackupEngine {
uint64_t GetSize() const {
return size_;
}
void SetSequenceNumber(uint64_t sequence_number) {
sequence_number_ = sequence_number;
}
uint64_t GetSequenceNumber() {
return sequence_number_;
}
void AddFile(const std::string& filename, uint64_t size);
void Delete();
@ -76,6 +84,9 @@ class BackupEngine {
private:
int64_t timestamp_;
// sequence number is only approximate, should not be used
// by clients
uint64_t sequence_number_;
uint64_t size_;
std::string const meta_filename_;
// files with relative paths (without "/" prefix!!)
@ -232,11 +243,31 @@ BackupEngine::~BackupEngine() {
LogFlush(options_.info_log);
}
void BackupEngine::DeleteBackupsNewerThan(uint64_t sequence_number) {
for (auto backup : backups_) {
if (backup.second.GetSequenceNumber() > sequence_number) {
Log(options_.info_log,
"Deleting backup %u because sequence number (%lu) is newer than %lu",
backup.first, backup.second.GetSequenceNumber(), sequence_number);
backup.second.Delete();
obsolete_backups_.push_back(backup.first);
}
}
for (auto ob : obsolete_backups_) {
backups_.erase(backups_.find(ob));
}
auto itr = backups_.end();
latest_backup_id_ = (itr == backups_.begin()) ? 0 : (--itr)->first;
PutLatestBackupFileContents(latest_backup_id_); // Ignore errors
GarbageCollection(false);
}
Status BackupEngine::CreateNewBackup(DB* db, bool flush_before_backup) {
Status s;
std::vector<std::string> live_files;
VectorLogPtr live_wal_files;
uint64_t manifest_file_size = 0;
uint64_t sequence_number = db->GetLatestSequenceNumber();
s = db->DisableFileDeletions();
if (s.ok()) {
@ -261,6 +292,7 @@ Status BackupEngine::CreateNewBackup(DB* db, bool flush_before_backup) {
assert(ret.second == true);
auto& new_backup = ret.first->second;
new_backup.RecordTimestamp();
new_backup.SetSequenceNumber(sequence_number);
Log(options_.info_log, "Started the backup process -- creating backup %u",
new_backup_id);
@ -603,8 +635,8 @@ void BackupEngine::GarbageCollection(bool full_scan) {
Log(options_.info_log, "Deleting private dir %s -- %s",
private_dir.c_str(), s.ToString().c_str());
}
obsolete_backups_.clear();
}
obsolete_backups_.clear();
if (full_scan) {
Log(options_.info_log, "Starting full scan garbage collection");
@ -684,6 +716,7 @@ void BackupEngine::BackupMeta::Delete() {
// each backup meta file is of the format:
// <timestamp>
// <seq number>
// <number of files>
// <file1>
// <file2>
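For illustration, a meta file written in this format might contain something like the following; all values and file paths are hypothetical, and the second line is the newly added sequence number (file sizes are not stored here, since LoadFromFile looks them up via GetFileSize):

  1386733768
  527349
  2
  shared/000010.sst
  private/4/MANIFEST-000005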
@ -711,14 +744,24 @@ Status BackupEngine::BackupMeta::LoadFromFile(const std::string& backup_dir) {
int bytes_read = 0;
sscanf(data.data(), "%ld%n", &timestamp_, &bytes_read);
data.remove_prefix(bytes_read + 1); // +1 for '\n'
sscanf(data.data(), "%lu%n", &sequence_number_, &bytes_read);
data.remove_prefix(bytes_read + 1); // +1 for '\n'
sscanf(data.data(), "%u%n", &num_files, &bytes_read);
data.remove_prefix(bytes_read + 1); // +1 for '\n'
std::vector<std::pair<std::string, uint64_t>> files;
for (uint32_t i = 0; s.ok() && i < num_files; ++i) {
std::string filename = GetSliceUntil(&data, '\n').ToString();
uint64_t size;
s = env_->GetFileSize(backup_dir + "/" + filename, &size);
AddFile(filename, size);
files.push_back(std::make_pair(filename, size));
}
if (s.ok()) {
for (auto file : files) {
AddFile(file.first, file.second);
}
}
return s;
@ -738,6 +781,8 @@ Status BackupEngine::BackupMeta::StoreToFile(bool sync) {
unique_ptr<char[]> buf(new char[max_backup_meta_file_size_]);
int len = 0, buf_size = max_backup_meta_file_size_;
len += snprintf(buf.get(), buf_size, "%" PRId64 "\n", timestamp_);
len += snprintf(buf.get() + len, buf_size - len, "%" PRIu64 "\n",
sequence_number_);
len += snprintf(buf.get() + len, buf_size - len, "%zu\n", files_.size());
for (size_t i = 0; i < files_.size(); ++i) {
len += snprintf(buf.get() + len, buf_size - len, "%s\n", files_[i].c_str());
@ -758,9 +803,10 @@ Status BackupEngine::BackupMeta::StoreToFile(bool sync) {
// --- BackupableDB methods --------
BackupableDB::BackupableDB(DB* db, const BackupableDBOptions& options) :
StackableDB(db),
backup_engine_(new BackupEngine(db->GetEnv(), options)) {}
BackupableDB::BackupableDB(DB* db, const BackupableDBOptions& options)
: StackableDB(db), backup_engine_(new BackupEngine(db->GetEnv(), options)) {
backup_engine_->DeleteBackupsNewerThan(GetLatestSequenceNumber());
}
BackupableDB::~BackupableDB() {
delete backup_engine_;


@ -7,6 +7,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "rocksdb/types.h"
#include "rocksdb/transaction_log.h"
#include "utilities/utility_db.h"
#include "utilities/backupable_db.h"
@ -29,7 +30,11 @@ class DummyDB : public StackableDB {
/* implicit */
DummyDB(const Options& options, const std::string& dbname)
: StackableDB(nullptr), options_(options), dbname_(dbname),
deletions_enabled_(true) {}
deletions_enabled_(true), sequence_number_(0) {}
virtual SequenceNumber GetLatestSequenceNumber() const {
return ++sequence_number_;
}
virtual const std::string& GetName() const override {
return dbname_;
@ -117,6 +122,7 @@ class DummyDB : public StackableDB {
Options options_;
std::string dbname_;
bool deletions_enabled_;
mutable SequenceNumber sequence_number_;
}; // DummyDB
class TestEnv : public EnvWrapper {
@ -292,7 +298,7 @@ class BackupableDBTest {
// set up db options
options_.create_if_missing = true;
options_.paranoid_checks = true;
options_.write_buffer_size = 1 << 19; // 512KB
options_.write_buffer_size = 1 << 17; // 128KB
options_.env = test_db_env_.get();
options_.wal_dir = dbname_;
// set up backup db options
@ -305,6 +311,12 @@ class BackupableDBTest {
DestroyDB(dbname_, Options());
}
DB* OpenDB() {
DB* db;
ASSERT_OK(DB::Open(options_, dbname_, &db));
return db;
}
void OpenBackupableDB(bool destroy_old_data = false, bool dummy = false) {
// reset all the defaults
test_backup_env_->SetLimitWrittenFiles(1000000);
@ -354,12 +366,12 @@ class BackupableDBTest {
} else {
ASSERT_OK(restore_db_->RestoreDBFromLatestBackup(dbname_, dbname_));
}
OpenBackupableDB();
AssertExists(db_.get(), start_exist, end_exist);
DB* db = OpenDB();
AssertExists(db, start_exist, end_exist);
if (end != 0) {
AssertEmpty(db_.get(), end_exist, end);
AssertEmpty(db, end_exist, end);
}
CloseBackupableDB();
delete db;
if (opened_restore) {
CloseRestoreDB();
}
@ -450,7 +462,7 @@ TEST(BackupableDBTest, NoDoubleCopy) {
// not be able to open that backup, but all other backups should be
// fine
TEST(BackupableDBTest, CorruptionsTest) {
const int keys_iteration = 20000;
const int keys_iteration = 5000;
Random rnd(6);
Status s;
@ -516,7 +528,7 @@ TEST(BackupableDBTest, CorruptionsTest) {
// open DB, write, close DB, backup, restore, repeat
TEST(BackupableDBTest, OfflineIntegrationTest) {
// has to be a big number, so that it triggers the memtable flush
const int keys_iteration = 20000;
const int keys_iteration = 5000;
const int max_key = keys_iteration * 4 + 10;
// first iter -- flush before backup
// second iter -- don't flush before backup
@ -542,9 +554,9 @@ TEST(BackupableDBTest, OfflineIntegrationTest) {
DestroyDB(dbname_, Options());
// ---- make sure it's empty ----
OpenBackupableDB();
AssertEmpty(db_.get(), 0, fill_up_to);
CloseBackupableDB();
DB* db = OpenDB();
AssertEmpty(db, 0, fill_up_to);
delete db;
// ---- restore the DB ----
OpenRestoreDB();
@ -563,7 +575,7 @@ TEST(BackupableDBTest, OfflineIntegrationTest) {
// open DB, write, backup, write, backup, close, restore
TEST(BackupableDBTest, OnlineIntegrationTest) {
// has to be a big number, so that it triggers the memtable flush
const int keys_iteration = 20000;
const int keys_iteration = 5000;
const int max_key = keys_iteration * 4 + 10;
Random rnd(7);
// delete old data
@ -591,9 +603,9 @@ TEST(BackupableDBTest, OnlineIntegrationTest) {
DestroyDB(dbname_, Options());
// ---- make sure it's empty ----
OpenBackupableDB();
AssertEmpty(db_.get(), 0, max_key);
CloseBackupableDB();
DB* db = OpenDB();
AssertEmpty(db, 0, max_key);
delete db;
// ---- restore every backup and verify all the data is there ----
OpenRestoreDB();
@ -624,6 +636,29 @@ TEST(BackupableDBTest, OnlineIntegrationTest) {
CloseRestoreDB();
}
TEST(BackupableDBTest, DeleteNewerBackups) {
// create backups 1, 2, 3, 4, 5
OpenBackupableDB(true);
for (int i = 0; i < 5; ++i) {
FillDB(db_.get(), 100 * i, 100 * (i + 1));
ASSERT_OK(db_->CreateNewBackup(!!(i % 2)));
}
CloseBackupableDB();
// backup 3 is fine
AssertBackupConsistency(3, 0, 300, 500);
// this should delete backups 4 and 5
OpenBackupableDB();
CloseBackupableDB();
// backups 4 and 5 don't exist
OpenRestoreDB();
Status s = restore_db_->RestoreDBFromBackup(4, dbname_, dbname_);
ASSERT_TRUE(s.IsNotFound());
s = restore_db_->RestoreDBFromBackup(5, dbname_, dbname_);
ASSERT_TRUE(s.IsNotFound());
CloseRestoreDB();
}
} // anon namespace
} // namespace rocksdb