Misc Backup API enhancements (#8170)

Summary:
* CreateNewBackup(WithMetadata) returning the BackupID of new backup
through optional new output param. This is especially useful with the
new mutithreading support, so that you can transactionally determine the
ID of a backup you create.
* GetBackupInfo / GetLatestBackupInfo for individual backups, so that
you don't have to comb through a vector of backups if you don't want to.

Updated HISTORY.md (including re: BlobDB support as new feature)

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8170

Test Plan:
Added test logic to existing tests, to minimize increase in
cost of running tests

Reviewed By: zhichao-cao

Differential Revision: D27680410

Pulled By: pdillinger

fbshipit-source-id: 1fc45b73d81aae293ccd4a43d9583d7fd915d3eb
This commit is contained in:
Peter Dillinger 2021-04-12 10:57:35 -07:00 committed by Facebook GitHub Bot
parent 8972dd1ffa
commit bb75092574
4 changed files with 163 additions and 51 deletions

View File

@ -18,11 +18,13 @@
### Public API change
* Added `TableProperties::slow_compression_estimated_data_size` and `TableProperties::fast_compression_estimated_data_size`. When `ColumnFamilyOptions::sample_for_compression > 0`, they estimate what `TableProperties::data_size` would have been if the "fast" or "slow" (see `ColumnFamilyOptions::sample_for_compression` API doc for definitions) compression had been used instead.
* Update DB::StartIOTrace and remove Env object from the arguments as its redundant and DB already has Env object that is passed down to IOTracer::StartIOTrace
* For new integrated BlobDB, add support for blob files for backup/restore like table files. Because of current limitations, blob files always use the kLegacyCrc32cAndFileSize naming scheme, and incremental backups must read and checksum all blob files in a DB, even for files that are already backed up.
* Added `FlushReason::kWalFull`, which is reported when a memtable is flushed due to the WAL reaching its size limit; those flushes were previously reported as `FlushReason::kWriteBufferManager`. Also, changed the reason for flushes triggered by the write buffer manager to `FlushReason::kWriteBufferManager`; they were previously reported as `FlushReason::kWriteBufferFull`.
### New Features
* Added the ability to open BackupEngine backups as read-only DBs, using BackupInfo::name_for_open and env_for_open provided by BackupEngine::GetBackupInfo() with include_file_details=true.
* Added BackupEngine support for integrated BlobDB, with blob files shared between backups when table files are shared. Because of current limitations, blob files always use the kLegacyCrc32cAndFileSize naming scheme, and incremental backups must read and checksum all blob files in a DB, even for files that are already backed up.
* Added an optional output parameter to BackupEngine::CreateNewBackup(WithMetadata) to return the BackupID of the new backup.
* Added BackupEngine::GetBackupInfo / GetLatestBackupInfo for querying individual backups.
## 6.19.0 (03/21/2021)
### Bug Fixes

View File

@ -371,6 +371,21 @@ class BackupEngineReadOnlyBase {
public:
virtual ~BackupEngineReadOnlyBase() {}
// Returns info about the latest good backup in backup_info, or NotFound
// no good backup exists.
// Setting include_file_details=true provides information about each
// backed-up file in BackupInfo::file_details and more.
virtual Status GetLatestBackupInfo(
BackupInfo* backup_info, bool include_file_details = false) const = 0;
// Returns info about a specific backup in backup_info, or NotFound
// or Corruption status if the requested backup id does not exist or is
// known corrupt.
// Setting include_file_details=true provides information about each
// backed-up file in BackupInfo::file_details and more.
virtual Status GetBackupInfo(BackupID backup_id, BackupInfo* backup_info,
bool include_file_details = false) const = 0;
// Returns info about backups in backup_info
// Setting include_file_details=true provides information about each
// backed-up file in BackupInfo::file_details and more.
@ -439,7 +454,7 @@ class BackupEngineAppendOnlyBase {
// same as CreateNewBackup, but stores extra application metadata.
virtual Status CreateNewBackupWithMetadata(
const CreateBackupOptions& options, DB* db,
const std::string& app_metadata) = 0;
const std::string& app_metadata, BackupID* new_backup_id = nullptr) = 0;
// keep here for backward compatibility.
virtual Status CreateNewBackupWithMetadata(
@ -451,9 +466,12 @@ class BackupEngineAppendOnlyBase {
return CreateNewBackupWithMetadata(options, db, app_metadata);
}
// Captures the state of the database by creating a new (latest) backup
virtual Status CreateNewBackup(const CreateBackupOptions& options, DB* db) {
return CreateNewBackupWithMetadata(options, db, "");
// Captures the state of the database by creating a new (latest) backup.
// On success (OK status), the BackupID of the new backup is saved to
// *new_backup_id when not nullptr.
virtual Status CreateNewBackup(const CreateBackupOptions& options, DB* db,
BackupID* new_backup_id = nullptr) {
return CreateNewBackupWithMetadata(options, db, "", new_backup_id);
}
// keep here for backward compatibility.

View File

@ -129,7 +129,8 @@ class BackupEngineImpl {
~BackupEngineImpl();
Status CreateNewBackupWithMetadata(const CreateBackupOptions& options, DB* db,
const std::string& app_metadata);
const std::string& app_metadata,
BackupID* new_backup_id_ptr);
Status PurgeOldBackups(uint32_t num_backups_to_keep);
@ -144,6 +145,9 @@ class BackupEngineImpl {
void GetBackupInfo(std::vector<BackupInfo>* backup_info,
bool include_file_details) const;
Status GetBackupInfo(BackupID backup_id, BackupInfo* backup_info,
bool include_file_details = false) const;
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) const;
Status RestoreDBFromBackup(const RestoreOptions& options, BackupID backup_id,
@ -460,6 +464,10 @@ class BackupEngineImpl {
mutable std::shared_ptr<Env> env_for_open_;
}; // BackupMeta
void SetBackupInfoFromBackupMeta(BackupID id, const BackupMeta& meta,
BackupInfo* backup_info,
bool include_file_details) const;
inline std::string GetAbsolutePath(
const std::string &relative_path = "") const {
assert(relative_path.size() == 0 || relative_path[0] != '/');
@ -802,9 +810,11 @@ class BackupEngineImplThreadSafe : public BackupEngine,
using BackupEngine::CreateNewBackupWithMetadata;
Status CreateNewBackupWithMetadata(const CreateBackupOptions& options, DB* db,
const std::string& app_metadata) override {
const std::string& app_metadata,
BackupID* new_backup_id) override {
WriteLock lock(&mutex_);
return impl_.CreateNewBackupWithMetadata(options, db, app_metadata);
return impl_.CreateNewBackupWithMetadata(options, db, app_metadata,
new_backup_id);
}
Status PurgeOldBackups(uint32_t num_backups_to_keep) override {
@ -827,6 +837,19 @@ class BackupEngineImplThreadSafe : public BackupEngine,
return impl_.GarbageCollect();
}
Status GetLatestBackupInfo(BackupInfo* backup_info,
bool include_file_details = false) const override {
ReadLock lock(&mutex_);
return impl_.GetBackupInfo(kLatestBackupIDMarker, backup_info,
include_file_details);
}
Status GetBackupInfo(BackupID backup_id, BackupInfo* backup_info,
bool include_file_details = false) const override {
ReadLock lock(&mutex_);
return impl_.GetBackupInfo(backup_id, backup_info, include_file_details);
}
void GetBackupInfo(std::vector<BackupInfo>* backup_info,
bool include_file_details) const override {
ReadLock lock(&mutex_);
@ -1181,8 +1204,8 @@ Status BackupEngineImpl::Initialize() {
}
Status BackupEngineImpl::CreateNewBackupWithMetadata(
const CreateBackupOptions& options, DB* db,
const std::string& app_metadata) {
const CreateBackupOptions& options, DB* db, const std::string& app_metadata,
BackupID* new_backup_id_ptr) {
assert(initialized_);
assert(!read_only_);
if (app_metadata.size() > kMaxAppMetaSize) {
@ -1417,6 +1440,9 @@ Status BackupEngineImpl::CreateNewBackupWithMetadata(
// in the LATEST_BACKUP file
latest_backup_id_ = new_backup_id;
latest_valid_backup_id_ = new_backup_id;
if (new_backup_id_ptr) {
*new_backup_id_ptr = new_backup_id;
}
ROCKS_LOG_INFO(options_.info_log, "Backup DONE. All is good");
// backup_speed is in byte/second
@ -1546,30 +1572,61 @@ Status BackupEngineImpl::DeleteBackupNoGC(BackupID backup_id) {
return Status::OK();
}
void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info,
void BackupEngineImpl::SetBackupInfoFromBackupMeta(
BackupID id, const BackupMeta& meta, BackupInfo* backup_info,
bool include_file_details) const {
assert(initialized_);
backup_info->reserve(backups_.size());
for (auto& backup : backups_) {
const BackupMeta& meta = *backup.second;
if (!meta.Empty()) {
backup_info->push_back(BackupInfo(backup.first, meta.GetTimestamp(),
meta.GetSize(), meta.GetNumberFiles(),
meta.GetAppMetadata()));
BackupInfo& binfo = backup_info->back();
*backup_info = BackupInfo(id, meta.GetTimestamp(), meta.GetSize(),
meta.GetNumberFiles(), meta.GetAppMetadata());
if (include_file_details) {
auto& file_details = binfo.file_details;
auto& file_details = backup_info->file_details;
file_details.reserve(meta.GetFiles().size());
for (auto& file_ptr : meta.GetFiles()) {
BackupFileInfo& finfo = *file_details.emplace(file_details.end());
finfo.relative_filename = file_ptr->filename;
finfo.size = file_ptr->size;
}
binfo.name_for_open =
GetAbsolutePath(GetPrivateFileRel(binfo.backup_id));
binfo.name_for_open.pop_back(); // remove trailing '/'
binfo.env_for_open = meta.GetEnvForOpen();
backup_info->name_for_open = GetAbsolutePath(GetPrivateFileRel(id));
backup_info->name_for_open.pop_back(); // remove trailing '/'
backup_info->env_for_open = meta.GetEnvForOpen();
}
}
Status BackupEngineImpl::GetBackupInfo(BackupID backup_id,
BackupInfo* backup_info,
bool include_file_details) const {
assert(initialized_);
if (backup_id == kLatestBackupIDMarker) {
// Note: Read latest_valid_backup_id_ inside of lock
backup_id = latest_valid_backup_id_;
}
auto corrupt_itr = corrupt_backups_.find(backup_id);
if (corrupt_itr != corrupt_backups_.end()) {
return Status::Corruption(corrupt_itr->second.first.ToString());
}
auto backup_itr = backups_.find(backup_id);
if (backup_itr == backups_.end()) {
return Status::NotFound("Backup not found");
}
auto& backup = backup_itr->second;
if (backup->Empty()) {
return Status::NotFound("Backup not found");
}
SetBackupInfoFromBackupMeta(backup_id, *backup, backup_info,
include_file_details);
return Status::OK();
}
void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info,
bool include_file_details) const {
assert(initialized_);
backup_info->resize(backups_.size());
size_t i = 0;
for (auto& backup : backups_) {
const BackupMeta& meta = *backup.second;
if (!meta.Empty()) {
SetBackupInfoFromBackupMeta(backup.first, meta, &backup_info->at(i++),
include_file_details);
}
}
}

View File

@ -2646,17 +2646,17 @@ TEST_F(BackupableDBTest, OpenBackupAsReadOnlyDB) {
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), /*flush*/ false));
db_.reset(); // CloseDB
DestroyDB(dbname_, options_);
std::vector<BackupInfo> backup_info;
BackupInfo backup_info;
// First, check that we get empty fields without include_file_details
backup_engine_->GetBackupInfo(&backup_info, /*with file details*/ false);
ASSERT_EQ(backup_info.size(), 2);
ASSERT_EQ(backup_info[0].name_for_open, "");
ASSERT_FALSE(backup_info[0].env_for_open);
ASSERT_OK(backup_engine_->GetBackupInfo(/*id*/ 1U, &backup_info,
/*with file details*/ false));
ASSERT_EQ(backup_info.name_for_open, "");
ASSERT_FALSE(backup_info.env_for_open);
// Now for the real test
backup_info.clear();
backup_engine_->GetBackupInfo(&backup_info, /*with file details*/ true);
ASSERT_EQ(backup_info.size(), 2);
backup_info = BackupInfo();
ASSERT_OK(backup_engine_->GetBackupInfo(/*id*/ 1U, &backup_info,
/*with file details*/ true));
// Caution: DBOptions only holds a raw pointer to Env, so something else
// must keep it alive.
@ -2668,9 +2668,9 @@ TEST_F(BackupableDBTest, OpenBackupAsReadOnlyDB) {
opts.create_if_missing = false;
opts.info_log.reset();
opts.env = backup_info[0].env_for_open.get();
std::string name = backup_info[0].name_for_open;
backup_info.clear();
opts.env = backup_info.env_for_open.get();
std::string name = backup_info.name_for_open;
backup_info = BackupInfo();
ASSERT_OK(DB::OpenForReadOnly(opts, name, &db));
AssertExists(db, 0, 100);
@ -2680,13 +2680,13 @@ TEST_F(BackupableDBTest, OpenBackupAsReadOnlyDB) {
db = nullptr;
// Case 2: Keeping BackupInfo alive rather than BackupEngine also suffices
backup_engine_->GetBackupInfo(&backup_info, /*with file details*/ true);
ASSERT_EQ(backup_info.size(), 2);
ASSERT_OK(backup_engine_->GetBackupInfo(/*id*/ 2U, &backup_info,
/*with file details*/ true));
CloseBackupEngine();
opts.create_if_missing = true; // check also OK (though pointless)
opts.env = backup_info[1].env_for_open.get();
name = backup_info[1].name_for_open;
// Note: keeping backup_info[1] alive
opts.env = backup_info.env_for_open.get();
name = backup_info.name_for_open;
// Note: keeping backup_info alive
ASSERT_OK(DB::OpenForReadOnly(opts, name, &db));
AssertExists(db, 0, 200);
@ -2848,18 +2848,33 @@ TEST_F(BackupableDBTest, BackupWithMetadata) {
for (int i = 0; i < 5; ++i) {
const std::string metadata = std::to_string(i);
FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1));
ASSERT_OK(
backup_engine_->CreateNewBackupWithMetadata(db_.get(), metadata, true));
// Here also test CreateNewBackupWithMetadata with CreateBackupOptions
// and outputting saved BackupID.
CreateBackupOptions opts;
opts.flush_before_backup = true;
BackupID new_id = 0;
ASSERT_OK(backup_engine_->CreateNewBackupWithMetadata(opts, db_.get(),
metadata, &new_id));
ASSERT_EQ(new_id, static_cast<BackupID>(i + 1));
}
CloseDBAndBackupEngine();
OpenDBAndBackupEngine();
{ // Verify in bulk BackupInfo
std::vector<BackupInfo> backup_infos;
backup_engine_->GetBackupInfo(&backup_infos);
ASSERT_EQ(5, backup_infos.size());
for (int i = 0; i < 5; i++) {
ASSERT_EQ(std::to_string(i), backup_infos[i].app_metadata);
}
}
// Also verify in individual BackupInfo
for (int i = 0; i < 5; i++) {
BackupInfo backup_info;
ASSERT_OK(backup_engine_->GetBackupInfo(static_cast<BackupID>(i + 1),
&backup_info));
ASSERT_EQ(std::to_string(i), backup_info.app_metadata);
}
CloseDBAndBackupEngine();
DestroyDB(dbname_, options_);
}
@ -3285,6 +3300,8 @@ TEST_F(BackupableDBTest, CreateWhenLatestBackupCorrupted) {
// succeed even when latest backup is corrupted.
const int kNumKeys = 5000;
OpenDBAndBackupEngine(true /* destroy_old_data */);
BackupInfo backup_info;
ASSERT_TRUE(backup_engine_->GetLatestBackupInfo(&backup_info).IsNotFound());
FillDB(db_.get(), 0 /* from */, kNumKeys);
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
true /* flush_before_backup */));
@ -3293,12 +3310,26 @@ TEST_F(BackupableDBTest, CreateWhenLatestBackupCorrupted) {
CloseDBAndBackupEngine();
OpenDBAndBackupEngine();
ASSERT_TRUE(backup_engine_->GetLatestBackupInfo(&backup_info).IsNotFound());
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(),
true /* flush_before_backup */));
ASSERT_TRUE(backup_engine_->GetLatestBackupInfo(&backup_info).ok());
ASSERT_EQ(2, backup_info.backup_id);
std::vector<BackupInfo> backup_infos;
backup_engine_->GetBackupInfo(&backup_infos);
ASSERT_EQ(1, backup_infos.size());
ASSERT_EQ(2, backup_infos[0].backup_id);
// Verify individual GetBackupInfo by ID
ASSERT_TRUE(backup_engine_->GetBackupInfo(0U, &backup_info).IsNotFound());
ASSERT_TRUE(backup_engine_->GetBackupInfo(1U, &backup_info).IsCorruption());
ASSERT_TRUE(backup_engine_->GetBackupInfo(2U, &backup_info).ok());
ASSERT_TRUE(backup_engine_->GetBackupInfo(3U, &backup_info).IsNotFound());
ASSERT_TRUE(
backup_engine_->GetBackupInfo(999999U, &backup_info).IsNotFound());
}
TEST_F(BackupableDBTest, WriteOnlyEngineNoSharedFileDeletion) {
@ -3456,7 +3487,11 @@ TEST_F(BackupableDBTest, BackgroundThreadCpuPriority) {
CreateBackupOptions options;
options.decrease_background_thread_cpu_priority = true;
options.background_thread_cpu_priority = CpuPriority::kIdle;
ASSERT_OK(backup_engine_->CreateNewBackup(options, db_.get()));
// Also check output backup_id with CreateNewBackup
BackupID new_id = 0;
ASSERT_OK(backup_engine_->CreateNewBackup(options, db_.get(), &new_id));
ASSERT_EQ(new_id, 5U);
ASSERT_EQ(priority, CpuPriority::kNormal);
}