Add BackupEngine API for backup file details (#8042)
Summary: This API can be used for things like determining how much space can be freed up by deleting a particular backup, etc. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8042 Test Plan: validation of the API added to many existing backup unit tests Reviewed By: mrambacher Differential Revision: D26936577 Pulled By: pdillinger fbshipit-source-id: f0bbd90f0917b9781a6837652fb4616d9247816a
This commit is contained in:
parent
82b3888433
commit
589ea6bec2
@ -17,6 +17,7 @@
|
||||
### New Features
|
||||
* Support compaction filters for the new implementation of BlobDB. Add `FilterBlobByKey()` to `CompactionFilter`. Subclasses can override this method so that compaction filters can determine whether the actual blob value has to be read during compaction. Use a new `kUndetermined` in `CompactionFilter::Decision` to indicate that further action is necessary for compaction filter to make a decision.
|
||||
* Add support to extend retrieval of checksums for blob files from the MANIFEST when checkpointing. During backup, rocksdb can detect corruption in blob files during file copies.
|
||||
* Add an option to BackupEngine::GetBackupInfo to include the name and size of each backed-up file. Especially in the presence of file sharing among backups, this offers detailed insight into backup space usage.
|
||||
* Enable backward iteration on keys with user-defined timestamps.
|
||||
|
||||
## 6.18.0 (02/19/2021)
|
||||
|
@ -269,16 +269,35 @@ struct RestoreOptions {
|
||||
: keep_log_files(_keep_log_files) {}
|
||||
};
|
||||
|
||||
// Describes a single backed-up file. Instances are stored in
// BackupInfo::file_details, which is filled in only when file details are
// requested from GetBackupInfo (include_file_details=true).
struct BackupFileInfo {
|
||||
// File name and path relative to the backup_dir directory.
|
||||
std::string relative_filename;
|
||||
|
||||
// Size of the file in bytes, not including filesystem overheads.
|
||||
uint64_t size;
|
||||
};
|
||||
|
||||
typedef uint32_t BackupID;
|
||||
|
||||
struct BackupInfo {
|
||||
BackupID backup_id;
|
||||
// Creation time, according to GetCurrentTime
|
||||
int64_t timestamp;
|
||||
|
||||
// Total size in bytes (based on file payloads, not including filesystem
|
||||
// overheads or backup meta file)
|
||||
uint64_t size;
|
||||
|
||||
// Number of backed up files, some of which might be shared with other
|
||||
// backups. Does not include backup meta file.
|
||||
uint32_t number_files;
|
||||
|
||||
// Backup API user metadata
|
||||
std::string app_metadata;
|
||||
|
||||
// Backup file details, if requested
|
||||
std::vector<BackupFileInfo> file_details;
|
||||
|
||||
BackupInfo() {}
|
||||
|
||||
BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size,
|
||||
@ -334,12 +353,15 @@ class BackupEngineReadOnly {
|
||||
|
||||
// Returns info about backups in backup_info
|
||||
// You can GetBackupInfo safely, even with other BackupEngine performing
|
||||
// backups on the same directory
|
||||
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0;
|
||||
// backups on the same directory.
|
||||
// Setting include_file_details=true provides information about each
|
||||
// backed-up file in BackupInfo::file_details.
|
||||
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info,
|
||||
bool include_file_details = false) const = 0;
|
||||
|
||||
// Returns the IDs of corrupt backups in corrupt_backup_ids
|
||||
virtual void GetCorruptedBackups(
|
||||
std::vector<BackupID>* corrupt_backup_ids) = 0;
|
||||
std::vector<BackupID>* corrupt_backup_ids) const = 0;
|
||||
|
||||
// Restoring DB from backup is NOT safe when there is another BackupEngine
|
||||
// running that might call DeleteBackup() or PurgeOldBackups(). It is caller's
|
||||
@ -457,11 +479,12 @@ class BackupEngine {
|
||||
virtual void StopBackup() = 0;
|
||||
|
||||
// Returns info about backups in backup_info
|
||||
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0;
|
||||
virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info,
|
||||
bool include_file_details = false) const = 0;
|
||||
|
||||
// Returns the IDs of corrupt backups in corrupt_backup_ids
|
||||
virtual void GetCorruptedBackups(
|
||||
std::vector<BackupID>* corrupt_backup_ids) = 0;
|
||||
std::vector<BackupID>* corrupt_backup_ids) const = 0;
|
||||
|
||||
// restore from backup with backup_id
|
||||
// IMPORTANT -- if options_.share_table_files == true,
|
||||
|
@ -130,9 +130,11 @@ class BackupEngineImpl : public BackupEngine {
|
||||
|
||||
// The returned BackupInfos are in chronological order, which means the
|
||||
// latest backup comes last.
|
||||
void GetBackupInfo(std::vector<BackupInfo>* backup_info) override;
|
||||
void GetBackupInfo(std::vector<BackupInfo>* backup_info,
|
||||
bool include_file_details) const override;
|
||||
|
||||
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) override;
|
||||
void GetCorruptedBackups(
|
||||
std::vector<BackupID>* corrupt_backup_ids) const override;
|
||||
|
||||
using BackupEngine::RestoreDBFromBackup;
|
||||
Status RestoreDBFromBackup(const RestoreOptions& options, BackupID backup_id,
|
||||
@ -223,13 +225,13 @@ class BackupEngineImpl : public BackupEngine {
|
||||
uint64_t GetSize() const {
|
||||
return size_;
|
||||
}
|
||||
uint32_t GetNumberFiles() { return static_cast<uint32_t>(files_.size()); }
|
||||
uint32_t GetNumberFiles() const {
|
||||
return static_cast<uint32_t>(files_.size());
|
||||
}
|
||||
void SetSequenceNumber(uint64_t sequence_number) {
|
||||
sequence_number_ = sequence_number;
|
||||
}
|
||||
uint64_t GetSequenceNumber() {
|
||||
return sequence_number_;
|
||||
}
|
||||
uint64_t GetSequenceNumber() const { return sequence_number_; }
|
||||
|
||||
const std::string& GetAppMetadata() const { return app_metadata_; }
|
||||
|
||||
@ -241,9 +243,7 @@ class BackupEngineImpl : public BackupEngine {
|
||||
|
||||
Status Delete(bool delete_meta = true);
|
||||
|
||||
bool Empty() {
|
||||
return files_.empty();
|
||||
}
|
||||
bool Empty() const { return files_.empty(); }
|
||||
|
||||
std::shared_ptr<FileInfo> GetFile(const std::string& filename) const {
|
||||
auto it = file_infos_->find(filename);
|
||||
@ -252,7 +252,7 @@ class BackupEngineImpl : public BackupEngine {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
const std::vector<std::shared_ptr<FileInfo>>& GetFiles() {
|
||||
const std::vector<std::shared_ptr<FileInfo>>& GetFiles() const {
|
||||
return files_;
|
||||
}
|
||||
|
||||
@ -1278,21 +1278,31 @@ Status BackupEngineImpl::DeleteBackupInternal(BackupID backup_id) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
|
||||
void BackupEngineImpl::GetBackupInfo(std::vector<BackupInfo>* backup_info,
|
||||
bool include_file_details) const {
|
||||
assert(initialized_);
|
||||
backup_info->reserve(backups_.size());
|
||||
for (auto& backup : backups_) {
|
||||
if (!backup.second->Empty()) {
|
||||
backup_info->push_back(BackupInfo(
|
||||
backup.first, backup.second->GetTimestamp(), backup.second->GetSize(),
|
||||
backup.second->GetNumberFiles(), backup.second->GetAppMetadata()));
|
||||
const BackupMeta& meta = *backup.second;
|
||||
if (!meta.Empty()) {
|
||||
backup_info->push_back(BackupInfo(backup.first, meta.GetTimestamp(),
|
||||
meta.GetSize(), meta.GetNumberFiles(),
|
||||
meta.GetAppMetadata()));
|
||||
if (include_file_details) {
|
||||
auto& file_details = backup_info->back().file_details;
|
||||
file_details.reserve(meta.GetFiles().size());
|
||||
for (auto& file_ptr : meta.GetFiles()) {
|
||||
BackupFileInfo& info = *file_details.emplace(file_details.end());
|
||||
info.relative_filename = file_ptr->filename;
|
||||
info.size = file_ptr->size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
BackupEngineImpl::GetCorruptedBackups(
|
||||
std::vector<BackupID>* corrupt_backup_ids) {
|
||||
void BackupEngineImpl::GetCorruptedBackups(
|
||||
std::vector<BackupID>* corrupt_backup_ids) const {
|
||||
assert(initialized_);
|
||||
corrupt_backup_ids->reserve(corrupt_backups_.size());
|
||||
for (auto& backup : corrupt_backups_) {
|
||||
@ -2305,11 +2315,13 @@ class BackupEngineReadOnlyImpl : public BackupEngineReadOnly {
|
||||
|
||||
// The returned BackupInfos are in chronological order, which means the
|
||||
// latest backup comes last.
|
||||
void GetBackupInfo(std::vector<BackupInfo>* backup_info) override {
|
||||
backup_engine_->GetBackupInfo(backup_info);
|
||||
void GetBackupInfo(std::vector<BackupInfo>* backup_info,
|
||||
bool include_file_details) const override {
|
||||
backup_engine_->GetBackupInfo(backup_info, include_file_details);
|
||||
}
|
||||
|
||||
void GetCorruptedBackups(std::vector<BackupID>* corrupt_backup_ids) override {
|
||||
void GetCorruptedBackups(
|
||||
std::vector<BackupID>* corrupt_backup_ids) const override {
|
||||
backup_engine_->GetCorruptedBackups(corrupt_backup_ids);
|
||||
}
|
||||
|
||||
|
@ -709,6 +709,69 @@ class BackupableDBTest : public testing::Test {
|
||||
|
||||
void CloseBackupEngine() { backup_engine_.reset(nullptr); }
|
||||
|
||||
// cross-cutting test of GetBackupInfo
|
||||
// Cross-checks what GetBackupInfo (with file details) reports against the
// files found under backupdir_ through test_backup_env_:
//  * each backup's reported size must equal the sum of its file sizes;
//  * a file listed more than once must always be listed with the same size;
//  * every reported file must be found in private/*, shared or
//    shared_checksum with the reported size;
//  * files found there but not reported are tolerated only when
//    GetCorruptedBackups returns at least one ID.
void AssertBackupInfoConsistency() {
|
||||
std::vector<BackupInfo> backup_info;
|
||||
backup_engine_->GetBackupInfo(&backup_info, /*with file details*/ true);
|
||||
// Relative file name -> size, as reported by GetBackupInfo
std::map<std::string, uint64_t> file_sizes;
|
||||
|
||||
// Find the files that are supposed to be there
|
||||
for (auto& backup : backup_info) {
|
||||
uint64_t sum_for_backup = 0;
|
||||
for (auto& file : backup.file_details) {
|
||||
auto e = file_sizes.find(file.relative_filename);
|
||||
if (e == file_sizes.end()) {
|
||||
// fprintf(stderr, "Adding %s -> %u\n",
|
||||
// file.relative_filename.c_str(), (unsigned)file.size);
|
||||
file_sizes[file.relative_filename] = file.size;
|
||||
} else {
|
||||
// File already recorded from an earlier entry; the sizes must agree
ASSERT_EQ(file_sizes[file.relative_filename], file.size);
|
||||
}
|
||||
sum_for_backup += file.size;
|
||||
}
|
||||
// BackupInfo::size must be the sum of the per-file sizes
ASSERT_EQ(backup.size, sum_for_backup);
|
||||
}
|
||||
|
||||
std::vector<BackupID> corrupt_backup_ids;
|
||||
backup_engine_->GetCorruptedBackups(&corrupt_backup_ids);
|
||||
bool has_corrupt = corrupt_backup_ids.size() > 0;
|
||||
|
||||
// Compare with what's in backup dir
|
||||
std::vector<std::string> child_dirs;
|
||||
ASSERT_OK(
|
||||
test_backup_env_->GetChildren(backupdir_ + "/private", &child_dirs));
|
||||
// Make the private sub-directory names relative to backupdir_
for (auto& dir : child_dirs) {
|
||||
dir = "private/" + dir;
|
||||
}
|
||||
child_dirs.push_back("shared"); // might not exist
|
||||
child_dirs.push_back("shared_checksum"); // might not exist
|
||||
for (auto& dir : child_dirs) {
|
||||
std::vector<std::string> children;
|
||||
// The status is deliberately not checked: the directory might not exist,
// in which case children is presumably left empty.
test_backup_env_->GetChildren(backupdir_ + "/" + dir, &children)
|
||||
.PermitUncheckedError();
|
||||
// fprintf(stderr, "ls %s\n", (backupdir_ + "/" + dir).c_str());
|
||||
for (auto& file : children) {
|
||||
uint64_t size;
|
||||
size = UINT64_MAX; // appease clang-analyze
|
||||
std::string rel_file = dir + "/" + file;
|
||||
// fprintf(stderr, "stat %s\n", (backupdir_ + "/" + rel_file).c_str());
|
||||
ASSERT_OK(
|
||||
test_backup_env_->GetFileSize(backupdir_ + "/" + rel_file, &size));
|
||||
auto e = file_sizes.find(rel_file);
|
||||
if (e == file_sizes.end()) {
|
||||
// The only case in which we should find files not reported
// (i.e. GetCorruptedBackups returned at least one ID)
|
||||
ASSERT_TRUE(has_corrupt);
|
||||
} else {
|
||||
ASSERT_EQ(e->second, size);
|
||||
// Erase matched entries so that anything left over is a reported file
// that was not found in the backup dir
file_sizes.erase(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Everything should have been matched
|
||||
ASSERT_EQ(file_sizes.size(), 0);
|
||||
}
|
||||
|
||||
// restores backup backup_id and asserts the existence of
|
||||
// [start_exist, end_exist> and not-existence of
|
||||
// [end_exist, end>
|
||||
@ -724,6 +787,9 @@ class BackupableDBTest : public testing::Test {
|
||||
opened_backup_engine = true;
|
||||
OpenBackupEngine();
|
||||
}
|
||||
AssertBackupInfoConsistency();
|
||||
|
||||
// Now perform restore
|
||||
if (backup_id > 0) {
|
||||
ASSERT_OK(backup_engine_->RestoreDBFromBackup(backup_id, dbname_, dbname_,
|
||||
restore_options));
|
||||
@ -732,6 +798,7 @@ class BackupableDBTest : public testing::Test {
|
||||
restore_options));
|
||||
}
|
||||
DB* db = OpenDB();
|
||||
// Check DB contents
|
||||
AssertExists(db, start_exist, end_exist);
|
||||
if (end != 0) {
|
||||
AssertEmpty(db, end_exist, end);
|
||||
|
Loading…
Reference in New Issue
Block a user