BackupEngine::VerifyBackup
verifies checksum by default (#7014)
Summary: A parameter `verify_with_checksum` is added to `BackupEngine::VerifyBackup`, which is true by default. So now `BackupEngine::VerifyBackup` verifies backup files with checksum AND file size by default. When `verify_with_checksum` is false, `BackupEngine::VerifyBackup` only compares file sizes to verify backup files. Also add a test for the case when corruption does not change the file size. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7014 Test Plan: Passed backupable_db_test Reviewed By: zhichao-cao Differential Revision: D22165590 Pulled By: gg814 fbshipit-source-id: 606a7450714e868bceb38598c89fd356c6004f4f
This commit is contained in:
parent
f9817201af
commit
1569dc48f5
@ -13,6 +13,7 @@
|
||||
### Public API Change
|
||||
* `DB::GetDbSessionId(std::string& session_id)` is added. `session_id` stores a unique identifier that gets reset every time the DB is opened. This DB session ID should be unique among all open DB instances on all hosts, and should be unique among re-openings of the same or other DBs. This identifier is recorded in the LOG file on the line starting with "DB Session ID:".
|
||||
* `DB::OpenForReadOnly()` now returns `Status::NotFound` when the specified DB directory does not exist. Previously the error returned depended on the underlying `Env`. This change is available in all 6.11 releases as well.
|
||||
* A parameter `verify_with_checksum` is added to `BackupEngine::VerifyBackup`, which is true by default. So now `BackupEngine::VerifyBackup` verifies checksums and file sizes of backup files by default. Pass `false` for `verify_with_checksum` to maintain the previous behavior and performance of `BackupEngine::VerifyBackup`, by only verifying sizes of backup files.
|
||||
|
||||
### New Features
|
||||
* DB identity (`db_id`) and DB session identity (`db_session_id`) are added to table properties and stored in SST files. SST files generated from SstFileWriter and Repairer have DB identity “SST Writer” and “DB Repairer”, respectively. Their DB session IDs are generated in the same way as `DB::GetDbSessionId`. The session ID for SstFileWriter (resp., Repairer) resets every time `SstFileWriter::Open` (resp., `Repairer::Run`) is called.
|
||||
|
@ -293,16 +293,23 @@ class BackupEngineReadOnly {
|
||||
return RestoreDBFromLatestBackup(options, db_dir, wal_dir);
|
||||
}
|
||||
|
||||
// If verify_with_checksum is true, this function
|
||||
// inspects the current checksums and file sizes of backup files to see if
|
||||
// they match our expectation.
|
||||
//
|
||||
// If verify_with_checksum is false, this function
|
||||
// checks that each file exists and that the size of the file matches our
|
||||
// expectations. it does not check file checksum.
|
||||
// expectation. It does not check file checksum.
|
||||
//
|
||||
// If this BackupEngine created the backup, it compares the files' current
|
||||
// sizes against the number of bytes written to them during creation.
|
||||
// Otherwise, it compares the files' current sizes against their sizes when
|
||||
// the BackupEngine was opened.
|
||||
// sizes (and current checksum) against the number of bytes written to
|
||||
// them (and the checksum calculated) during creation.
|
||||
// Otherwise, it compares the files' current sizes (and checksums) against
|
||||
// their sizes (and checksums) when the BackupEngine was opened.
|
||||
//
|
||||
// Returns Status::OK() if all checks are good
|
||||
virtual Status VerifyBackup(BackupID backup_id) = 0;
|
||||
virtual Status VerifyBackup(BackupID backup_id,
|
||||
bool verify_with_checksum = true) = 0;
|
||||
};
|
||||
|
||||
// A backup engine for creating new backups.
|
||||
@ -414,10 +421,17 @@ class BackupEngine {
|
||||
return RestoreDBFromLatestBackup(options, db_dir, wal_dir);
|
||||
}
|
||||
|
||||
// If verify_with_checksum is true, this function
|
||||
// inspects the current checksums and file sizes of backup files to see if
|
||||
// they match our expectation.
|
||||
//
|
||||
// If verify_with_checksum is false, this function
|
||||
// checks that each file exists and that the size of the file matches our
|
||||
// expectations. it does not check file checksum.
|
||||
// expectation. It does not check file checksum.
|
||||
//
|
||||
// Returns Status::OK() if all checks are good
|
||||
virtual Status VerifyBackup(BackupID backup_id) = 0;
|
||||
virtual Status VerifyBackup(BackupID backup_id,
|
||||
bool verify_with_checksum = true) = 0;
|
||||
|
||||
// Will delete any files left over from incomplete creation or deletion of
|
||||
// a backup. This is not normally needed as those operations also clean up
|
||||
|
@ -128,7 +128,8 @@ class BackupEngineImpl : public BackupEngine {
|
||||
wal_dir);
|
||||
}
|
||||
|
||||
Status VerifyBackup(BackupID backup_id) override;
|
||||
Status VerifyBackup(BackupID backup_id,
|
||||
bool verify_with_checksum = true) override;
|
||||
|
||||
Status Initialize();
|
||||
|
||||
@ -1297,7 +1298,9 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options,
|
||||
return s;
|
||||
}
|
||||
|
||||
Status BackupEngineImpl::VerifyBackup(BackupID backup_id) {
|
||||
Status BackupEngineImpl::VerifyBackup(BackupID backup_id,
|
||||
bool verify_with_checksum) {
|
||||
// Check if backup_id is corrupted, or valid and registered
|
||||
assert(initialized_);
|
||||
auto corrupt_itr = corrupt_backups_.find(backup_id);
|
||||
if (corrupt_itr != corrupt_backups_.end()) {
|
||||
@ -1316,6 +1319,7 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id) {
|
||||
|
||||
ROCKS_LOG_INFO(options_.info_log, "Verifying backup id %u\n", backup_id);
|
||||
|
||||
// Find all existing backup files belong to backup_id
|
||||
std::unordered_map<std::string, uint64_t> curr_abs_path_to_size;
|
||||
for (const auto& rel_dir : {GetPrivateFileRel(backup_id), GetSharedFileRel(),
|
||||
GetSharedFileWithChecksumRel()}) {
|
||||
@ -1323,13 +1327,36 @@ Status BackupEngineImpl::VerifyBackup(BackupID backup_id) {
|
||||
InsertPathnameToSizeBytes(abs_dir, backup_env_, &curr_abs_path_to_size);
|
||||
}
|
||||
|
||||
// For all files registered in backup
|
||||
for (const auto& file_info : backup->GetFiles()) {
|
||||
const auto abs_path = GetAbsolutePath(file_info->filename);
|
||||
// check existence of the file
|
||||
if (curr_abs_path_to_size.find(abs_path) == curr_abs_path_to_size.end()) {
|
||||
return Status::NotFound("File missing: " + abs_path);
|
||||
}
|
||||
// verify file size
|
||||
if (file_info->size != curr_abs_path_to_size[abs_path]) {
|
||||
return Status::Corruption("File corrupted: " + abs_path);
|
||||
std::string size_info("Expected file size is " +
|
||||
ToString(file_info->size) +
|
||||
" while found file size is " +
|
||||
ToString(curr_abs_path_to_size[abs_path]));
|
||||
return Status::Corruption("File corrupted: File size mismatch for " +
|
||||
abs_path + ": " + size_info);
|
||||
}
|
||||
if (verify_with_checksum) {
|
||||
// verify file checksum
|
||||
uint32_t checksum_value = 0;
|
||||
ROCKS_LOG_INFO(options_.info_log, "Verifying %s checksum...\n",
|
||||
abs_path.c_str());
|
||||
CalculateChecksum(abs_path, backup_env_, EnvOptions(), 0 /* size_limit */,
|
||||
&checksum_value);
|
||||
if (file_info->checksum_value != checksum_value) {
|
||||
std::string checksum_info(
|
||||
"Expected checksum is " + ToString(file_info->checksum_value) +
|
||||
" while computed checksum is " + ToString(checksum_value));
|
||||
return Status::Corruption("File corrupted: Checksum mismatch for " +
|
||||
abs_path + ": " + checksum_info);
|
||||
}
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
@ -2132,8 +2159,9 @@ class BackupEngineReadOnlyImpl : public BackupEngineReadOnly {
|
||||
return backup_engine_->RestoreDBFromLatestBackup(options, db_dir, wal_dir);
|
||||
}
|
||||
|
||||
Status VerifyBackup(BackupID backup_id) override {
|
||||
return backup_engine_->VerifyBackup(backup_id);
|
||||
Status VerifyBackup(BackupID backup_id,
|
||||
bool verify_with_checksum = true) override {
|
||||
return backup_engine_->VerifyBackup(backup_id, verify_with_checksum);
|
||||
}
|
||||
|
||||
Status Initialize() { return backup_engine_->Initialize(); }
|
||||
|
@ -376,6 +376,27 @@ class FileManager : public EnvWrapper {
|
||||
public:
|
||||
explicit FileManager(Env* t) : EnvWrapper(t), rnd_(5) {}
|
||||
|
||||
Status GetRandomFileInDir(const std::string& dir, std::string* fname,
|
||||
uint64_t* fsize) {
|
||||
std::vector<FileAttributes> children;
|
||||
GetChildrenFileAttributes(dir, &children);
|
||||
if (children.size() <= 2) { // . and ..
|
||||
return Status::NotFound("Empty directory: " + dir);
|
||||
}
|
||||
assert(fname != nullptr);
|
||||
while (true) {
|
||||
int i = rnd_.Next() % children.size();
|
||||
if (children[i].name != "." && children[i].name != "..") {
|
||||
fname->assign(dir + "/" + children[i].name);
|
||||
*fsize = children[i].size_bytes;
|
||||
return Status::OK();
|
||||
}
|
||||
}
|
||||
// should never get here
|
||||
assert(false);
|
||||
return Status::NotFound("");
|
||||
}
|
||||
|
||||
Status DeleteRandomFileInDir(const std::string& dir) {
|
||||
std::vector<std::string> children;
|
||||
GetChildren(dir, &children);
|
||||
@ -702,7 +723,6 @@ class BackupableDBTestWithParam : public BackupableDBTest,
|
||||
// invalid backups
|
||||
TEST_P(BackupableDBTestWithParam, VerifyBackup) {
|
||||
const int keys_iteration = 5000;
|
||||
Random rnd(6);
|
||||
Status s;
|
||||
OpenDBAndBackupEngine(true);
|
||||
// create five backups
|
||||
@ -1050,6 +1070,73 @@ TEST_F(BackupableDBTest, CorruptionsTest) {
|
||||
AssertBackupConsistency(2, 0, keys_iteration * 2, keys_iteration * 5);
|
||||
}
|
||||
|
||||
// Corrupt a file but maintain its size
|
||||
TEST_F(BackupableDBTest, CorruptFileMaintainSize) {
|
||||
const int keys_iteration = 5000;
|
||||
Status s;
|
||||
OpenDBAndBackupEngine(true);
|
||||
// create a backup
|
||||
FillDB(db_.get(), 0, keys_iteration);
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
|
||||
CloseDBAndBackupEngine();
|
||||
|
||||
OpenDBAndBackupEngine();
|
||||
// verify with file size
|
||||
ASSERT_OK(backup_engine_->VerifyBackup(1, false));
|
||||
// verify with file checksum
|
||||
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
|
||||
|
||||
std::string file_to_corrupt;
|
||||
uint64_t file_size = 0;
|
||||
// under normal circumstance, there should be at least one nonempty file
|
||||
while (file_size == 0) {
|
||||
// get a random file in /private/1
|
||||
ASSERT_OK(file_manager_->GetRandomFileInDir(backupdir_ + "/private/1",
|
||||
&file_to_corrupt, &file_size));
|
||||
// corrupt the file by replacing its content by file_size random bytes
|
||||
ASSERT_OK(file_manager_->CorruptFile(file_to_corrupt, file_size));
|
||||
}
|
||||
// file sizes match
|
||||
ASSERT_OK(backup_engine_->VerifyBackup(1, false));
|
||||
// file checksums mismatch
|
||||
ASSERT_NOK(backup_engine_->VerifyBackup(1, true));
|
||||
// sanity check, use default second argument
|
||||
ASSERT_NOK(backup_engine_->VerifyBackup(1));
|
||||
CloseDBAndBackupEngine();
|
||||
|
||||
// an extra challenge
|
||||
// set share_files_with_checksum to true and do two more backups
|
||||
// corrupt all the table files in shared_checksum but maintain their sizes
|
||||
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
|
||||
kShareWithChecksum);
|
||||
// creat two backups
|
||||
for (int i = 1; i < 3; ++i) {
|
||||
FillDB(db_.get(), keys_iteration * i, keys_iteration * (i + 1));
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true));
|
||||
}
|
||||
CloseDBAndBackupEngine();
|
||||
|
||||
OpenDBAndBackupEngine();
|
||||
std::vector<FileAttributes> children;
|
||||
const std::string dir = backupdir_ + "/shared_checksum";
|
||||
ASSERT_OK(file_manager_->GetChildrenFileAttributes(dir, &children));
|
||||
for (const auto& child : children) {
|
||||
if (child.name == "." || child.name == ".." || child.size_bytes == 0) {
|
||||
continue;
|
||||
}
|
||||
// corrupt the file by replacing its content by file_size random bytes
|
||||
ASSERT_OK(
|
||||
file_manager_->CorruptFile(dir + "/" + child.name, child.size_bytes));
|
||||
}
|
||||
// file sizes match
|
||||
ASSERT_OK(backup_engine_->VerifyBackup(1, false));
|
||||
ASSERT_OK(backup_engine_->VerifyBackup(2, false));
|
||||
// file checksums mismatch
|
||||
ASSERT_NOK(backup_engine_->VerifyBackup(1, true));
|
||||
ASSERT_NOK(backup_engine_->VerifyBackup(2, true));
|
||||
CloseDBAndBackupEngine();
|
||||
}
|
||||
|
||||
TEST_F(BackupableDBTest, InterruptCreationTest) {
|
||||
// Interrupt backup creation by failing new writes and failing cleanup of the
|
||||
// partial state. Then verify a subsequent backup can still succeed.
|
||||
|
Loading…
Reference in New Issue
Block a user