Fix flaky BackupableDBTest.CustomChecksumTransition (#7254)
Summary: The flaky test in the title is caused by two problems. First, there is a bug in the BackupEngine that results in skipping computing the default crc32 checksum when `share_table_files` is enabled and the table is already backed up. Second, when `RestoreDBFromBackup` fails and the backup was being restored to the DB directory, it is likely that `RestoreDBFromBackup` has cleaned up the DB directory before it fails, and therefore, files in old backups may collide with files to be backed up if `share_files_with_checksum` is not enabled. New tests that cover the above problems are added. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7254 Test Plan: `./backupable_db_test` Reviewed By: pdillinger Differential Revision: D23118715 Pulled By: gg814 fbshipit-source-id: 7be8de912808944be59e93d602c7431a54c079eb
This commit is contained in:
parent
a1aa3f8385
commit
15245e9018
@ -1807,7 +1807,7 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
|
||||
std::string backup_checksum_func_name = kDefaultBackupFileChecksumFuncName;
|
||||
std::string db_id;
|
||||
std::string db_session_id;
|
||||
// whether the checksum for a table file is available
|
||||
// whether a default or custom checksum for a table file is available
|
||||
bool has_checksum = false;
|
||||
|
||||
// Set up the custom checksum function.
|
||||
@ -1956,10 +1956,13 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
|
||||
backup_env_->DeleteFile(final_dest_path);
|
||||
} else {
|
||||
// file exists and referenced
|
||||
if (!has_checksum) {
|
||||
s = CalculateChecksum(src_dir + fname, db_env_, src_env_options,
|
||||
size_limit, &checksum_hex, checksum_func,
|
||||
&custom_checksum_hex);
|
||||
if (!has_checksum || checksum_hex.empty()) {
|
||||
// Either both checksum_hex and custom_checksum_hex need recalculating
|
||||
// or only checksum_hex needs recalculating
|
||||
s = CalculateChecksum(
|
||||
src_dir + fname, db_env_, src_env_options, size_limit,
|
||||
&checksum_hex, checksum_func,
|
||||
checksum_hex.empty() ? nullptr : &custom_checksum_hex);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
@ -1999,10 +2002,13 @@ Status BackupEngineImpl::AddBackupFileWorkItem(
|
||||
// the file is present and referenced by a backup
|
||||
ROCKS_LOG_INFO(options_.info_log,
|
||||
"%s already present, calculate checksum", fname.c_str());
|
||||
if (!has_checksum) {
|
||||
s = CalculateChecksum(src_dir + fname, db_env_, src_env_options,
|
||||
size_limit, &checksum_hex, checksum_func,
|
||||
&custom_checksum_hex);
|
||||
if (!has_checksum || checksum_hex.empty()) {
|
||||
// Either both checksum_hex and custom_checksum_hex need recalculating
|
||||
// or only checksum_hex needs recalculating
|
||||
s = CalculateChecksum(
|
||||
src_dir + fname, db_env_, src_env_options, size_limit,
|
||||
&checksum_hex, checksum_func,
|
||||
checksum_hex.empty() ? nullptr : &custom_checksum_hex);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
@ -2475,6 +2481,7 @@ Status BackupEngineImpl::BackupMeta::AddFile(
|
||||
"try again.");
|
||||
} else if (IsSameChecksumFunc(itr->second->checksum_func_name,
|
||||
file_info->checksum_func_name) &&
|
||||
!itr->second->custom_checksum_hex.empty() &&
|
||||
itr->second->custom_checksum_hex !=
|
||||
file_info->custom_checksum_hex) {
|
||||
return Status::Corruption(
|
||||
|
@ -1053,12 +1053,23 @@ TEST_F(BackupableDBTest, CustomChecksumTransition) {
|
||||
sopt);
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
|
||||
CloseBackupEngine();
|
||||
|
||||
++i;
|
||||
// Say, we accidentally change the factory
|
||||
backupable_options_->file_checksum_gen_factory = hash32_factory;
|
||||
OpenBackupEngine();
|
||||
ASSERT_NOK(backup_engine_->VerifyBackup(i + 2, true));
|
||||
ASSERT_NOK(backup_engine_->RestoreDBFromBackup(i + 2, dbname_, dbname_));
|
||||
ASSERT_OK(backup_engine_->DeleteBackup(i + 2));
|
||||
// Unable to verify the latest backup.
|
||||
ASSERT_NOK(backup_engine_->VerifyBackup(i + 1, true));
|
||||
// Unable to restore the latest backup.
|
||||
ASSERT_NOK(backup_engine_->RestoreDBFromBackup(i + 1, dbname_, dbname_));
|
||||
CloseBackupEngine();
|
||||
// Reset the factory to the same as the one used in the db.
|
||||
backupable_options_->file_checksum_gen_factory = hash_factory;
|
||||
OpenBackupEngine();
|
||||
ASSERT_OK(backup_engine_->VerifyBackup(i + 1, true));
|
||||
ASSERT_OK(backup_engine_->RestoreDBFromBackup(i + 1, dbname_, dbname_));
|
||||
ASSERT_OK(backup_engine_->DeleteBackup(i + 1));
|
||||
--i;
|
||||
CloseDBAndBackupEngine();
|
||||
|
||||
// 3) with one custom checksum function (FileHash32GenFactory) for db
|
||||
@ -1106,6 +1117,79 @@ TEST_F(BackupableDBTest, CustomChecksumTransition) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(BackupableDBTest, CustomChecksumNoNewDbTables) {
|
||||
const int keys_iteration = 5000;
|
||||
std::vector<std::shared_ptr<FileChecksumGenFactory>> checksum_factories{
|
||||
nullptr, GetFileChecksumGenCrc32cFactory(),
|
||||
std::make_shared<FileHash32GenFactory>(),
|
||||
std::make_shared<FileHashGenFactory>()};
|
||||
|
||||
for (const auto& sopt : kAllShareOptions) {
|
||||
for (const auto& f : checksum_factories) {
|
||||
options_.file_checksum_gen_factory = f;
|
||||
backupable_options_->file_checksum_gen_factory = f;
|
||||
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */,
|
||||
sopt);
|
||||
FillDB(db_.get(), 0, keys_iteration);
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
|
||||
ASSERT_OK(backup_engine_->VerifyBackup(1, true));
|
||||
// No new table files have been created since the last backup.
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
|
||||
ASSERT_OK(backup_engine_->VerifyBackup(2, true));
|
||||
CloseDBAndBackupEngine();
|
||||
AssertBackupConsistency(1, 0, keys_iteration, keys_iteration * 2);
|
||||
AssertBackupConsistency(2, 0, keys_iteration, keys_iteration * 2);
|
||||
|
||||
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
|
||||
sopt);
|
||||
// No new table files have been created since the last backup and backup
|
||||
// engine opening
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
|
||||
ASSERT_OK(backup_engine_->VerifyBackup(3, true));
|
||||
CloseDBAndBackupEngine();
|
||||
AssertBackupConsistency(3, 0, keys_iteration, keys_iteration * 2);
|
||||
|
||||
// delete old data
|
||||
DestroyDB(dbname_, options_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(BackupableDBTest, FileCollision) {
|
||||
const int keys_iteration = 5000;
|
||||
for (const auto& sopt : kAllShareOptions) {
|
||||
OpenDBAndBackupEngine(true /* destroy_old_data */, false /* dummy */, sopt);
|
||||
FillDB(db_.get(), 0, keys_iteration);
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
|
||||
FillDB(db_.get(), 0, keys_iteration);
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
|
||||
CloseDBAndBackupEngine();
|
||||
|
||||
// If the db directory has been cleaned up, it is sensitive to file
|
||||
// collision.
|
||||
DestroyDB(dbname_, options_);
|
||||
|
||||
// open with old backup
|
||||
OpenDBAndBackupEngine(false /* destroy_old_data */, false /* dummy */,
|
||||
sopt);
|
||||
FillDB(db_.get(), 0, keys_iteration * 2);
|
||||
if (sopt != kShareNoChecksum) {
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
|
||||
} else {
|
||||
// The new table files created in FillDB() will clash with the old
|
||||
// backup and sharing tables with no checksum will have the file
|
||||
// collision problem.
|
||||
ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));
|
||||
ASSERT_OK(backup_engine_->PurgeOldBackups(0));
|
||||
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
|
||||
}
|
||||
CloseDBAndBackupEngine();
|
||||
|
||||
// delete old data
|
||||
DestroyDB(dbname_, options_);
|
||||
}
|
||||
}
|
||||
|
||||
// This test verifies that the verifyBackup method correctly identifies
|
||||
// invalid backups
|
||||
TEST_P(BackupableDBTestWithParam, VerifyBackup) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user