Fix flaky BackupableDBTest.TableFileCorruptedBeforeBackup (#7102)

Summary:
The fix in PR https://github.com/facebook/rocksdb/issues/7082 is not really successful because there is still a small chance that the test will fail.

In addtion to flushing, we close the DB and then reopen before corrupting a table file in the DB. Specifically, we corrupt a table file before backup takes place as follows.
* Open DB
* Fill DB
* Flush DB (optional, no flushing here also works)
* Close DB
* Reopen DB
* Corrupt a table file in the DB

This should make the test reliable.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7102

Test Plan:
`while ./backupable_db_test --gtest_filter=*TableFileCorruptedBeforeBackup*; do true; done`
(kept running for an hour or so :)

Reviewed By: pdillinger

Differential Revision: D22432417

Pulled By: gg814

fbshipit-source-id: d407eee93ff428bb662f80cde1659fbf0149d0cd
This commit is contained in:
Zitan Chen 2020-07-08 12:15:02 -07:00 committed by Facebook GitHub Bot
parent 712458fc34
commit cc5c68084b

View File

@ -594,6 +594,17 @@ class BackupableDBTest : public testing::Test {
return db;
}
void CloseAndReopenDB() {
// Close DB
db_.reset();
// Open DB
test_db_env_->SetLimitWrittenFiles(1000000);
DB* db;
ASSERT_OK(DB::Open(options_, dbname_, &db));
db_.reset(db);
}
void OpenDBAndBackupEngine(bool destroy_old_data = false, bool dummy = false,
ShareOption shared_option = kShareNoChecksum) {
// reset all the defaults
@ -1186,12 +1197,14 @@ TEST_F(BackupableDBTest, TableFileCorruptedBeforeBackup) {
kNoShare);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(db_->Flush(FlushOptions()));
CloseAndReopenDB();
// corrupt a random table file in the DB directory
ASSERT_OK(CorruptRandomTableFileInDB());
// file_checksum_gen_factory is null, and thus table checksum is not
// verified for creating a new backup; no correction is detected
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
CloseDBAndBackupEngine();
// delete old files in db
ASSERT_OK(DestroyDB(dbname_, options_));
@ -1201,12 +1214,12 @@ TEST_F(BackupableDBTest, TableFileCorruptedBeforeBackup) {
kNoShare);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(db_->Flush(FlushOptions()));
CloseAndReopenDB();
// corrupt a random table file in the DB directory
ASSERT_OK(CorruptRandomTableFileInDB());
// table file checksum is enabled so we should be able to detect any
// corruption
ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));
CloseDBAndBackupEngine();
}
@ -1219,11 +1232,13 @@ TEST_P(BackupableDBTestWithParam, TableFileCorruptedBeforeBackup) {
OpenDBAndBackupEngine(true /* destroy_old_data */);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(db_->Flush(FlushOptions()));
CloseAndReopenDB();
// corrupt a random table file in the DB directory
ASSERT_OK(CorruptRandomTableFileInDB());
// cannot detect corruption since DB manifest has no table checksums
ASSERT_OK(backup_engine_->CreateNewBackup(db_.get()));
CloseDBAndBackupEngine();
// delete old files in db
ASSERT_OK(DestroyDB(dbname_, options_));
@ -1232,11 +1247,11 @@ TEST_P(BackupableDBTestWithParam, TableFileCorruptedBeforeBackup) {
OpenDBAndBackupEngine(true /* destroy_old_data */);
FillDB(db_.get(), 0, keys_iteration);
ASSERT_OK(db_->Flush(FlushOptions()));
CloseAndReopenDB();
// corrupt a random table file in the DB directory
ASSERT_OK(CorruptRandomTableFileInDB());
// corruption is detected
ASSERT_NOK(backup_engine_->CreateNewBackup(db_.get()));
CloseDBAndBackupEngine();
}