Add test kPointInTimeRecoveryCFConsistency
Summary: Context/problem: - CFs may be flushed at different times - A WAL can only be deleted after all CFs have flushed beyond end of that WAL. - Point-in-time recovery might stop upon reaching the first corruption. - Some CFs may have already flushed beyond that point, while others haven't. We should fail the Open() instead of proceeding with inconsistent CFs. Closes https://github.com/facebook/rocksdb/pull/2900 Differential Revision: D5863281 Pulled By: miasantreble fbshipit-source-id: 180dbaf83d96c804cff49b3c406312a4ae61313e
This commit is contained in:
parent
be97dbb15c
commit
1d6700f9e6
@ -5,6 +5,7 @@
|
||||
|
||||
### New Features
|
||||
### Bug Fixes
|
||||
* Fix a potential data inconsistency issue during point-in-time recovery. `DB:Open()` will abort if column family inconsistency is found during PIT recovery.
|
||||
|
||||
## 5.8.0 (08/30/2017)
|
||||
### Public API Change
|
||||
|
@ -493,6 +493,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
|
||||
bool stop_replay_by_wal_filter = false;
|
||||
bool stop_replay_for_corruption = false;
|
||||
bool flushed = false;
|
||||
uint64_t corrupted_log_number = kMaxSequenceNumber;
|
||||
for (auto log_number : log_numbers) {
|
||||
// The previous incarnation may not have written any MANIFEST
|
||||
// records after allocating this log number. So we manually
|
||||
@ -720,6 +721,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
|
||||
// We should ignore the error but not continue replaying
|
||||
status = Status::OK();
|
||||
stop_replay_for_corruption = true;
|
||||
corrupted_log_number = log_number;
|
||||
ROCKS_LOG_INFO(immutable_db_options_.info_log,
|
||||
"Point in time recovered to log #%" PRIu64
|
||||
" seq #%" PRIu64,
|
||||
@ -741,6 +743,25 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
|
||||
versions_->SetLastSequence(last_sequence);
|
||||
}
|
||||
}
|
||||
// Compare the corrupted log number to all columnfamily's current log number.
|
||||
// Abort Open() if any column family's log number is greater than
|
||||
// the corrupted log number, which means CF contains data beyond the point of
|
||||
// corruption. This could during PIT recovery when the WAL is corrupted and
|
||||
// some (but not all) CFs are flushed
|
||||
if (stop_replay_for_corruption == true &&
|
||||
(immutable_db_options_.wal_recovery_mode ==
|
||||
WALRecoveryMode::kPointInTimeRecovery ||
|
||||
immutable_db_options_.wal_recovery_mode ==
|
||||
WALRecoveryMode::kTolerateCorruptedTailRecords)) {
|
||||
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
||||
if (cfd->GetLogNumber() > corrupted_log_number) {
|
||||
ROCKS_LOG_ERROR(immutable_db_options_.info_log,
|
||||
"Column family inconsistency: SST file contains data"
|
||||
" beyond the point of corruption.");
|
||||
return Status::Corruption("SST file is ahead of WALs");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// True if there's any data in the WALs; if not, we can skip re-processing
|
||||
// them later
|
||||
|
@ -876,6 +876,39 @@ TEST_F(DBWALTest, kAbsoluteConsistency) {
|
||||
}
|
||||
}
|
||||
|
||||
// Test scope:
|
||||
// We don't expect the data store to be opened if there is any inconsistency
|
||||
// between WAL and SST files
|
||||
TEST_F(DBWALTest, kPointInTimeRecoveryCFConsistency) {
|
||||
Options options = CurrentOptions();
|
||||
options.avoid_flush_during_recovery = true;
|
||||
|
||||
// Create DB with multiple column families.
|
||||
CreateAndReopenWithCF({"one", "two"}, options);
|
||||
ASSERT_OK(Put(1, "key1", "val1"));
|
||||
ASSERT_OK(Put(2, "key2", "val2"));
|
||||
|
||||
// Record the offset at this point
|
||||
Env* env = options.env;
|
||||
int wal_file_id = RecoveryTestHelper::kWALFileOffset + 1;
|
||||
std::string fname = LogFileName(dbname_, wal_file_id);
|
||||
uint64_t offset_to_corrupt;
|
||||
ASSERT_OK(env->GetFileSize(fname, &offset_to_corrupt));
|
||||
ASSERT_GT(offset_to_corrupt, 0);
|
||||
|
||||
ASSERT_OK(Put(1, "key3", "val3"));
|
||||
// Corrupt WAL at location of key3
|
||||
RecoveryTestHelper::InduceCorruption(
|
||||
fname, static_cast<size_t>(offset_to_corrupt), static_cast<size_t>(4));
|
||||
ASSERT_OK(Put(2, "key4", "val4"));
|
||||
ASSERT_OK(Put(1, "key5", "val5"));
|
||||
Flush(2);
|
||||
|
||||
// PIT recovery & verify
|
||||
options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
|
||||
ASSERT_NOK(TryReopenWithColumnFamilies({"default", "one", "two"}, options));
|
||||
}
|
||||
|
||||
// Test scope:
|
||||
// - We expect to open data store under all circumstances
|
||||
// - We expect only data upto the point where the first error was encountered
|
||||
|
Loading…
Reference in New Issue
Block a user