Fix kPointInTimeRecovery handling of truncated WAL (#7701)
Summary: WAL may be truncated to an incomplete record due to crash while writing the last record or corruption. In the former case, no hole will be produced since no ACK'd data was lost. In the latter case, a hole could be produced without this PR since we proceeded to recover the next WAL as if nothing happened. This PR changes the record reading code to always report a corruption for incomplete records in `kPointInTimeRecovery` mode, and the upper layer will only ignore them if the next WAL has consecutive seqnum (i.e., we are guaranteed no hole). While this solves the hole problem for the case of incomplete records, the possibility is still there if the WAL is corrupted by truncation to an exact record boundary. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7701 Test Plan: Interestingly there already was a test for this case (`DBWALTestWithParams.kPointInTimeRecovery`); it just had a typo bug in the verification that prevented it from noticing holes in recovery. Reviewed By: anand1976 Differential Revision: D25111765 Pulled By: ajkr fbshipit-source-id: 5e330b13b1ee2b5be096cea9d0ff6075843e57b6
This commit is contained in:
parent
a5a5be4b4c
commit
cac1fc4dfb
@ -1,4 +1,8 @@
|
||||
# Rocksdb Change Log
|
||||
## Unreleased
|
||||
### Bug Fixes
|
||||
* Truncated WALs ending in incomplete records can no longer produce gaps in the recovered data when `WALRecoveryMode::kPointInTimeRecovery` is used. Gaps are still possible when WALs are truncated exactly on record boundaries.
|
||||
|
||||
## 6.15.0 (11/13/2020)
|
||||
### Bug Fixes
|
||||
* Fixed a bug in the following combination of features: indexes with user keys (`format_version >= 3`), indexes are partitioned (`index_type == kTwoLevelIndexSearch`), and some index partitions are pinned in memory (`BlockBasedTableOptions::pin_l0_filter_and_index_blocks_in_cache`). The bug could cause keys to be truncated when read from the index leading to wrong read results or other unexpected behavior.
|
||||
|
@ -1348,13 +1348,19 @@ TEST_P(DBWALTestWithParams, kPointInTimeRecovery) {
|
||||
size_t recovered_row_count = RecoveryTestHelper::GetData(this);
|
||||
ASSERT_LT(recovered_row_count, row_count);
|
||||
|
||||
bool expect_data = true;
|
||||
for (size_t k = 0; k < maxkeys; ++k) {
|
||||
bool found = Get("key" + ToString(corrupt_offset)) != "NOT_FOUND";
|
||||
if (expect_data && !found) {
|
||||
expect_data = false;
|
||||
// Verify a prefix of keys were recovered. But not in the case of full WAL
|
||||
// truncation, because we have no way to know there was a corruption when
|
||||
// truncation happened on record boundaries (preventing recovery holes in
|
||||
// that case requires using `track_and_verify_wals_in_manifest`).
|
||||
if (!trunc || corrupt_offset != 0) {
|
||||
bool expect_data = true;
|
||||
for (size_t k = 0; k < maxkeys; ++k) {
|
||||
bool found = Get("key" + ToString(k)) != "NOT_FOUND";
|
||||
if (expect_data && !found) {
|
||||
expect_data = false;
|
||||
}
|
||||
ASSERT_EQ(found, expect_data);
|
||||
}
|
||||
ASSERT_EQ(found, expect_data);
|
||||
}
|
||||
|
||||
const size_t min = RecoveryTestHelper::kKeysPerWALFile *
|
||||
|
@ -119,16 +119,26 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
|
||||
break;
|
||||
|
||||
case kBadHeader:
|
||||
if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency) {
|
||||
// in clean shutdown we don't expect any error in the log files
|
||||
if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency ||
|
||||
wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery) {
|
||||
// In clean shutdown we don't expect any error in the log files.
|
||||
// In point-in-time recovery an incomplete record at the end could
|
||||
// produce a hole in the recovered data. Report an error here, which
|
||||
// higher layers can choose to ignore when it's provable there is no
|
||||
// hole.
|
||||
ReportCorruption(drop_size, "truncated header");
|
||||
}
|
||||
FALLTHROUGH_INTENDED;
|
||||
|
||||
case kEof:
|
||||
if (in_fragmented_record) {
|
||||
if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency) {
|
||||
// in clean shutdown we don't expect any error in the log files
|
||||
if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency ||
|
||||
wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery) {
|
||||
// In clean shutdown we don't expect any error in the log files.
|
||||
// In point-in-time recovery an incomplete record at the end could
|
||||
// produce a hole in the recovered data. Report an error here, which
|
||||
// higher layers can choose to ignore when it's provable there is no
|
||||
// hole.
|
||||
ReportCorruption(scratch->size(), "error reading trailing data");
|
||||
}
|
||||
// This can be caused by the writer dying immediately after
|
||||
@ -142,8 +152,13 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
|
||||
if (wal_recovery_mode != WALRecoveryMode::kSkipAnyCorruptedRecords) {
|
||||
// Treat a record from a previous instance of the log as EOF.
|
||||
if (in_fragmented_record) {
|
||||
if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency) {
|
||||
// in clean shutdown we don't expect any error in the log files
|
||||
if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency ||
|
||||
wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery) {
|
||||
// In clean shutdown we don't expect any error in the log files.
|
||||
// In point-in-time recovery an incomplete record at the end could
|
||||
// produce a hole in the recovered data. Report an error here,
|
||||
// which higher layers can choose to ignore when it's provable
|
||||
// there is no hole.
|
||||
ReportCorruption(scratch->size(), "error reading trailing data");
|
||||
}
|
||||
// This can be caused by the writer dying immediately after
|
||||
@ -164,6 +179,20 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch,
|
||||
break;
|
||||
|
||||
case kBadRecordLen:
|
||||
if (eof_) {
|
||||
if (wal_recovery_mode == WALRecoveryMode::kAbsoluteConsistency ||
|
||||
wal_recovery_mode == WALRecoveryMode::kPointInTimeRecovery) {
|
||||
// In clean shutdown we don't expect any error in the log files.
|
||||
// In point-in-time recovery an incomplete record at the end could
|
||||
// produce a hole in the recovered data. Report an error here, which
|
||||
// higher layers can choose to ignore when it's provable there is no
|
||||
// hole.
|
||||
ReportCorruption(drop_size, "truncated record body");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
FALLTHROUGH_INTENDED;
|
||||
|
||||
case kBadRecordChecksum:
|
||||
if (recycled_ &&
|
||||
wal_recovery_mode ==
|
||||
@ -355,18 +384,14 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result, size_t* drop_size) {
|
||||
}
|
||||
}
|
||||
if (header_size + length > buffer_.size()) {
|
||||
assert(buffer_.size() >= static_cast<size_t>(header_size));
|
||||
*drop_size = buffer_.size();
|
||||
buffer_.clear();
|
||||
if (!eof_) {
|
||||
return kBadRecordLen;
|
||||
}
|
||||
// If the end of the file has been reached without reading |length|
|
||||
// bytes of payload, assume the writer died in the middle of writing the
|
||||
// record. Don't report a corruption unless requested.
|
||||
if (*drop_size) {
|
||||
return kBadHeader;
|
||||
}
|
||||
return kEof;
|
||||
// If the end of the read has been reached without seeing
|
||||
// `header_size + length` bytes of payload, report a corruption. The
|
||||
// higher layers can decide how to handle it based on the recovery mode,
|
||||
// whether this occurred at EOF, whether this is the final WAL, etc.
|
||||
return kBadRecordLen;
|
||||
}
|
||||
|
||||
if (type == kZeroType && length == 0) {
|
||||
|
@ -465,7 +465,7 @@ TEST_P(LogTest, BadLengthAtEndIsNotIgnored) {
|
||||
ShrinkSize(1);
|
||||
ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency));
|
||||
ASSERT_GT(DroppedBytes(), 0U);
|
||||
ASSERT_EQ("OK", MatchError("Corruption: truncated header"));
|
||||
ASSERT_EQ("OK", MatchError("Corruption: truncated record body"));
|
||||
}
|
||||
|
||||
TEST_P(LogTest, ChecksumMismatch) {
|
||||
@ -573,9 +573,7 @@ TEST_P(LogTest, PartialLastIsNotIgnored) {
|
||||
ShrinkSize(1);
|
||||
ASSERT_EQ("EOF", Read(WALRecoveryMode::kAbsoluteConsistency));
|
||||
ASSERT_GT(DroppedBytes(), 0U);
|
||||
ASSERT_EQ("OK", MatchError(
|
||||
"Corruption: truncated headerCorruption: "
|
||||
"error reading trailing data"));
|
||||
ASSERT_EQ("OK", MatchError("Corruption: truncated record body"));
|
||||
}
|
||||
|
||||
TEST_P(LogTest, ErrorJoinsRecords) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user