Add kManifestWriteNoWAL to BackgroundErrorReason to handle Flush IO Error when WAL is disabled (#7693)

Summary:
In the current code base, all the manifest writes with IO error will be set with reason: BackgroundErrorReason::kManifestWrite, which will be mapped to the kHardError if the IO Error is retryable. However, if the system does not use the WAL, all the retryable IO error should be mapped to kSoftError. Create this PR to handle is special case by adding kManifestWriteNoWAL to BackgroundErrorReason.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7693

Test Plan: make check, add new testing cases to error_handler_fs_test

Reviewed By: anand1976

Differential Revision: D25066204

Pulled By: zhichao-cao

fbshipit-source-id: d59553896c2eac3fb37c05238544d2b265379462
This commit is contained in:
Zhichao Cao 2020-12-02 18:22:05 -08:00 committed by Facebook GitHub Bot
parent 3b9bfe8f14
commit 29e8f6a698
4 changed files with 167 additions and 7 deletions

View File

@ -254,8 +254,15 @@ Status DBImpl::FlushMemTableToOutputFile(
// TODO: distinguish between MANIFEST write and CURRENT renaming
if (!versions_->io_status().ok()) {
// Should handle return error?
error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite)
.PermitUncheckedError();
if (total_log_size_ > 0) {
// If the WAL is empty, we use different error reason
error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite)
.PermitUncheckedError();
} else {
error_handler_
.SetBGError(io_s, BackgroundErrorReason::kManifestWriteNoWAL)
.PermitUncheckedError();
}
} else if (total_log_size_ > 0) {
// Should handle return error?
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush)
@ -656,9 +663,16 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
// be pessimistic and try write to a new MANIFEST.
// TODO: distinguish between MANIFEST write and CURRENT renaming
if (!versions_->io_status().ok()) {
// Should Handle this error?
error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite)
.PermitUncheckedError();
// Should handle return error?
if (total_log_size_ > 0) {
// If the WAL is empty, we use different error reason
error_handler_.SetBGError(io_s, BackgroundErrorReason::kManifestWrite)
.PermitUncheckedError();
} else {
error_handler_
.SetBGError(io_s, BackgroundErrorReason::kManifestWriteNoWAL)
.PermitUncheckedError();
}
} else if (total_log_size_ > 0) {
// Should Handle this error?
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush)

View File

@ -111,6 +111,23 @@ std::map<std::tuple<BackgroundErrorReason, Status::Code, Status::SubCode, bool>,
Status::Code::kIOError, Status::SubCode::kIOFenced,
false),
Status::Severity::kFatalError},
// Errors during MANIFEST write when WAL is disabled
{std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
Status::Code::kIOError, Status::SubCode::kNoSpace,
true),
Status::Severity::kHardError},
{std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
Status::Code::kIOError, Status::SubCode::kNoSpace,
false),
Status::Severity::kHardError},
{std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
Status::Code::kIOError, Status::SubCode::kIOFenced,
true),
Status::Severity::kFatalError},
{std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
Status::Code::kIOError, Status::SubCode::kIOFenced,
false),
Status::Severity::kFatalError},
};
@ -175,6 +192,12 @@ std::map<std::tuple<BackgroundErrorReason, Status::Code, bool>,
{std::make_tuple(BackgroundErrorReason::kFlushNoWAL,
Status::Code::kIOError, false),
Status::Severity::kNoError},
{std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
Status::Code::kIOError, true),
Status::Severity::kFatalError},
{std::make_tuple(BackgroundErrorReason::kManifestWriteNoWAL,
Status::Code::kIOError, false),
Status::Severity::kFatalError},
};
std::map<std::tuple<BackgroundErrorReason, bool>, Status::Severity>
@ -324,6 +347,22 @@ Status ErrorHandler::SetBGError(const Status& bg_err, BackgroundErrorReason reas
return bg_error_;
}
// This is the main function for looking at IO related error during the
// background operations. The main logic is:
// 1) if the error is caused by data loss, the error is mapped to
// unrecoverable error. Application/user must take action to handle
// this situation.
// 2) if the error is a Retryable IO error, auto resume will be called and the
// auto resume can be controlled by resume count and resume interval
// options. There are three sub-cases:
// a) if the error happens during compaction, it is mapped to a soft error.
// the compaction thread will reschedule a new compaction.
// b) if the error happens during flush and also WAL is empty, it is mapped
// to a soft error. Note that, it includes the case that IO error happens
// in SST or manifest write during flush.
// c) all other errors are mapped to hard error.
// 3) for other cases, SetBGError(const Status& bg_err, BackgroundErrorReason
// reason) will be called to handle other error cases.
Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
BackgroundErrorReason reason) {
db_mutex_->AssertHeld();
@ -336,7 +375,8 @@ Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
if (recovery_in_prog_ && recovery_io_error_.ok()) {
recovery_io_error_ = bg_io_err;
}
if (BackgroundErrorReason::kManifestWrite == reason) {
if (BackgroundErrorReason::kManifestWrite == reason ||
BackgroundErrorReason::kManifestWriteNoWAL == reason) {
// Always returns ok
db_->DisableFileDeletionsWithLock().PermitUncheckedError();
}
@ -374,7 +414,8 @@ Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
}
recover_context_ = context;
return bg_error_;
} else if (BackgroundErrorReason::kFlushNoWAL == reason) {
} else if (BackgroundErrorReason::kFlushNoWAL == reason ||
BackgroundErrorReason::kManifestWriteNoWAL == reason) {
// When the BG Retryable IO error reason is flush without WAL,
// We map it to a soft error. At the same time, all the background work
// should be stopped except the BG work from recovery. Therefore, we

View File

@ -453,6 +453,51 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableError) {
Close();
}
TEST_F(DBErrorHandlingFSTest, ManifestWriteNoWALRetryableError) {
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
Options options = GetDefaultOptions();
options.env = fault_env_.get();
options.create_if_missing = true;
options.listeners.emplace_back(listener);
options.max_bgerror_resume_count = 0;
Status s;
std::string old_manifest;
std::string new_manifest;
listener->EnableAutoRecovery(false);
DestroyAndReopen(options);
old_manifest = GetManifestNameFromLiveFiles();
IOStatus error_msg = IOStatus::IOError("Retryable IO Error");
error_msg.SetRetryable(true);
WriteOptions wo = WriteOptions();
wo.disableWAL = true;
ASSERT_OK(Put(Key(0), "val", wo));
ASSERT_OK(Flush());
ASSERT_OK(Put(Key(1), "val", wo));
SyncPoint::GetInstance()->SetCallBack(
"VersionSet::LogAndApply:WriteManifest",
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
ASSERT_EQ(s, Status::OK());
new_manifest = GetManifestNameFromLiveFiles();
ASSERT_NE(new_manifest, old_manifest);
Reopen(options);
ASSERT_EQ("val", Get(Key(0)));
ASSERT_EQ("val", Get(Key(1)));
Close();
}
TEST_F(DBErrorHandlingFSTest, DoubleManifestWriteError) {
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
@ -1789,6 +1834,61 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableErrorAutoRecover) {
Close();
}
TEST_F(DBErrorHandlingFSTest, ManifestWriteNoWALRetryableErrorAutoRecover) {
// Fail the first resume and let the second resume be successful
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
Options options = GetDefaultOptions();
options.env = fault_env_.get();
options.create_if_missing = true;
options.listeners.emplace_back(listener);
options.max_bgerror_resume_count = 2;
options.bgerror_resume_retry_interval = 100000; // 0.1 second
Status s;
std::string old_manifest;
std::string new_manifest;
listener->EnableAutoRecovery(false);
DestroyAndReopen(options);
old_manifest = GetManifestNameFromLiveFiles();
IOStatus error_msg = IOStatus::IOError("Retryable IO Error");
error_msg.SetRetryable(true);
WriteOptions wo = WriteOptions();
wo.disableWAL = true;
ASSERT_OK(Put(Key(0), "val", wo));
ASSERT_OK(Flush());
ASSERT_OK(Put(Key(1), "val", wo));
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
{{"RecoverFromRetryableBGIOError:BeforeStart",
"ManifestWriteNoWALRetryableErrorAutoRecover:0"},
{"ManifestWriteNoWALRetryableErrorAutoRecover:1",
"RecoverFromRetryableBGIOError:BeforeWait1"},
{"RecoverFromRetryableBGIOError:RecoverSuccess",
"ManifestWriteNoWALRetryableErrorAutoRecover:2"}});
SyncPoint::GetInstance()->SetCallBack(
"VersionSet::LogAndApply:WriteManifest",
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
TEST_SYNC_POINT("ManifestWriteNoWALRetryableErrorAutoRecover:0");
fault_fs_->SetFilesystemActive(true);
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
TEST_SYNC_POINT("ManifestWriteNoWALRetryableErrorAutoRecover:1");
TEST_SYNC_POINT("ManifestWriteNoWALRetryableErrorAutoRecover:2");
SyncPoint::GetInstance()->DisableProcessing();
new_manifest = GetManifestNameFromLiveFiles();
ASSERT_NE(new_manifest, old_manifest);
Reopen(options);
ASSERT_EQ("val", Get(Key(0)));
ASSERT_EQ("val", Get(Key(1)));
Close();
}
TEST_F(DBErrorHandlingFSTest,
CompactionManifestWriteRetryableErrorAutoRecover) {
std::shared_ptr<ErrorHandlerFSListener> listener(

View File

@ -120,6 +120,10 @@ enum class FlushReason : int {
kErrorRecoveryRetryFlush = 0xc,
};
// TODO: In the future, BackgroundErrorReason will only be used to indicate
// why the BG Error is happening (e.g., flush, compaction). We may introduce
// other data structure to indicate other essential information such as
// the file type (e.g., Manifest, SST) and special context.
enum class BackgroundErrorReason {
kFlush,
kCompaction,
@ -127,6 +131,7 @@ enum class BackgroundErrorReason {
kMemTable,
kManifestWrite,
kFlushNoWAL,
kManifestWriteNoWAL,
};
enum class WriteStallCondition {