Fix flush no wal IO error bug (#8107)

Summary:
There is bug in the current code base introduced in https://github.com/facebook/rocksdb/issues/8049 , we still set the SST file write IO Error only case as hard error. Fix it by removing the logic.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8107

Test Plan: make check, error_handler_fs_test

Reviewed By: anand1976

Differential Revision: D27321422

Pulled By: zhichao-cao

fbshipit-source-id: c014afc1553ca66b655e3bbf9d0bf6eb417ccf94
This commit is contained in:
Zhichao Cao 2021-03-25 21:41:44 -07:00
parent 5b3ebdc3d1
commit d5e2462946
2 changed files with 44 additions and 48 deletions

View File

@ -260,18 +260,16 @@ Status DBImpl::FlushMemTableToOutputFile(
// be pessimistic and try write to a new MANIFEST.
// TODO: distinguish between MANIFEST write and CURRENT renaming
if (!versions_->io_status().ok()) {
if (total_log_size_ > 0) {
// If the WAL is empty, we use different error reason
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWrite);
} else {
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWriteNoWAL);
}
} else if (total_log_size_ > 0 || !log_io_s.ok()) {
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
// If WAL sync is successful (either WAL size is 0 or there is no IO
// error), all the Manifest write will be map to soft error.
// TODO: kManifestWriteNoWAL and kFlushNoWAL are misleading. Refactor is
// needed.
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWriteNoWAL);
} else {
// If the WAL is empty, we use different error reason
// If WAL sync is successful (either WAL size is 0 or there is no IO
// error), all the other SST file write errors will be set as
// kFlushNoWAL.
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
}
} else {
@ -687,18 +685,16 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
// be pessimistic and try write to a new MANIFEST.
// TODO: distinguish between MANIFEST write and CURRENT renaming
if (!versions_->io_status().ok()) {
if (total_log_size_ > 0) {
// If the WAL is empty, we use different error reason
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWrite);
} else {
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWriteNoWAL);
}
} else if (total_log_size_ > 0) {
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
// If WAL sync is successful (either WAL size is 0 or there is no IO
// error), all the Manifest write will be map to soft error.
// TODO: kManifestWriteNoWAL and kFlushNoWAL are misleading. Refactor
// is needed.
error_handler_.SetBGError(io_s,
BackgroundErrorReason::kManifestWriteNoWAL);
} else {
// If the WAL is empty, we use different error reason
// If WAL sync is successful (either WAL size is 0 or there is no IO
// error), all the other SST file write errors will be set as
// kFlushNoWAL.
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
}
} else {

View File

@ -216,7 +216,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
@ -242,7 +242,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
@ -256,7 +256,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
@ -292,7 +292,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
@ -306,7 +306,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
@ -320,7 +320,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
@ -340,7 +340,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
s = dbfull()->Resume();
@ -649,7 +649,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
@ -695,7 +695,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteFileScopeError) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
@ -1698,7 +1698,7 @@ TEST_F(DBErrorHandlingFSTest, MultiDBVariousErrors) {
// to soft error and trigger auto resume. During auto resume, SwitchMemtable
// is disabled to avoid small SST tables. Write can still be applied before
// the bg error is cleaned unless the memtable is full.
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover1) {
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover1) {
// Activate the FS before the first resume
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
@ -1768,7 +1768,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover1) {
Destroy(options);
}
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover2) {
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover2) {
// Activate the FS before the first resume
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
@ -1810,14 +1810,14 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover2) {
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ASSERT_LE(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
ASSERT_LE(0, options.statistics->getAndResetTickerCount(
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
HistogramData autoresume_retry;
options.statistics->histogramData(ERROR_HANDLER_AUTORESUME_RETRY_COUNT,
&autoresume_retry);
ASSERT_EQ(autoresume_retry.max, 1);
ASSERT_GE(autoresume_retry.max, 0);
ASSERT_OK(Put(Key(2), "val2", wo));
s = Flush();
// Since auto resume is successful, the bg error is cleaned, flush will
@ -1827,7 +1827,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover2) {
Destroy(options);
}
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover1) {
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableErrorAutoRecover1) {
// Fail the first resume and make the second resume successful
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
@ -1876,7 +1876,7 @@ TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover1) {
Destroy(options);
}
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover2) {
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) {
// Activate the FS before the first resume
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
@ -1901,7 +1901,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover2) {
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
SyncPoint::GetInstance()->DisableProcessing();
fault_fs_->SetFilesystemActive(true);
ASSERT_EQ(listener->WaitForRecovery(5000000), true);
@ -1916,7 +1916,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover2) {
Destroy(options);
}
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover3) {
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover3) {
// Fail all the resume and let user to resume
std::shared_ptr<ErrorHandlerFSListener> listener(
new ErrorHandlerFSListener());
@ -1945,7 +1945,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover3) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover3:0");
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover3:1");
fault_fs_->SetFilesystemActive(true);
@ -1965,7 +1965,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover3) {
Destroy(options);
}
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover4) {
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableErrorAutoRecover4) {
// Fail the first resume and does not do resume second time because
// the IO error severity is Fatal Error and not Retryable.
std::shared_ptr<ErrorHandlerFSListener> listener(
@ -2001,7 +2001,7 @@ TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover4) {
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover4:0");
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover4:2");
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
@ -2025,7 +2025,7 @@ TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover4) {
Destroy(options);
}
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover5) {
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableErrorAutoRecover5) {
// During the resume, call DB->CLose, make sure the resume thread exist
// before close continues. Due to the shutdown, the resume is not successful
// and the FS does not become active, so close status is still IO error
@ -2054,7 +2054,7 @@ TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover5) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover5:0");
// The first resume will cause recovery_error and its severity is the
// Fatal error
@ -2074,7 +2074,7 @@ TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover5) {
Destroy(options);
}
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover6) {
TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover6) {
// During the resume, call DB->CLose, make sure the resume thread exist
// before close continues. Due to the shutdown, the resume is not successful
// and the FS does not become active, so close status is still IO error
@ -2109,7 +2109,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableeErrorAutoRecover6) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover6:0");
TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover6:1");
fault_fs_->SetFilesystemActive(true);
@ -2168,7 +2168,7 @@ TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableErrorAutoRecover) {
[&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); });
SyncPoint::GetInstance()->EnableProcessing();
s = Flush();
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError);
ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError);
TEST_SYNC_POINT("ManifestWriteRetryableErrorAutoRecover:0");
fault_fs_->SetFilesystemActive(true);
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();