diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc index 729063614..50778530f 100644 --- a/db/error_handler_fs_test.cc +++ b/db/error_handler_fs_test.cc @@ -1827,56 +1827,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableErrorAutoRecover2) { Destroy(options); } -TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableErrorAutoRecover1) { - // Fail the first resume and make the second resume successful - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 100000; // 0.1 second - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(1), "val1")); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"RecoverFromRetryableBGIOError:BeforeWait0", - "FLushWritRetryableeErrorAutoRecover1:0"}, - {"FLushWritRetryableeErrorAutoRecover1:1", - "RecoverFromRetryableBGIOError:BeforeWait1"}, - {"RecoverFromRetryableBGIOError:RecoverSuccess", - "FLushWritRetryableeErrorAutoRecover1:2"}}); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kHardError); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover1:0"); - fault_fs_->SetFilesystemActive(true); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover1:1"); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover1:2"); - SyncPoint::GetInstance()->DisableProcessing(); - - ASSERT_EQ("val1", Get(Key(1))); - Reopen(options); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_OK(Put(Key(2), "val2")); - ASSERT_OK(Flush()); - ASSERT_EQ("val2", Get(Key(2))); - - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) { +TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover1) { // Activate the FS before the first resume std::shared_ptr listener( new ErrorHandlerFSListener()); @@ -1916,7 +1867,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) { Destroy(options); } -TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover3) { +TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover2) { // Fail all the resume and let user to resume std::shared_ptr listener( new ErrorHandlerFSListener()); @@ -1936,18 +1887,18 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover3) { ASSERT_OK(Put(Key(1), "val1")); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"FLushWritRetryableeErrorAutoRecover3:0", + {{"FLushWritRetryableeErrorAutoRecover2:0", "RecoverFromRetryableBGIOError:BeforeStart"}, {"RecoverFromRetryableBGIOError:LoopOut", - "FLushWritRetryableeErrorAutoRecover3:1"}}); + "FLushWritRetryableeErrorAutoRecover2:1"}}); SyncPoint::GetInstance()->SetCallBack( "BuildTable:BeforeFinishBuildTable", [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); SyncPoint::GetInstance()->EnableProcessing(); s = Flush(); ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover3:0"); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover3:1"); + TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover2:0"); + TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover2:1"); fault_fs_->SetFilesystemActive(true); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->DisableProcessing(); @@ -1965,173 +1916,6 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableErrorAutoRecover3) { Destroy(options); } -TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableErrorAutoRecover4) { - // Fail the first resume and does not do resume second time because - // the IO error severity is Fatal Error and not Retryable. - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 10; // 0.1 second - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - IOStatus nr_msg = IOStatus::IOError("No Retryable Fatal IO Error"); - nr_msg.SetRetryable(false); - - ASSERT_OK(Put(Key(1), "val1")); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"RecoverFromRetryableBGIOError:BeforeStart", - "FLushWritRetryableeErrorAutoRecover4:0"}, - {"FLushWritRetryableeErrorAutoRecover4:2", - "RecoverFromRetryableBGIOError:RecoverFail0"}}); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->SetCallBack( - "RecoverFromRetryableBGIOError:BeforeResume1", - [&](void*) { fault_fs_->SetFilesystemActive(false, nr_msg); }); - - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover4:0"); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover4:2"); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - // Even the FS is recoverd, due to the Fatal Error in bg_error_ the resume - // and flush will all fail. - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_NOK(dbfull()->Resume()); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_OK(Put(Key(2), "val2")); - ASSERT_NOK(Flush()); - ASSERT_EQ("NOT_FOUND", Get(Key(2))); - - Reopen(options); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_OK(Put(Key(2), "val2")); - ASSERT_OK(Flush()); - ASSERT_EQ("val2", Get(Key(2))); - - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableErrorAutoRecover5) { - // During the resume, call DB->CLose, make sure the resume thread exist - // before close continues. Due to the shutdown, the resume is not successful - // and the FS does not become active, so close status is still IO error - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 10; // 0.1 second - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(1), "val1")); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"RecoverFromRetryableBGIOError:BeforeStart", - "FLushWritRetryableeErrorAutoRecover5:0"}}); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover5:0"); - // The first resume will cause recovery_error and its severity is the - // Fatal error - s = dbfull()->Close(); - ASSERT_NOK(s); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - SyncPoint::GetInstance()->DisableProcessing(); - fault_fs_->SetFilesystemActive(true); - - Reopen(options); - ASSERT_NE("val1", Get(Key(1))); - ASSERT_OK(Put(Key(2), "val2")); - s = Flush(); - ASSERT_OK(s); - ASSERT_EQ("val2", Get(Key(2))); - - Destroy(options); -} - -TEST_F(DBErrorHandlingFSTest, DISABLED_FLushWritRetryableeErrorAutoRecover6) { - // During the resume, call DB->CLose, make sure the resume thread exist - // before close continues. Due to the shutdown, the resume is not successful - // and the FS does not become active, so close status is still IO error - std::shared_ptr listener( - new ErrorHandlerFSListener()); - Options options = GetDefaultOptions(); - options.env = fault_env_.get(); - options.create_if_missing = true; - options.listeners.emplace_back(listener); - options.max_bgerror_resume_count = 2; - options.bgerror_resume_retry_interval = 10; // 0.1 second - Status s; - - listener->EnableAutoRecovery(false); - DestroyAndReopen(options); - - IOStatus error_msg = IOStatus::IOError("Retryable IO Error"); - error_msg.SetRetryable(true); - - ASSERT_OK(Put(Key(1), "val1")); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"FLushWritRetryableeErrorAutoRecover6:0", - "RecoverFromRetryableBGIOError:BeforeStart"}, - {"RecoverFromRetryableBGIOError:BeforeWait0", - "FLushWritRetryableeErrorAutoRecover6:1"}, - {"FLushWritRetryableeErrorAutoRecover6:2", - "RecoverFromRetryableBGIOError:BeforeWait1"}, - {"RecoverFromRetryableBGIOError:AfterWait0", - "FLushWritRetryableeErrorAutoRecover6:3"}}); - SyncPoint::GetInstance()->SetCallBack( - "BuildTable:BeforeFinishBuildTable", - [&](void*) { fault_fs_->SetFilesystemActive(false, error_msg); }); - SyncPoint::GetInstance()->EnableProcessing(); - s = Flush(); - ASSERT_EQ(s.severity(), ROCKSDB_NAMESPACE::Status::Severity::kSoftError); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover6:0"); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover6:1"); - fault_fs_->SetFilesystemActive(true); - ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks(); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover6:2"); - TEST_SYNC_POINT("FLushWritRetryableeErrorAutoRecover6:3"); - // The first resume will cause recovery_error and its severity is the - // Fatal error - s = dbfull()->Close(); - ASSERT_OK(s); - SyncPoint::GetInstance()->DisableProcessing(); - - Reopen(options); - ASSERT_EQ("val1", Get(Key(1))); - ASSERT_OK(Put(Key(2), "val2")); - s = Flush(); - ASSERT_OK(s); - ASSERT_EQ("val2", Get(Key(2))); - - Destroy(options); -} - TEST_F(DBErrorHandlingFSTest, ManifestWriteRetryableErrorAutoRecover) { // Fail the first resume and let the second resume be successful std::shared_ptr listener(