From c6ff4c0b70d249d265307607e5a677961a3f2f58 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Tue, 26 Jan 2021 17:00:17 -0800 Subject: [PATCH] Fix deadlock in `fs_test.WALWriteRetryableErrorAutoRecover1` (#7897) Summary: The recovery thread could hold the db.mutex, which is needed by the sync write in the main thread. Make sure the write is done before the recovery thread starts. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7897 Test Plan: `gtest-parallel ./error_handler_fs_test --gtest_filter=DBErrorHandlingFSTest.WALWriteRetryableErrorAutoRecover1 -r 10000 --workers=200` Reviewed By: zhichao-cao Differential Revision: D26082933 Pulled By: jay-zhuang fbshipit-source-id: 226fc49228c0e5903f86ff45cc3fed3080abdb1f --- db/error_handler_fs_test.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/db/error_handler_fs_test.cc b/db/error_handler_fs_test.cc index 399f3651e..f344c7d16 100644 --- a/db/error_handler_fs_test.cc +++ b/db/error_handler_fs_test.cc @@ -2248,7 +2248,8 @@ TEST_F(DBErrorHandlingFSTest, WALWriteRetryableErrorAutoRecover1) { ASSERT_OK(batch.Put(Key(i), rnd.RandomString(1024))); } ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( - {{"RecoverFromRetryableBGIOError:BeforeResume0", "WALWriteError1:0"}, + {{"WALWriteErrorDone", "RecoverFromRetryableBGIOError:BeforeStart"}, + {"RecoverFromRetryableBGIOError:BeforeResume0", "WALWriteError1:0"}, {"WALWriteError1:1", "RecoverFromRetryableBGIOError:BeforeResume1"}, {"RecoverFromRetryableBGIOError:RecoverSuccess", "WALWriteError1:2"}}); @@ -2264,6 +2265,7 @@ TEST_F(DBErrorHandlingFSTest, WALWriteRetryableErrorAutoRecover1) { wopts.sync = true; s = dbfull()->Write(wopts, &batch); ASSERT_EQ(true, s.IsIOError()); + TEST_SYNC_POINT("WALWriteErrorDone"); TEST_SYNC_POINT("WALWriteError1:0"); fault_fs_->SetFilesystemActive(true);