Fix a flaky test DBFlushTest.SyncFail (#4633)
Summary: There is a race condition in DBFlushTest.SyncFail, as illustrated below. ``` time thread1 bg_flush_thread | Flush(wait=false, cfd) | refs_before=cfd->current()->TEST_refs() PickMemtable calls cfd->current()->Ref() V ``` The race condition between thread1 getting the ref count of cfd's current version and bg_flush_thread incrementing the cfd's current version makes it possible for later assertion on refs_before to fail. Therefore, we add test sync points to enforce the order and assert on the ref count before and after PickMemtable is called in bg_flush_thread. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4633 Differential Revision: D12967131 Pulled By: riversand963 fbshipit-source-id: a99d2bacb7869ec5d8d03b24ef2babc0e6ae1a3b
This commit is contained in:
parent
7dbee38716
commit
8d7bc76f36
@ -62,7 +62,6 @@ TEST_F(DBFlushTest, FlushWhileWritingManifest) {
|
||||
#endif // ROCKSDB_LITE
|
||||
}
|
||||
|
||||
#ifndef TRAVIS
|
||||
// Disable this test temporarily on Travis as it fails intermittently.
|
||||
// Github issue: #4151
|
||||
TEST_F(DBFlushTest, SyncFail) {
|
||||
@ -73,7 +72,11 @@ TEST_F(DBFlushTest, SyncFail) {
|
||||
options.env = fault_injection_env.get();
|
||||
|
||||
SyncPoint::GetInstance()->LoadDependency(
|
||||
{{"DBFlushTest::SyncFail:1", "DBImpl::SyncClosedLogs:Start"},
|
||||
{{"DBFlushTest::SyncFail:GetVersionRefCount:1",
|
||||
"DBImpl::FlushMemTableToOutputFile:BeforePickMemtables"},
|
||||
{"DBImpl::FlushMemTableToOutputFile:AfterPickMemtables",
|
||||
"DBFlushTest::SyncFail:GetVersionRefCount:2"},
|
||||
{"DBFlushTest::SyncFail:1", "DBImpl::SyncClosedLogs:Start"},
|
||||
{"DBImpl::SyncClosedLogs:Failed", "DBFlushTest::SyncFail:2"}});
|
||||
SyncPoint::GetInstance()->EnableProcessing();
|
||||
|
||||
@ -88,6 +91,10 @@ TEST_F(DBFlushTest, SyncFail) {
|
||||
// Flush installs a new super-version. Get the ref count after that.
|
||||
auto current_before = cfd->current();
|
||||
int refs_before = cfd->current()->TEST_refs();
|
||||
TEST_SYNC_POINT("DBFlushTest::SyncFail:GetVersionRefCount:1");
|
||||
TEST_SYNC_POINT("DBFlushTest::SyncFail:GetVersionRefCount:2");
|
||||
int refs_after_picking_memtables = cfd->current()->TEST_refs();
|
||||
ASSERT_EQ(refs_before + 1, refs_after_picking_memtables);
|
||||
fault_injection_env->SetFilesystemActive(false);
|
||||
TEST_SYNC_POINT("DBFlushTest::SyncFail:1");
|
||||
TEST_SYNC_POINT("DBFlushTest::SyncFail:2");
|
||||
@ -126,7 +133,6 @@ TEST_F(DBFlushTest, SyncSkip) {
|
||||
|
||||
Destroy(options);
|
||||
}
|
||||
#endif // TRAVIS
|
||||
|
||||
TEST_F(DBFlushTest, FlushInLowPriThreadPool) {
|
||||
// Verify setting an empty high-pri (flush) thread pool causes flushes to be
|
||||
|
@ -135,7 +135,9 @@ Status DBImpl::FlushMemTableToOutputFile(
|
||||
|
||||
FileMetaData file_meta;
|
||||
|
||||
TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables");
|
||||
flush_job.PickMemTable();
|
||||
TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:AfterPickMemtables");
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
// may temporarily unlock and lock the mutex.
|
||||
|
Loading…
Reference in New Issue
Block a user