Prevent manual flush hanging in read-only mode (#4615)
Summary: The logic to wait for stall conditions to clear before beginning a manual flush didn't take into account whether the DB was in read-only mode. In read-only mode the stall conditions would never clear since no background work is happening, so the wait would be never-ending. It's probably better to return an error to the user. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4615 Differential Revision: D12888008 Pulled By: ajkr fbshipit-source-id: 1c474b42a7ac38d9fd0d0e2340ff1d53e684d83c
This commit is contained in:
parent
b8f68bac38
commit
5c794d94c4
@ -220,6 +220,44 @@ TEST_F(DBFlushTest, FlushError) {
|
|||||||
ASSERT_NE(s, Status::OK());
|
ASSERT_NE(s, Status::OK());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(DBFlushTest, ManualFlushFailsInReadOnlyMode) {
|
||||||
|
// Regression test for bug where manual flush hangs forever when the DB
|
||||||
|
// is in read-only mode. Verify it now at least returns, despite failing.
|
||||||
|
Options options;
|
||||||
|
std::unique_ptr<FaultInjectionTestEnv> fault_injection_env(
|
||||||
|
new FaultInjectionTestEnv(env_));
|
||||||
|
options.env = fault_injection_env.get();
|
||||||
|
options.max_write_buffer_number = 2;
|
||||||
|
Reopen(options);
|
||||||
|
|
||||||
|
// Trigger a first flush but don't let it run
|
||||||
|
ASSERT_OK(db_->PauseBackgroundWork());
|
||||||
|
ASSERT_OK(Put("key1", "value1"));
|
||||||
|
FlushOptions flush_opts;
|
||||||
|
flush_opts.wait = false;
|
||||||
|
ASSERT_OK(db_->Flush(flush_opts));
|
||||||
|
|
||||||
|
// Write a key to the second memtable so we have something to flush later
|
||||||
|
// after the DB is in read-only mode.
|
||||||
|
ASSERT_OK(Put("key2", "value2"));
|
||||||
|
|
||||||
|
// Let the first flush continue, hit an error, and put the DB in read-only
|
||||||
|
// mode.
|
||||||
|
fault_injection_env->SetFilesystemActive(false);
|
||||||
|
ASSERT_OK(db_->ContinueBackgroundWork());
|
||||||
|
dbfull()->TEST_WaitForFlushMemTable();
|
||||||
|
uint64_t num_bg_errors;
|
||||||
|
ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBackgroundErrors,
|
||||||
|
&num_bg_errors));
|
||||||
|
ASSERT_GT(num_bg_errors, 0);
|
||||||
|
|
||||||
|
// In the bug scenario, triggering another flush would cause the second flush
|
||||||
|
// to hang forever. After the fix we expect it to return an error.
|
||||||
|
ASSERT_NOK(db_->Flush(FlushOptions()));
|
||||||
|
|
||||||
|
Close();
|
||||||
|
}
|
||||||
|
|
||||||
TEST_P(DBAtomicFlushTest, ManualAtomicFlush) {
|
TEST_P(DBAtomicFlushTest, ManualAtomicFlush) {
|
||||||
Options options = CurrentOptions();
|
Options options = CurrentOptions();
|
||||||
options.create_if_missing = true;
|
options.create_if_missing = true;
|
||||||
|
@ -1543,6 +1543,14 @@ Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
|
|||||||
WriteStallCondition write_stall_condition = WriteStallCondition::kNormal;
|
WriteStallCondition write_stall_condition = WriteStallCondition::kNormal;
|
||||||
do {
|
do {
|
||||||
if (write_stall_condition != WriteStallCondition::kNormal) {
|
if (write_stall_condition != WriteStallCondition::kNormal) {
|
||||||
|
// Same error handling as user writes: Don't wait if there's a
|
||||||
|
// background error, even if it's a soft error. We might wait here
|
||||||
|
// indefinitely as the pending flushes/compactions may never finish
|
||||||
|
// successfully, resulting in the stall condition lasting indefinitely
|
||||||
|
if (error_handler_.IsBGWorkStopped()) {
|
||||||
|
return error_handler_.GetBGError();
|
||||||
|
}
|
||||||
|
|
||||||
TEST_SYNC_POINT("DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait");
|
TEST_SYNC_POINT("DBImpl::WaitUntilFlushWouldNotStallWrites:StallWait");
|
||||||
ROCKS_LOG_INFO(immutable_db_options_.info_log,
|
ROCKS_LOG_INFO(immutable_db_options_.info_log,
|
||||||
"[%s] WaitUntilFlushWouldNotStallWrites"
|
"[%s] WaitUntilFlushWouldNotStallWrites"
|
||||||
|
Loading…
Reference in New Issue
Block a user