From dade61ac2699b9f669871d437c1783cbb10b5901 Mon Sep 17 00:00:00 2001
From: sdong
Date: Thu, 25 Aug 2016 14:43:12 -0700
Subject: [PATCH] Mitigate regression bug of options.max_successive_merges hit
 during DB Recovery

Summary:
After 1b8a2e8fdd1db0dac3cb50228065f8e7e43095f0, the DB pointer is passed to
WriteBatchInternal::InsertInto() during DB recovery. This can cause a deadlock
if options.max_successive_merges is hit: in that case DB::Get() is called, and
Get() tries to acquire the DB mutex, which is already held by DB::Open(),
causing a deadlock condition.

This commit mitigates the problem by not passing the DB pointer unless 2PC is
allowed.

Test Plan: Add a new test and run it.

Reviewers: IslamAbdelRahman, andrewkr, kradhakrishnan, horuff

Reviewed By: kradhakrishnan

Subscribers: leveldb, andrewkr, dhruba

Differential Revision: https://reviews.facebook.net/D62625
---
 db/db_impl.cc  | 10 ++++++++--
 db/db_test2.cc | 16 ++++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/db/db_impl.cc b/db/db_impl.cc
index 61537dd1f..5508a2056 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -1575,10 +1575,16 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
       // we just ignore the update.
       // That's why we set ignore missing column families to true
       bool has_valid_writes = false;
+      // If we pass DB through and options.max_successive_merges is hit
+      // during recovery, Get() will be issued which will try to acquire
+      // DB mutex and cause deadlock, as DB mutex is already held.
+      // The DB pointer is not needed unless 2PC is used.
+      // TODO(sdong) fix the allow_2pc case too.
       status = WriteBatchInternal::InsertInto(
           &batch, column_family_memtables_.get(), &flush_scheduler_, true,
-          log_number, this, false /* concurrent_memtable_writes */,
-          next_sequence, &has_valid_writes);
+          log_number, db_options_.allow_2pc ? this : nullptr,
+          false /* concurrent_memtable_writes */, next_sequence,
+          &has_valid_writes);
       // If it is the first log file and there is no column family updated
       // after replaying the file, this file may be a stale file. We ignore
       // sequence IDs from the file. Otherwise, if a newer stale log file that
diff --git a/db/db_test2.cc b/db/db_test2.cc
index e89364651..c5369371d 100644
--- a/db/db_test2.cc
+++ b/db/db_test2.cc
@@ -1843,6 +1843,22 @@ TEST_P(MergeOperatorPinningTest, TailingIterator) {
 }
 #endif // ROCKSDB_LITE
 
+TEST_F(DBTest2, MaxSuccessiveMergesInRecovery) {
+  Options options;
+  options = CurrentOptions(options);
+  options.merge_operator = MergeOperators::CreatePutOperator();
+  DestroyAndReopen(options);
+
+  db_->Put(WriteOptions(), "foo", "bar");
+  ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar"));
+  ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar"));
+  ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar"));
+  ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar"));
+  ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar"));
+
+  options.max_successive_merges = 3;
+  Reopen(options);
+}
 } // namespace rocksdb
 
 int main(int argc, char** argv) {