From a31b8cb7a78a26e5fa7a4fde30295e3e63caa15e Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 25 Aug 2016 14:43:12 -0700 Subject: [PATCH] Mitigate regression bug of options.max_successive_merges hit during DB Recovery Summary: After 1b8a2e8fdd1db0dac3cb50228065f8e7e43095f0, the DB pointer is passed to WriteBatchInternal::InsertInto() during DB recovery. This can cause a deadlock if options.max_successive_merges is hit. In that case DB::Get() will be called. Get() will try to acquire the DB mutex, which is already held by DB::Open(), causing a deadlock. This commit mitigates the problem by not passing the DB pointer unless 2PC is allowed. Test Plan: Add a new test and run it. Reviewers: IslamAbdelRahman, andrewkr, kradhakrishnan, horuff Reviewed By: kradhakrishnan Subscribers: leveldb, andrewkr, dhruba Differential Revision: https://reviews.facebook.net/D62625 --- db/db_impl.cc | 9 +++++++-- db/db_test2.cc | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index a445db805..a4f0c4cc2 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -1563,10 +1563,15 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers, // insert. We don't want to fail the whole write batch in that case -- // we just ignore the update. // That's why we set ignore missing column families to true + // If we pass DB through and options.max_successive_merges is hit + // during recovery, Get() will be issued which will try to acquire + // DB mutex and cause deadlock, as DB mutex is already held. + // The DB pointer is not needed unless 2PC is used. + // TODO(sdong) fix the allow_2pc case too. status = WriteBatchInternal::InsertInto( &batch, column_family_memtables_.get(), &flush_scheduler_, true, - log_number, this, false /* concurrent_memtable_writes */, - next_sequence); + log_number, db_options_.allow_2pc ? 
this : nullptr, + false /* concurrent_memtable_writes */, next_sequence); MaybeIgnoreError(&status); if (!status.ok()) { // We are treating this as a failure while reading since we read valid diff --git a/db/db_test2.cc b/db/db_test2.cc index 306099ef8..ccd59a209 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -1487,6 +1487,23 @@ TEST_F(DBTest2, SyncPointMarker) { rocksdb::SyncPoint::GetInstance()->DisableProcessing(); } #endif + +TEST_F(DBTest2, MaxSuccessiveMergesInRecovery) { + Options options; + options = CurrentOptions(options); + options.merge_operator = MergeOperators::CreatePutOperator(); + DestroyAndReopen(options); + + ASSERT_OK(db_->Put(WriteOptions(), "foo", "bar")); + ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar")); + ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar")); + ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar")); + ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar")); + ASSERT_OK(db_->Merge(WriteOptions(), "foo", "bar")); + + options.max_successive_merges = 3; + Reopen(options); +} } // namespace rocksdb int main(int argc, char** argv) {