diff --git a/db/column_family.cc b/db/column_family.cc index c32e97f06..e6518df75 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -344,12 +344,13 @@ void SuperVersionUnrefHandle(void* ptr) { // When latter happens, we are in ~ColumnFamilyData(), no get should happen as // well. SuperVersion* sv = static_cast(ptr); - if (sv->Unref()) { - sv->db_mutex->Lock(); - sv->Cleanup(); - sv->db_mutex->Unlock(); - delete sv; - } + bool was_last_ref __attribute__((__unused__)); + was_last_ref = sv->Unref(); + // Thread-local SuperVersions can't outlive ColumnFamilyData::super_version_. + // This is important because we can't do SuperVersion cleanup here. + // That would require locking DB mutex, which would deadlock because + // SuperVersionUnrefHandle is called with locked ThreadLocalPtr mutex. + assert(!was_last_ref); } } // anonymous namespace @@ -966,6 +967,12 @@ void ColumnFamilyData::InstallSuperVersion( RecalculateWriteStallConditions(mutable_cf_options); if (old_superversion != nullptr) { + // Reset SuperVersions cached in thread local storage. + // This should be done before old_superversion->Unref(). That's to ensure + // that local_sv_ never holds the last reference to SuperVersion, since + // it has no means to safely do SuperVersion cleanup. + ResetThreadLocalSuperVersions(); + if (old_superversion->mutable_cf_options.write_buffer_size != mutable_cf_options.write_buffer_size) { mem_->UpdateWriteBufferSize(mutable_cf_options.write_buffer_size); @@ -981,9 +988,6 @@ void ColumnFamilyData::InstallSuperVersion( sv_context->superversions_to_free.push_back(old_superversion); } } - - // Reset SuperVersions cached in thread local storage - ResetThreadLocalSuperVersions(); } void ColumnFamilyData::ResetThreadLocalSuperVersions() { @@ -995,10 +999,12 @@ void ColumnFamilyData::ResetThreadLocalSuperVersions() { continue; } auto sv = static_cast(ptr); - if (sv->Unref()) { - sv->Cleanup(); - delete sv; - } + bool was_last_ref __attribute__((__unused__)); + was_last_ref = sv->Unref(); + // sv couldn't have been the last reference because + // ResetThreadLocalSuperVersions() is called before + // unref'ing super_version_. + assert(!was_last_ref); } } diff --git a/db/db_test.cc b/db/db_test.cc index c9233b87c..cd352a82d 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -5656,6 +5656,50 @@ TEST_F(DBTest, PauseBackgroundWorkTest) { // now it's done ASSERT_TRUE(done.load()); } + +// Keep spawning short-living threads that create an iterator and quit. +// Meanwhile in another thread keep flushing memtables. +// This used to cause a deadlock. +TEST_F(DBTest, ThreadLocalPtrDeadlock) { + std::atomic flushes_done{0}; + std::atomic threads_destroyed{0}; + auto done = [&] { + return flushes_done.load() > 10; + }; + + std::thread flushing_thread([&] { + for (int i = 0; !done(); ++i) { + ASSERT_OK(db_->Put(WriteOptions(), Slice("hi"), + Slice(std::to_string(i).c_str()))); + ASSERT_OK(db_->Flush(FlushOptions())); + int cnt = ++flushes_done; + fprintf(stderr, "Flushed %d times\n", cnt); + } + }); + + std::vector thread_spawning_threads(10); + for (auto& t: thread_spawning_threads) { + t = std::thread([&] { + while (!done()) { + { + std::thread tmp_thread([&] { + auto it = db_->NewIterator(ReadOptions()); + delete it; + }); + tmp_thread.join(); + } + ++threads_destroyed; + } + }); + } + + for (auto& t: thread_spawning_threads) { + t.join(); + } + flushing_thread.join(); + fprintf(stderr, "Done. Flushed %d times, destroyed %d threads\n", + flushes_done.load(), threads_destroyed.load()); +} } // namespace rocksdb int main(int argc, char** argv) { diff --git a/util/thread_local.h b/util/thread_local.h index 1ca5b10dd..175f1cca4 100644 --- a/util/thread_local.h +++ b/util/thread_local.h @@ -24,6 +24,13 @@ namespace rocksdb { // pointer (if not NULL) when one of the following happens: // (1) a thread terminates // (2) a ThreadLocalPtr is destroyed +// +// Warning: this function is called while holding a global mutex. The same mutex +// is used (at least in some cases) by most methods of ThreadLocalPtr, and it's +// shared across all instances of ThreadLocalPtr. Thereforere extra care +// is needed to avoid deadlocks. In particular, the handler shouldn't lock any +// mutexes and shouldn't call any methods of any ThreadLocalPtr instances, +// unless you know what you're doing. typedef void (*UnrefHandler)(void* ptr); // ThreadLocalPtr stores only values of pointer type. Different from