From b1ee191405a276759656f8f196b72aa29101908d Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Fri, 11 Dec 2020 11:17:11 -0800 Subject: [PATCH] Fix memory leak for ColumnFamily drop with live iterator (#7749) Summary: Uncommon bug seen by ASAN with ColumnFamilyTest.LiveIteratorWithDroppedColumnFamily, if the last two references to a ColumnFamilyData are both SuperVersions (during InstallSuperVersion). The fix is to use UnrefAndTryDelete even in SuperVersion::Cleanup but with a parameter to avoid re-entering Cleanup on the same SuperVersion being cleaned up. ColumnFamilyData::Unref is considered unsafe so removed. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7749 Test Plan: ./column_family_test --gtest_filter=*LiveIter* --gtest_repeat=100 Reviewed By: jay-zhuang Differential Revision: D25354304 Pulled By: pdillinger fbshipit-source-id: e78f3a3f67c40013b8432f31d0da8bec55c5321c --- db/column_family.cc | 12 +++++++----- db/column_family.h | 14 ++------------ db/version_set_test.cc | 5 +---- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/db/column_family.cc b/db/column_family.cc index 0ff01bf42..d735c4d4a 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -449,9 +449,7 @@ void SuperVersion::Cleanup() { to_delete.push_back(m); } current->Unref(); - if (cfd->Unref()) { - delete cfd; - } + cfd->UnrefAndTryDelete(this); } void SuperVersion::Init(ColumnFamilyData* new_cfd, MemTable* new_mem, @@ -660,7 +658,7 @@ ColumnFamilyData::~ColumnFamilyData() { } } -bool ColumnFamilyData::UnrefAndTryDelete() { +bool ColumnFamilyData::UnrefAndTryDelete(SuperVersion* sv_under_cleanup) { int old_refs = refs_.fetch_sub(1); assert(old_refs > 0); @@ -670,7 +668,11 @@ bool ColumnFamilyData::UnrefAndTryDelete() { return true; } - if (old_refs == 2 && super_version_ != nullptr) { + // If called under SuperVersion::Cleanup, we should not re-enter Cleanup on + // the same SuperVersion. (But while installing a new SuperVersion, this + // cfd could be referenced only by two SuperVersions.) + if (old_refs == 2 && super_version_ != nullptr && + super_version_ != sv_under_cleanup) { // Only the super_version_ holds me SuperVersion* sv = super_version_; super_version_ = nullptr; diff --git a/db/column_family.h b/db/column_family.h index cc41c5b2b..857ed818d 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -279,21 +279,11 @@ class ColumnFamilyData { // holding a DB mutex, or as the leader in a write batch group). void Ref() { refs_.fetch_add(1); } - // Unref decreases the reference count, but does not handle deletion - // when the count goes to 0. If this method returns true then the - // caller should delete the instance immediately, or later, by calling - // FreeDeadColumnFamilies(). Unref() can only be called while holding - // a DB mutex, or during single-threaded recovery. - bool Unref() { - int old_refs = refs_.fetch_sub(1); - assert(old_refs > 0); - return old_refs == 1; - } - // UnrefAndTryDelete() decreases the reference count and do free if needed, // return true if this is freed else false, UnrefAndTryDelete() can only // be called while holding a DB mutex, or during single-threaded recovery. - bool UnrefAndTryDelete(); + // sv_under_cleanup is only provided when called from SuperVersion::Cleanup. + bool UnrefAndTryDelete(SuperVersion* sv_under_cleanup = nullptr); // SetDropped() can only be called under following conditions: // 1) Holding a DB mutex, diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 0fb81d59a..cb02736de 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -2292,10 +2292,7 @@ TEST_P(VersionSetTestDropOneCF, HandleDroppedColumnFamilyInAtomicGroup) { mutex_.Unlock(); ASSERT_OK(s); ASSERT_EQ(1, called); - if (cfd_to_drop->Unref()) { - delete cfd_to_drop; - cfd_to_drop = nullptr; - } + cfd_to_drop->UnrefAndTryDelete(); } INSTANTIATE_TEST_CASE_P(