Summary:
**Context:**
`DBLogicalBlockSizeCacheTest.CreateColumnFamilies` is flaky on a rare occurrence of assertion failure below
```
db/db_logical_block_size_cache_test.cc:210
Expected equality of these values:
1
cache_->GetRefCount(cf_path_0_)
Which is: 2
```
Root-cause: `ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0]));` in the test may not successfully decrease the ref count of `cf_path_0_` since the decreasing only happens in the clean-up of `ColumnFamilyData` when `ColumnFamilyData` has no referencing to it, which may not be true when `db->DestroyColumnFamilyHandle(cfs[0])` is called since background work such as `DumpStats()` can hold reference to that `ColumnFamilyData` (suggested and repro-d by ajkr ). Similar case `ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1]));`.
See following for a deterministic repro:
```
diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc
index 196b428a3..4e7a834c4 100644
--- a/db/db_impl/db_impl.cc
+++ b/db/db_impl/db_impl.cc
@@ -956,10 +956,16 @@ void DBImpl::DumpStats() {
// near-atomically.
// Get a ref before unlocking
cfd->Ref();
+ if (cfd->GetName() == "cf1" || cfd->GetName() == "cf2") {
+ TEST_SYNC_POINT("DBImpl::DumpStats:PostCFDRef");
+ }
{
InstrumentedMutexUnlock u(&mutex_);
cfd->internal_stats()->CollectCacheEntryStats(/*foreground=*/false);
}
+ if (cfd->GetName() == "cf1" || cfd->GetName() == "cf2") {
+ TEST_SYNC_POINT("DBImpl::DumpStats::PreCFDUnrefAndTryDelete");
+ }
cfd->UnrefAndTryDelete();
}
}
diff --git a/db/db_logical_block_size_cache_test.cc b/db/db_logical_block_size_cache_test.cc
index 1057871c9..c3872c036 100644
--- a/db/db_logical_block_size_cache_test.cc
+++ b/db/db_logical_block_size_cache_test.cc
@@ -9,6 +9,7 @@
#include "env/io_posix.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
+#include "test_util/sync_point.h"
namespace ROCKSDB_NAMESPACE {
class EnvWithCustomLogicalBlockSizeCache : public EnvWrapper {
@@ -183,6 +184,15 @@ TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamilies) {
ASSERT_EQ(1, cache_->GetRefCount(dbname_));
std::vector<ColumnFamilyHandle*> cfs;
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
+ {{"DBLogicalBlockSizeCacheTest::CreateColumnFamilies::PostSetupTwoCFH",
+ "DBImpl::DumpStats:StartRunning"},
+ {"DBImpl::DumpStats:PostCFDRef",
+ "DBLogicalBlockSizeCacheTest::CreateColumnFamilies::PreDeleteTwoCFH"},
+ {"DBLogicalBlockSizeCacheTest::CreateColumnFamilies::"
+ "PostFinishCheckingRef",
+ "DBImpl::DumpStats::PreCFDUnrefAndTryDelete"}});
ASSERT_OK(db->CreateColumnFamilies(cf_options, {"cf1", "cf2"}, &cfs));
ASSERT_EQ(2, cache_->Size());
ASSERT_TRUE(cache_->Contains(dbname_));
@@ -190,7 +200,7 @@ TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamilies) {
ASSERT_TRUE(cache_->Contains(cf_path_0_));
ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_));
}
// Delete one handle will not drop cache because another handle is still
// referencing cf_path_0_.
+ TEST_SYNC_POINT(
+ "DBLogicalBlockSizeCacheTest::CreateColumnFamilies::PostSetupTwoCFH");
+ TEST_SYNC_POINT(
+ "DBLogicalBlockSizeCacheTest::CreateColumnFamilies::PreDeleteTwoCFH");
ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0]));
ASSERT_EQ(2, cache_->Size());
ASSERT_TRUE(cache_->Contains(dbname_));
@@ -209,16 +221,20 @@ TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamilies) {
ASSERT_TRUE(cache_->Contains(cf_path_0_));
// Will fail
ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));
// Delete the last handle will drop cache.
ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1]));
ASSERT_EQ(1, cache_->Size());
ASSERT_TRUE(cache_->Contains(dbname_));
// Will fail
ASSERT_EQ(1, cache_->GetRefCount(dbname_));
+ TEST_SYNC_POINT(
+ "DBLogicalBlockSizeCacheTest::CreateColumnFamilies::"
+ "PostFinishCheckingRef");
delete db;
ASSERT_EQ(0, cache_->Size());
ASSERT_OK(DestroyDB(dbname_, options,
{{"cf1", cf_options}, {"cf2", cf_options}}));
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
```
**Summary**
- Removed the flaky assertion
- Clarified the comments for the test
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9516
Test Plan:
- CI
- Monitor for future flakiness
Reviewed By: ajkr
Differential Revision: D34055232
Pulled By: hx235
fbshipit-source-id: 9bf83ae5fa88bf6fc829876494d4692082e4c357