Attempt to deflake DBLogicalBlockSizeCacheTest.CreateColumnFamilies (#9516)
Summary: **Context:** `DBLogicalBlockSizeCacheTest.CreateColumnFamilies` is flaky: on rare occasions it fails with the assertion below ``` db/db_logical_block_size_cache_test.cc:210 Expected equality of these values: 1 cache_->GetRefCount(cf_path_0_) Which is: 2 ``` Root cause: `ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0]));` in the test may not decrease the ref count of `cf_path_0_`, because the decrease only happens during the clean-up of `ColumnFamilyData`, which runs only once nothing references that `ColumnFamilyData` any more. That may not yet be the case when `db->DestroyColumnFamilyHandle(cfs[0])` is called, since background work such as `DumpStats()` can hold a reference to that `ColumnFamilyData` (suggested and reproduced by ajkr). The same applies to `ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1]));`. See the following for a deterministic repro: ``` diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 196b428a3..4e7a834c4 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -956,10 +956,16 @@ void DBImpl::DumpStats() { // near-atomically. 
// Get a ref before unlocking cfd->Ref(); + if (cfd->GetName() == "cf1" || cfd->GetName() == "cf2") { + TEST_SYNC_POINT("DBImpl::DumpStats:PostCFDRef"); + } { InstrumentedMutexUnlock u(&mutex_); cfd->internal_stats()->CollectCacheEntryStats(/*foreground=*/false); } + if (cfd->GetName() == "cf1" || cfd->GetName() == "cf2") { + TEST_SYNC_POINT("DBImpl::DumpStats::PreCFDUnrefAndTryDelete"); + } cfd->UnrefAndTryDelete(); } } diff --git a/db/db_logical_block_size_cache_test.cc b/db/db_logical_block_size_cache_test.cc index 1057871c9..c3872c036 100644 --- a/db/db_logical_block_size_cache_test.cc +++ b/db/db_logical_block_size_cache_test.cc @@ -9,6 +9,7 @@ #include "env/io_posix.h" #include "rocksdb/db.h" #include "rocksdb/env.h" +#include "test_util/sync_point.h" namespace ROCKSDB_NAMESPACE { class EnvWithCustomLogicalBlockSizeCache : public EnvWrapper { @@ -183,6 +184,15 @@ TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamilies) { ASSERT_EQ(1, cache_->GetRefCount(dbname_)); std::vector<ColumnFamilyHandle*> cfs; + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing(); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency( + {{"DBLogicalBlockSizeCacheTest::CreateColumnFamilies::PostSetupTwoCFH", + "DBImpl::DumpStats:StartRunning"}, + {"DBImpl::DumpStats:PostCFDRef", + "DBLogicalBlockSizeCacheTest::CreateColumnFamilies::PreDeleteTwoCFH"}, + {"DBLogicalBlockSizeCacheTest::CreateColumnFamilies::" + "PostFinishCheckingRef", + "DBImpl::DumpStats::PreCFDUnrefAndTryDelete"}}); ASSERT_OK(db->CreateColumnFamilies(cf_options, {"cf1", "cf2"}, &cfs)); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); @@ -190,7 +200,7 @@ TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamilies) { ASSERT_TRUE(cache_->Contains(cf_path_0_)); ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_)); } // Delete one handle will not drop cache because another handle is still // referencing cf_path_0_. 
+ TEST_SYNC_POINT( + "DBLogicalBlockSizeCacheTest::CreateColumnFamilies::PostSetupTwoCFH"); + TEST_SYNC_POINT( + "DBLogicalBlockSizeCacheTest::CreateColumnFamilies::PreDeleteTwoCFH"); ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0])); ASSERT_EQ(2, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); @@ -209,16 +221,20 @@ TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamilies) { ASSERT_TRUE(cache_->Contains(cf_path_0_)); // Will fail ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_)); // Delete the last handle will drop cache. ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1])); ASSERT_EQ(1, cache_->Size()); ASSERT_TRUE(cache_->Contains(dbname_)); // Will fail ASSERT_EQ(1, cache_->GetRefCount(dbname_)); + TEST_SYNC_POINT( + "DBLogicalBlockSizeCacheTest::CreateColumnFamilies::" + "PostFinishCheckingRef"); delete db; ASSERT_EQ(0, cache_->Size()); ASSERT_OK(DestroyDB(dbname_, options, {{"cf1", cf_options}, {"cf2", cf_options}})); + ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } ``` **Summary** - Removed the flaky assertion - Clarified the comments for the test Pull Request resolved: https://github.com/facebook/rocksdb/pull/9516 Test Plan: - CI - Monitor for future flakiness Reviewed By: ajkr Differential Revision: D34055232 Pulled By: hx235 fbshipit-source-id: 9bf83ae5fa88bf6fc829876494d4692082e4c357
This commit is contained in:
parent
4a776d81cc
commit
fc61e98ae6
@ -167,9 +167,14 @@ TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamily) {
|
||||
}
|
||||
|
||||
TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamilies) {
|
||||
// Tests that CreateColumnFamilies will cache the cf_paths,
|
||||
// drop the column family handle won't drop the cache,
|
||||
// drop and then delete the column family handle will drop the cache.
|
||||
// To test:
|
||||
// (1) CreateColumnFamilies will cache the cf_paths in
|
||||
// DBLogicalBlockSizeCache
|
||||
// (2) Dropping column family handles associated with
|
||||
// those cf_paths won't drop the cached cf_paths
|
||||
// (3) Deleting all the column family handles associated
|
||||
// with those cf_paths will drop the cached cf_paths
|
||||
|
||||
Options options;
|
||||
options.create_if_missing = true;
|
||||
options.env = env_.get();
|
||||
@ -190,7 +195,7 @@ TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamilies) {
|
||||
ASSERT_TRUE(cache_->Contains(cf_path_0_));
|
||||
ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_));
|
||||
|
||||
// Drop column family does not drop cache.
|
||||
// Drop column family does not drop cf_path_0_'s entry from cache
|
||||
for (ColumnFamilyHandle* cf : cfs) {
|
||||
ASSERT_OK(db->DropColumnFamily(cf));
|
||||
ASSERT_EQ(2, cache_->Size());
|
||||
@ -200,22 +205,24 @@ TEST_F(DBLogicalBlockSizeCacheTest, CreateColumnFamilies) {
|
||||
ASSERT_EQ(2, cache_->GetRefCount(cf_path_0_));
|
||||
}
|
||||
|
||||
// Delete one handle will not drop cache because another handle is still
|
||||
// referencing cf_path_0_.
|
||||
// Deleting one cf handle will not drop cf_path_0_'s entry from cache because
|
||||
// another handle is still referencing cf_path_0_.
|
||||
ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[0]));
|
||||
ASSERT_EQ(2, cache_->Size());
|
||||
ASSERT_TRUE(cache_->Contains(dbname_));
|
||||
ASSERT_EQ(1, cache_->GetRefCount(dbname_));
|
||||
ASSERT_TRUE(cache_->Contains(cf_path_0_));
|
||||
ASSERT_EQ(1, cache_->GetRefCount(cf_path_0_));
|
||||
|
||||
// Delete the last handle will drop cache.
|
||||
// Delete all cf handles and ensure the ref count of cf_path_0_ in cache_
|
||||
// can be properly decreased by releasing any background reference to the
|
||||
// ColumnFamilyData during db deletion
|
||||
ASSERT_OK(db->DestroyColumnFamilyHandle(cfs[1]));
|
||||
ASSERT_EQ(1, cache_->Size());
|
||||
ASSERT_TRUE(cache_->Contains(dbname_));
|
||||
ASSERT_EQ(1, cache_->GetRefCount(dbname_));
|
||||
|
||||
delete db;
|
||||
|
||||
// Now the ref count of cf_path_0_ in cache_ has been properly decreased and cf_path_0_'s entry
|
||||
// is dropped from cache
|
||||
ASSERT_EQ(0, cache_->Size());
|
||||
ASSERT_OK(DestroyDB(dbname_, options,
|
||||
{{"cf1", cf_options}, {"cf2", cf_options}}));
|
||||
|
Loading…
Reference in New Issue
Block a user