From 2f93a3b80912b337c2d97c31292168ae6be2b949 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 8 Jun 2021 05:02:29 -0700 Subject: [PATCH] Fix a major performance bug in 6.21 for cache entry stats (#8369) Summary: In final polishing of https://github.com/facebook/rocksdb/issues/8297 (after most manual testing), I broke my own caching layer by sanitizing an input parameter with std::min(0, x) instead of std::max(0, x). I resisted unit testing the timing part of the result caching because historically, these test are either flaky or difficult to write, and this was not a correctness issue. This bug is essentially unnoticeable with a small number of column families but can explode background work with a large number of column families. This change fixes the logical error, removes some unnecessary related optimization, and adds mock time/sleeps to the unit test to ensure we can cache hit within the age limit. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8369 Test Plan: added time testing logic to existing unit test Reviewed By: ajkr Differential Revision: D28950892 Pulled By: pdillinger fbshipit-source-id: e79cd4ff3eec68fd0119d994f1ed468c38026c3b --- cache/cache_entry_stats.h | 12 ++++-------- db/db_block_cache_test.cc | 13 ++++++++++++- db/internal_stats.cc | 2 ++ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/cache/cache_entry_stats.h b/cache/cache_entry_stats.h index 0770bbd70..f602b448e 100644 --- a/cache/cache_entry_stats.h +++ b/cache/cache_entry_stats.h @@ -56,15 +56,11 @@ class CacheEntryStatsCollector { // Waits for any pending reader or writer (collector) std::lock_guard lock(mutex_); - // Maximum allowed age is nominally given by the parameter + // Maximum allowed age is nominally given by the parameter, but + // to limit the possibility of accidental repeated scans, impose + // a minimum TTL of 1 second. uint64_t max_age_micros = - static_cast(std::min(maximum_age_in_seconds, 0)) * 1000000U; - // But we will re-scan more frequently if it means scanning < 1% - // of the time and no more than once per second. - max_age_micros = std::min( - max_age_micros, - std::max(uint64_t{1000000}, - 100U * (last_end_time_micros_ - last_start_time_micros_))); + static_cast(std::max(maximum_age_in_seconds, 1)) * 1000000U; uint64_t start_time_micros = clock_->NowMicros(); if ((start_time_micros - last_end_time_micros_) > max_age_micros) { diff --git a/db/db_block_cache_test.cc b/db/db_block_cache_test.cc index 54e52aa50..45dae285c 100644 --- a/db/db_block_cache_test.cc +++ b/db/db_block_cache_test.cc @@ -930,9 +930,9 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) { ++iterations_tested; Options options = CurrentOptions(); + SetTimeElapseOnlySleepOnReopen(&options); options.create_if_missing = true; options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics(); - options.stats_dump_period_sec = 0; options.max_open_files = 13; options.table_cache_numshardbits = 0; @@ -972,12 +972,21 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) { expected[static_cast(CacheEntryRole::kMisc)] = 1; EXPECT_EQ(expected, GetCacheEntryRoleCounts()); + std::array prev_expected = expected; + // First access only filters ASSERT_EQ("NOT_FOUND", Get("different from any key added")); expected[static_cast(CacheEntryRole::kFilterBlock)] += 2; if (partition) { expected[static_cast(CacheEntryRole::kFilterMetaBlock)] += 2; } + // Within some time window, we will get cached entry stats + EXPECT_EQ(prev_expected, GetCacheEntryRoleCounts()); + // Not enough to force a miss + env_->MockSleepForSeconds(10); + EXPECT_EQ(prev_expected, GetCacheEntryRoleCounts()); + // Enough to force a miss + env_->MockSleepForSeconds(1000); EXPECT_EQ(expected, GetCacheEntryRoleCounts()); // Now access index and data block @@ -988,6 +997,7 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) { expected[static_cast(CacheEntryRole::kIndexBlock)]++; } expected[static_cast(CacheEntryRole::kDataBlock)]++; + env_->MockSleepForSeconds(1000); EXPECT_EQ(expected, GetCacheEntryRoleCounts()); // The same for other file @@ -998,6 +1008,7 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) { expected[static_cast(CacheEntryRole::kIndexBlock)]++; } expected[static_cast(CacheEntryRole::kDataBlock)]++; + env_->MockSleepForSeconds(1000); EXPECT_EQ(expected, GetCacheEntryRoleCounts()); // Also check the GetProperty interface diff --git a/db/internal_stats.cc b/db/internal_stats.cc index e8af90f0a..b609add47 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -537,6 +537,8 @@ Status InternalStats::CollectCacheEntryStats() { std::shared_ptr collector; Status s = Collector::GetShared(block_cache, clock_, &collector); if (s.ok()) { + // TODO: use a max age like stats_dump_period_sec / 2, but it's + // difficult to access that setting from here with just cfd_ collector->GetStats(&cache_entry_stats); } else { // Block cache likely under pressure. Scanning could make it worse,