Fix a major performance bug in 6.21 for cache entry stats (#8369)
Summary: In final polishing of https://github.com/facebook/rocksdb/issues/8297 (after most manual testing), I broke my own caching layer by sanitizing an input parameter with std::min(0, x) instead of std::max(0, x). I resisted unit testing the timing part of the result caching because historically, these test are either flaky or difficult to write, and this was not a correctness issue. This bug is essentially unnoticeable with a small number of column families but can explode background work with a large number of column families. This change fixes the logical error, removes some unnecessary related optimization, and adds mock time/sleeps to the unit test to ensure we can cache hit within the age limit. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8369 Test Plan: added time testing logic to existing unit test Reviewed By: ajkr Differential Revision: D28950892 Pulled By: pdillinger fbshipit-source-id: e79cd4ff3eec68fd0119d994f1ed468c38026c3b
This commit is contained in:
parent
c7f8ae9f17
commit
229640f88d
12
cache/cache_entry_stats.h
vendored
12
cache/cache_entry_stats.h
vendored
@ -56,15 +56,11 @@ class CacheEntryStatsCollector {
|
||||
// Waits for any pending reader or writer (collector)
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
|
||||
// Maximum allowed age is nominally given by the parameter
|
||||
// Maximum allowed age is nominally given by the parameter, but
|
||||
// to limit the possibility of accidental repeated scans, impose
|
||||
// a minimum TTL of 1 second.
|
||||
uint64_t max_age_micros =
|
||||
static_cast<uint64_t>(std::min(maximum_age_in_seconds, 0)) * 1000000U;
|
||||
// But we will re-scan more frequently if it means scanning < 1%
|
||||
// of the time and no more than once per second.
|
||||
max_age_micros = std::min(
|
||||
max_age_micros,
|
||||
std::max(uint64_t{1000000},
|
||||
100U * (last_end_time_micros_ - last_start_time_micros_)));
|
||||
static_cast<uint64_t>(std::max(maximum_age_in_seconds, 1)) * 1000000U;
|
||||
|
||||
uint64_t start_time_micros = clock_->NowMicros();
|
||||
if ((start_time_micros - last_end_time_micros_) > max_age_micros) {
|
||||
|
@ -930,9 +930,9 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
|
||||
++iterations_tested;
|
||||
|
||||
Options options = CurrentOptions();
|
||||
SetTimeElapseOnlySleepOnReopen(&options);
|
||||
options.create_if_missing = true;
|
||||
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
|
||||
options.stats_dump_period_sec = 0;
|
||||
options.max_open_files = 13;
|
||||
options.table_cache_numshardbits = 0;
|
||||
|
||||
@ -972,12 +972,21 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
|
||||
expected[static_cast<size_t>(CacheEntryRole::kMisc)] = 1;
|
||||
EXPECT_EQ(expected, GetCacheEntryRoleCounts());
|
||||
|
||||
std::array<size_t, kNumCacheEntryRoles> prev_expected = expected;
|
||||
|
||||
// First access only filters
|
||||
ASSERT_EQ("NOT_FOUND", Get("different from any key added"));
|
||||
expected[static_cast<size_t>(CacheEntryRole::kFilterBlock)] += 2;
|
||||
if (partition) {
|
||||
expected[static_cast<size_t>(CacheEntryRole::kFilterMetaBlock)] += 2;
|
||||
}
|
||||
// Within some time window, we will get cached entry stats
|
||||
EXPECT_EQ(prev_expected, GetCacheEntryRoleCounts());
|
||||
// Not enough to force a miss
|
||||
env_->MockSleepForSeconds(10);
|
||||
EXPECT_EQ(prev_expected, GetCacheEntryRoleCounts());
|
||||
// Enough to force a miss
|
||||
env_->MockSleepForSeconds(1000);
|
||||
EXPECT_EQ(expected, GetCacheEntryRoleCounts());
|
||||
|
||||
// Now access index and data block
|
||||
@ -988,6 +997,7 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
|
||||
expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
|
||||
}
|
||||
expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]++;
|
||||
env_->MockSleepForSeconds(1000);
|
||||
EXPECT_EQ(expected, GetCacheEntryRoleCounts());
|
||||
|
||||
// The same for other file
|
||||
@ -998,6 +1008,7 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
|
||||
expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
|
||||
}
|
||||
expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]++;
|
||||
env_->MockSleepForSeconds(1000);
|
||||
EXPECT_EQ(expected, GetCacheEntryRoleCounts());
|
||||
|
||||
// Also check the GetProperty interface
|
||||
|
@ -537,6 +537,8 @@ Status InternalStats::CollectCacheEntryStats() {
|
||||
std::shared_ptr<Collector> collector;
|
||||
Status s = Collector::GetShared(block_cache, clock_, &collector);
|
||||
if (s.ok()) {
|
||||
// TODO: use a max age like stats_dump_period_sec / 2, but it's
|
||||
// difficult to access that setting from here with just cfd_
|
||||
collector->GetStats(&cache_entry_stats);
|
||||
} else {
|
||||
// Block cache likely under pressure. Scanning could make it worse,
|
||||
|
Loading…
Reference in New Issue
Block a user