Cache simulator: Refactor the cache simulator so that we can add alternative policies easily (#5517)

Summary:
This PR creates the cache_simulator.h file. It contains a CacheSimulator that runs against block cache trace records. Alternative cache simulators derived from CacheSimulator can be added later. For example, this PR adds a PrioritizedCacheSimulator that inserts filter/index/uncompression dictionary blocks with high priority.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5517

Test Plan: make clean && COMPILE_WITH_ASAN=1 make check -j32

Differential Revision: D16043689

Pulled By: HaoyuHuang

fbshipit-source-id: 65f28ed52b866ffb0e6eceffd7f9ca7c45bb680d
This commit is contained in:
haoyuhuang 2019-07-01 12:43:14 -07:00 committed by Facebook Github Bot
parent 3886dddc3b
commit 9f0bd56889
7 changed files with 219 additions and 102 deletions

View File

@ -685,6 +685,7 @@ set(SOURCES
utilities/persistent_cache/block_cache_tier_metadata.cc utilities/persistent_cache/block_cache_tier_metadata.cc
utilities/persistent_cache/persistent_cache_tier.cc utilities/persistent_cache/persistent_cache_tier.cc
utilities/persistent_cache/volatile_tier_impl.cc utilities/persistent_cache/volatile_tier_impl.cc
utilities/simulator_cache/cache_simulator.cc
utilities/simulator_cache/sim_cache.cc utilities/simulator_cache/sim_cache.cc
utilities/table_properties_collectors/compact_on_deletion_collector.cc utilities/table_properties_collectors/compact_on_deletion_collector.cc
utilities/trace/file_trace_reader_writer.cc utilities/trace/file_trace_reader_writer.cc

View File

@ -280,6 +280,7 @@ cpp_library(
"utilities/persistent_cache/block_cache_tier_metadata.cc", "utilities/persistent_cache/block_cache_tier_metadata.cc",
"utilities/persistent_cache/persistent_cache_tier.cc", "utilities/persistent_cache/persistent_cache_tier.cc",
"utilities/persistent_cache/volatile_tier_impl.cc", "utilities/persistent_cache/volatile_tier_impl.cc",
"utilities/simulator_cache/cache_simulator.cc",
"utilities/simulator_cache/sim_cache.cc", "utilities/simulator_cache/sim_cache.cc",
"utilities/table_properties_collectors/compact_on_deletion_collector.cc", "utilities/table_properties_collectors/compact_on_deletion_collector.cc",
"utilities/trace/file_trace_reader_writer.cc", "utilities/trace/file_trace_reader_writer.cc",

1
src.mk
View File

@ -199,6 +199,7 @@ LIB_SOURCES = \
utilities/persistent_cache/block_cache_tier_metadata.cc \ utilities/persistent_cache/block_cache_tier_metadata.cc \
utilities/persistent_cache/persistent_cache_tier.cc \ utilities/persistent_cache/persistent_cache_tier.cc \
utilities/persistent_cache/volatile_tier_impl.cc \ utilities/persistent_cache/volatile_tier_impl.cc \
utilities/simulator_cache/cache_simulator.cc \
utilities/simulator_cache/sim_cache.cc \ utilities/simulator_cache/sim_cache.cc \
utilities/table_properties_collectors/compact_on_deletion_collector.cc \ utilities/table_properties_collectors/compact_on_deletion_collector.cc \
utilities/trace/file_trace_reader_writer.cc \ utilities/trace/file_trace_reader_writer.cc \

View File

@ -24,7 +24,7 @@ DEFINE_string(
"The config file path. One cache configuration per line. The format of a " "The config file path. One cache configuration per line. The format of a "
"cache configuration is " "cache configuration is "
"cache_name,num_shard_bits,cache_capacity_1,...,cache_capacity_N. " "cache_name,num_shard_bits,cache_capacity_1,...,cache_capacity_N. "
"cache_name is lru. cache_capacity can be xK, xM or xG " "cache_name is lru or lru_priority. cache_capacity can be xK, xM or xG "
"where x is a positive number."); "where x is a positive number.");
DEFINE_int32(block_cache_trace_downsample_ratio, 1, DEFINE_int32(block_cache_trace_downsample_ratio, 1,
"The trace collected accesses on one in every " "The trace collected accesses on one in every "
@ -179,47 +179,6 @@ double percent(uint64_t numerator, uint64_t denomenator) {
} // namespace } // namespace
BlockCacheTraceSimulator::BlockCacheTraceSimulator(
uint64_t warmup_seconds, uint32_t downsample_ratio,
const std::vector<CacheConfiguration>& cache_configurations)
: warmup_seconds_(warmup_seconds),
downsample_ratio_(downsample_ratio),
cache_configurations_(cache_configurations) {
for (auto const& config : cache_configurations_) {
for (auto cache_capacity : config.cache_capacities) {
// Scale down the cache capacity since the trace contains accesses on
// 1/'downsample_ratio' blocks.
uint64_t simulate_cache_capacity =
cache_capacity / downsample_ratio_;
sim_caches_.push_back(NewSimCache(
NewLRUCache(simulate_cache_capacity, config.num_shard_bits),
/*real_cache=*/nullptr, config.num_shard_bits));
}
}
}
void BlockCacheTraceSimulator::Access(const BlockCacheTraceRecord& access) {
if (trace_start_time_ == 0) {
trace_start_time_ = access.access_timestamp;
}
// access.access_timestamp is in microseconds.
if (!warmup_complete_ &&
trace_start_time_ + warmup_seconds_ * kMicrosInSecond <=
access.access_timestamp) {
for (auto& sim_cache : sim_caches_) {
sim_cache->reset_counter();
}
warmup_complete_ = true;
}
for (auto& sim_cache : sim_caches_) {
auto handle = sim_cache->Lookup(access.block_key);
if (handle == nullptr && !access.no_insert) {
sim_cache->Insert(access.block_key, /*value=*/nullptr, access.block_size,
/*deleter=*/nullptr);
}
}
}
void BlockCacheTraceAnalyzer::WriteMissRatioCurves() const { void BlockCacheTraceAnalyzer::WriteMissRatioCurves() const {
if (!cache_simulator_) { if (!cache_simulator_) {
return; return;
@ -237,27 +196,21 @@ void BlockCacheTraceAnalyzer::WriteMissRatioCurves() const {
const std::string header = const std::string header =
"cache_name,num_shard_bits,capacity,miss_ratio,total_accesses"; "cache_name,num_shard_bits,capacity,miss_ratio,total_accesses";
out << header << std::endl; out << header << std::endl;
uint64_t sim_cache_index = 0; for (auto const& config_caches : cache_simulator_->sim_caches()) {
for (auto const& config : cache_simulator_->cache_configurations()) { const CacheConfiguration& config = config_caches.first;
for (auto cache_capacity : config.cache_capacities) { for (uint32_t i = 0; i < config.cache_capacities.size(); i++) {
uint64_t hits = double miss_ratio = config_caches.second[i]->miss_ratio();
cache_simulator_->sim_caches()[sim_cache_index]->get_hit_counter();
uint64_t misses =
cache_simulator_->sim_caches()[sim_cache_index]->get_miss_counter();
uint64_t total_accesses = hits + misses;
double miss_ratio = static_cast<double>(misses * 100.0 / total_accesses);
// Write the body. // Write the body.
out << config.cache_name; out << config.cache_name;
out << ","; out << ",";
out << config.num_shard_bits; out << config.num_shard_bits;
out << ","; out << ",";
out << cache_capacity; out << config.cache_capacities[i];
out << ","; out << ",";
out << std::fixed << std::setprecision(4) << miss_ratio; out << std::fixed << std::setprecision(4) << miss_ratio;
out << ","; out << ",";
out << total_accesses; out << config_caches.second[i]->total_accesses();
out << std::endl; out << std::endl;
sim_cache_index++;
} }
} }
out.close(); out.close();
@ -1095,6 +1048,12 @@ int block_cache_trace_analyzer_tool(int argc, char** argv) {
if (!cache_configs.empty()) { if (!cache_configs.empty()) {
cache_simulator.reset(new BlockCacheTraceSimulator( cache_simulator.reset(new BlockCacheTraceSimulator(
warmup_seconds, downsample_ratio, cache_configs)); warmup_seconds, downsample_ratio, cache_configs));
Status s = cache_simulator->InitializeCaches();
if (!s.ok()) {
fprintf(stderr, "Cannot initialize cache simulators %s\n",
s.ToString().c_str());
exit(1);
}
} }
BlockCacheTraceAnalyzer analyzer(FLAGS_block_cache_trace_path, BlockCacheTraceAnalyzer analyzer(FLAGS_block_cache_trace_path,
FLAGS_block_cache_analysis_result_dir, FLAGS_block_cache_analysis_result_dir,

View File

@ -12,57 +12,10 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/utilities/sim_cache.h" #include "rocksdb/utilities/sim_cache.h"
#include "trace_replay/block_cache_tracer.h" #include "trace_replay/block_cache_tracer.h"
#include "utilities/simulator_cache/cache_simulator.h"
namespace rocksdb { namespace rocksdb {
const uint64_t kMicrosInSecond = 1000000;
class BlockCacheTraceAnalyzer;
// A cache configuration provided by user.
struct CacheConfiguration {
std::string cache_name; // LRU.
uint32_t num_shard_bits;
std::vector<uint64_t>
cache_capacities; // simulate cache capacities in bytes.
};
// A block cache simulator that reports miss ratio curves given a set of cache
// configurations.
class BlockCacheTraceSimulator {
public:
// warmup_seconds: The number of seconds to warmup simulated caches. The
// hit/miss counters are reset after the warmup completes.
BlockCacheTraceSimulator(
uint64_t warmup_seconds, uint32_t downsample_ratio,
const std::vector<CacheConfiguration>& cache_configurations);
~BlockCacheTraceSimulator() = default;
// No copy and move.
BlockCacheTraceSimulator(const BlockCacheTraceSimulator&) = delete;
BlockCacheTraceSimulator& operator=(const BlockCacheTraceSimulator&) = delete;
BlockCacheTraceSimulator(BlockCacheTraceSimulator&&) = delete;
BlockCacheTraceSimulator& operator=(BlockCacheTraceSimulator&&) = delete;
void Access(const BlockCacheTraceRecord& access);
const std::vector<std::shared_ptr<SimCache>>& sim_caches() const {
return sim_caches_;
}
const std::vector<CacheConfiguration>& cache_configurations() const {
return cache_configurations_;
}
private:
const uint64_t warmup_seconds_;
const uint32_t downsample_ratio_;
const std::vector<CacheConfiguration> cache_configurations_;
bool warmup_complete_ = false;
std::vector<std::shared_ptr<SimCache>> sim_caches_;
uint64_t trace_start_time_ = 0;
};
// Statistics of a block. // Statistics of a block.
struct BlockAccessInfo { struct BlockAccessInfo {
uint64_t num_accesses = 0; uint64_t num_accesses = 0;

View File

@ -0,0 +1,104 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "utilities/simulator_cache/cache_simulator.h"
namespace rocksdb {
CacheSimulator::CacheSimulator(std::shared_ptr<SimCache> sim_cache)
: sim_cache_(sim_cache) {}
// Replays one trace record against the simulated cache.
//
// The block key is looked up first. If the lookup returns a handle, the handle
// is not needed any further and is released immediately: the Cache API
// requires every handle returned by Lookup() to be passed to Release(), and a
// handle that is never released keeps its entry referenced.
//
// If the lookup returns nullptr and the record is not marked no_insert, the
// block is inserted with its recorded size, no value and no deleter.
void CacheSimulator::Access(const BlockCacheTraceRecord& access) {
  auto handle = sim_cache_->Lookup(access.block_key);
  if (handle != nullptr) {
    sim_cache_->Release(handle);
    return;
  }
  if (!access.no_insert) {
    sim_cache_->Insert(access.block_key, /*value=*/nullptr, access.block_size,
                       /*deleter=*/nullptr, /*handle=*/nullptr);
  }
}
// Replays one trace record against the simulated cache, choosing the insert
// priority from the block type.
//
// The block key is looked up first. If the lookup returns a handle, it is
// released immediately: the Cache API requires every handle returned by
// Lookup() to be passed to Release(), and a handle that is never released
// keeps its entry referenced.
//
// If the lookup returns nullptr and the record is not marked no_insert, the
// block is inserted with its recorded size, no value and no deleter. Filter,
// index and uncompression-dictionary blocks are inserted with
// Cache::Priority::HIGH; every other block type uses Cache::Priority::LOW.
void PrioritizedCacheSimulator::Access(const BlockCacheTraceRecord& access) {
  auto handle = sim_cache_->Lookup(access.block_key);
  if (handle != nullptr) {
    sim_cache_->Release(handle);
    return;
  }
  if (!access.no_insert) {
    Cache::Priority priority = Cache::Priority::LOW;
    if (access.block_type == TraceType::kBlockTraceFilterBlock ||
        access.block_type == TraceType::kBlockTraceIndexBlock ||
        access.block_type == TraceType::kBlockTraceUncompressionDictBlock) {
      priority = Cache::Priority::HIGH;
    }
    sim_cache_->Insert(access.block_key, /*value=*/nullptr, access.block_size,
                       /*deleter=*/nullptr, /*handle=*/nullptr, priority);
  }
}
// Returns the miss ratio as a percentage, computed from the wrapped cache's
// counters as misses * 100 / (hits + misses).
//
// Returns 0 when both counters are zero (for example right after
// reset_counter()), rather than dividing by zero and producing NaN.
double CacheSimulator::miss_ratio() {
  const uint64_t hits = sim_cache_->get_hit_counter();
  const uint64_t misses = sim_cache_->get_miss_counter();
  const uint64_t total_accesses = hits + misses;
  if (total_accesses == 0) {
    return 0.0;
  }
  return static_cast<double>(misses) * 100.0 /
         static_cast<double>(total_accesses);
}
// Returns the number of accesses counted by the wrapped cache, i.e. the sum
// of its hit counter and its miss counter.
uint64_t CacheSimulator::total_accesses() {
  const uint64_t hit_count = sim_cache_->get_hit_counter();
  const uint64_t miss_count = sim_cache_->get_miss_counter();
  return hit_count + miss_count;
}
// Stores the warmup duration (in seconds), the trace downsample ratio and a
// copy of the cache configurations. No simulated cache is created here; the
// caches are built by InitializeCaches().
BlockCacheTraceSimulator::BlockCacheTraceSimulator(
uint64_t warmup_seconds, uint32_t downsample_ratio,
const std::vector<CacheConfiguration>& cache_configurations)
: warmup_seconds_(warmup_seconds),
downsample_ratio_(downsample_ratio),
cache_configurations_(cache_configurations) {}
// Creates one cache simulator per configured capacity for every cache
// configuration passed to the constructor, and appends it to sim_caches_
// under that configuration.
//
// Supported cache names:
//   "lru"          - LRU cache with high_pri_pool_ratio 0, driven by a
//                    CacheSimulator.
//   "lru_priority" - LRU cache with high_pri_pool_ratio 0.5, driven by a
//                    PrioritizedCacheSimulator.
//
// Returns Status::InvalidArgument() if downsample_ratio_ is 0 or if a
// configuration names an unsupported cache; returns Status::OK() otherwise.
Status BlockCacheTraceSimulator::InitializeCaches() {
  if (downsample_ratio_ == 0) {
    // The capacities below are divided by the ratio; reject 0 up front
    // instead of performing an integer division by zero.
    return Status::InvalidArgument("downsample_ratio must be positive");
  }
  for (auto const& config : cache_configurations_) {
    for (auto cache_capacity : config.cache_capacities) {
      // Scale down the cache capacity since the trace contains accesses on
      // 1/'downsample_ratio' blocks.
      const uint64_t simulate_cache_capacity =
          cache_capacity / downsample_ratio_;
      std::shared_ptr<CacheSimulator> sim_cache;
      if (config.cache_name == "lru") {
        sim_cache = std::make_shared<CacheSimulator>(NewSimCache(
            NewLRUCache(simulate_cache_capacity, config.num_shard_bits,
                        /*strict_capacity_limit=*/false,
                        /*high_pri_pool_ratio=*/0),
            /*real_cache=*/nullptr, config.num_shard_bits));
      } else if (config.cache_name == "lru_priority") {
        sim_cache = std::make_shared<PrioritizedCacheSimulator>(NewSimCache(
            NewLRUCache(simulate_cache_capacity, config.num_shard_bits,
                        /*strict_capacity_limit=*/false,
                        /*high_pri_pool_ratio=*/0.5),
            /*real_cache=*/nullptr, config.num_shard_bits));
      } else {
        // Not supported.
        return Status::InvalidArgument("Unknown cache name " +
                                       config.cache_name);
      }
      sim_caches_[config].push_back(sim_cache);
    }
  }
  return Status::OK();
}
// Feeds one trace record to every simulated cache.
//
// While trace_start_time_ is still 0 it is set to the record's timestamp.
// Once a record's timestamp reaches the start time plus the warmup duration,
// the counters of all simulated caches are reset a single time, before that
// record is replayed.
void BlockCacheTraceSimulator::Access(const BlockCacheTraceRecord& access) {
  if (trace_start_time_ == 0) {
    trace_start_time_ = access.access_timestamp;
  }
  if (!warmup_complete_) {
    // access.access_timestamp is in microseconds.
    const auto warmup_end_micros =
        trace_start_time_ + warmup_seconds_ * kMicrosInSecond;
    if (warmup_end_micros <= access.access_timestamp) {
      for (auto& entry : sim_caches_) {
        for (auto& simulator : entry.second) {
          simulator->reset_counter();
        }
      }
      warmup_complete_ = true;
    }
  }
  for (auto& entry : sim_caches_) {
    for (auto& simulator : entry.second) {
      simulator->Access(access);
    }
  }
}
} // namespace rocksdb

View File

@ -0,0 +1,98 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include "rocksdb/utilities/sim_cache.h"
#include "trace_replay/block_cache_tracer.h"
namespace rocksdb {
// Number of microseconds in one second; used to convert the warmup duration
// from seconds to the microsecond unit of trace timestamps.
const uint64_t kMicrosInSecond = 1000000;
// A cache configuration provided by user.
//
// Configurations are compared and ordered by cache_name and num_shard_bits
// only; cache_capacities does not take part in either operator.
struct CacheConfiguration {
  std::string cache_name;  // Cache policy name, e.g. "lru" or "lru_priority".
  uint32_t num_shard_bits;
  std::vector<uint64_t>
      cache_capacities;  // simulate cache capacities in bytes.

  // Equality on (cache_name, num_shard_bits). This was previously spelled
  // `operator=`, which declared a copy-assignment operator that compared
  // instead of assigning.
  bool operator==(const CacheConfiguration& o) const {
    return cache_name == o.cache_name && num_shard_bits == o.num_shard_bits;
  }

  // Strict ordering on (cache_name, num_shard_bits), so that the struct can
  // be used as a std::map key.
  bool operator<(const CacheConfiguration& o) const {
    return cache_name < o.cache_name ||
           (cache_name == o.cache_name && num_shard_bits < o.num_shard_bits);
  }
};
// A cache simulator that runs against a block cache trace.
//
// It wraps a SimCache and replays trace records against it. Access() is
// virtual so that derived simulators can apply a different insertion policy.
class CacheSimulator {
 public:
  // Explicit so that a std::shared_ptr<SimCache> is never silently converted
  // into a simulator.
  explicit CacheSimulator(std::shared_ptr<SimCache> sim_cache);
  virtual ~CacheSimulator() = default;
  // No copy and move.
  CacheSimulator(const CacheSimulator&) = delete;
  CacheSimulator& operator=(const CacheSimulator&) = delete;
  CacheSimulator(CacheSimulator&&) = delete;
  CacheSimulator& operator=(CacheSimulator&&) = delete;

  // Replays one trace record against the simulated cache.
  virtual void Access(const BlockCacheTraceRecord& access);
  // Resets the counters of the wrapped cache.
  void reset_counter() { sim_cache_->reset_counter(); }
  // Miss ratio of the wrapped cache, as a percentage.
  double miss_ratio();
  // Number of hits plus misses counted by the wrapped cache.
  uint64_t total_accesses();

 protected:
  // The simulated cache; accessible to derived simulators.
  std::shared_ptr<SimCache> sim_cache_;
};
// A prioritized cache simulator that runs against a block cache trace.
// It inserts missing index/filter/uncompression-dictionary blocks with high
// priority in the cache.
class PrioritizedCacheSimulator : public CacheSimulator {
 public:
  // Explicit so that a std::shared_ptr<SimCache> is never silently converted
  // into a simulator.
  explicit PrioritizedCacheSimulator(std::shared_ptr<SimCache> sim_cache)
      : CacheSimulator(sim_cache) {}
  // Replays one trace record, choosing the insert priority by block type.
  void Access(const BlockCacheTraceRecord& access) override;
};
// A block cache simulator that reports miss ratio curves given a set of cache
// configurations.
//
// Usage: construct it, call InitializeCaches() once and check the returned
// status, then feed trace records through Access().
class BlockCacheTraceSimulator {
public:
// warmup_seconds: The number of seconds to warmup simulated caches. The
// hit/miss counters are reset after the warmup completes.
// downsample_ratio: The configured cache capacities are divided by this
// value when the simulated caches are created.
// cache_configurations: The configurations to simulate; a copy is kept.
BlockCacheTraceSimulator(
uint64_t warmup_seconds, uint32_t downsample_ratio,
const std::vector<CacheConfiguration>& cache_configurations);
~BlockCacheTraceSimulator() = default;
// No copy and move.
BlockCacheTraceSimulator(const BlockCacheTraceSimulator&) = delete;
BlockCacheTraceSimulator& operator=(const BlockCacheTraceSimulator&) = delete;
BlockCacheTraceSimulator(BlockCacheTraceSimulator&&) = delete;
BlockCacheTraceSimulator& operator=(BlockCacheTraceSimulator&&) = delete;
// Creates the simulated caches described by the configurations. Returns a
// non-OK status when a configuration names an unsupported cache.
Status InitializeCaches();
// Replays one trace record against every simulated cache.
void Access(const BlockCacheTraceRecord& access);
// The simulated caches, grouped by the configuration they were created from.
const std::map<CacheConfiguration,
std::vector<std::shared_ptr<CacheSimulator>>>&
sim_caches() const {
return sim_caches_;
}
private:
// NOTE: the constructor initializes the first three members in this order.
const uint64_t warmup_seconds_;
const uint32_t downsample_ratio_;
const std::vector<CacheConfiguration> cache_configurations_;
// Set once the hit/miss counters have been reset at the end of the warmup.
bool warmup_complete_ = false;
std::map<CacheConfiguration, std::vector<std::shared_ptr<CacheSimulator>>>
sim_caches_;
// Timestamp of the trace start; 0 means no start time has been recorded yet.
uint64_t trace_start_time_ = 0;
};
} // namespace rocksdb