Add more stats in the block cache trace analyzer (#5482)
Summary: This PR adds more stats in the block cache trace analyzer. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5482 Differential Revision: D15883553 Pulled By: HaoyuHuang fbshipit-source-id: 6d440e4f657af75690420102d532d0ee1ed4e9cf
This commit is contained in:
parent
f46a2a0375
commit
2e8ad03ab3
@ -27,6 +27,10 @@ DEFINE_string(
|
||||
"cache_name,num_shard_bits,cache_capacity_1,...,cache_capacity_N. "
|
||||
"cache_name is lru. cache_capacity can be xK, xM or xG "
|
||||
"where x is a positive number.");
|
||||
DEFINE_int32(block_cache_trace_downsample_ratio, 1,
|
||||
"The trace collected accesses on one in every "
|
||||
"block_cache_trace_downsample_ratio blocks. We scale "
|
||||
"down the simulated cache size by this ratio.");
|
||||
DEFINE_bool(print_block_size_stats, false,
|
||||
"Print block size distribution and the distribution break down by "
|
||||
"block type and column family.");
|
||||
@ -91,18 +95,30 @@ void print_break_lines(uint32_t num_break_lines) {
|
||||
}
|
||||
}
|
||||
|
||||
double percent(uint64_t numerator, uint64_t denomenator) {
|
||||
if (denomenator == 0) {
|
||||
return -1;
|
||||
}
|
||||
return static_cast<double>(numerator * 100.0 / denomenator);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
BlockCacheTraceSimulator::BlockCacheTraceSimulator(
|
||||
uint64_t warmup_seconds,
|
||||
uint64_t warmup_seconds, uint32_t downsample_ratio,
|
||||
const std::vector<CacheConfiguration>& cache_configurations)
|
||||
: warmup_seconds_(warmup_seconds),
|
||||
downsample_ratio_(downsample_ratio),
|
||||
cache_configurations_(cache_configurations) {
|
||||
for (auto const& config : cache_configurations_) {
|
||||
for (auto cache_capacity : config.cache_capacities) {
|
||||
sim_caches_.push_back(
|
||||
NewSimCache(NewLRUCache(cache_capacity, config.num_shard_bits),
|
||||
/*real_cache=*/nullptr, config.num_shard_bits));
|
||||
// Scale down the cache capacity since the trace contains accesses on
|
||||
// 1/'downsample_ratio' blocks.
|
||||
uint64_t simulate_cache_capacity =
|
||||
cache_capacity / downsample_ratio_;
|
||||
sim_caches_.push_back(NewSimCache(
|
||||
NewLRUCache(simulate_cache_capacity, config.num_shard_bits),
|
||||
/*real_cache=*/nullptr, config.num_shard_bits));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -285,11 +301,12 @@ void BlockCacheTraceAnalyzer::PrintAccessCountStats() const {
|
||||
}
|
||||
}
|
||||
}
|
||||
fprintf(stdout, "Block access count stats: \n%s",
|
||||
fprintf(stdout,
|
||||
"Block access count stats: The number of accesses per block.\n%s",
|
||||
access_stats.ToString().c_str());
|
||||
for (auto const& bt_stats : bt_stats_map) {
|
||||
print_break_lines(/*num_break_lines=*/1);
|
||||
fprintf(stdout, "Block access count stats for block type %s: \n%s",
|
||||
fprintf(stdout, "Break down by block type %s: \n%s",
|
||||
block_type_to_string(bt_stats.first).c_str(),
|
||||
bt_stats.second.ToString().c_str());
|
||||
}
|
||||
@ -298,7 +315,7 @@ void BlockCacheTraceAnalyzer::PrintAccessCountStats() const {
|
||||
for (auto const& bt_stats : cf_bt_stats.second) {
|
||||
print_break_lines(/*num_break_lines=*/1);
|
||||
fprintf(stdout,
|
||||
"Block access count stats for column family %s and block type "
|
||||
"Break down by column family %s and block type "
|
||||
"%s: \n%s",
|
||||
cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(),
|
||||
bt_stats.second.ToString().c_str());
|
||||
@ -313,6 +330,15 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const {
|
||||
std::map<std::string, HistogramStat> cf_non_existing_keys_stats_map;
|
||||
HistogramStat block_access_stats;
|
||||
std::map<std::string, HistogramStat> cf_block_access_info;
|
||||
HistogramStat percent_referenced_bytes;
|
||||
std::map<std::string, HistogramStat> cf_percent_referenced_bytes;
|
||||
// Total number of accesses in a data block / number of keys in a data block.
|
||||
HistogramStat avg_naccesses_per_key_in_a_data_block;
|
||||
std::map<std::string, HistogramStat> cf_avg_naccesses_per_key_in_a_data_block;
|
||||
// The standard deviation on the number of accesses of a key in a data block.
|
||||
HistogramStat stdev_naccesses_per_key_in_a_data_block;
|
||||
std::map<std::string, HistogramStat>
|
||||
cf_stdev_naccesses_per_key_in_a_data_block;
|
||||
|
||||
for (auto const& cf_aggregates : cf_aggregates_map_) {
|
||||
// Stats per column family.
|
||||
@ -343,6 +369,20 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const {
|
||||
block_access_info.second.num_referenced_key_exist_in_block /
|
||||
(double)block_access_info.second.num_accesses) *
|
||||
10000.0);
|
||||
|
||||
HistogramStat hist_naccess_per_key;
|
||||
for (auto const& key_access :
|
||||
block_access_info.second.key_num_access_map) {
|
||||
hist_naccess_per_key.Add(key_access.second);
|
||||
}
|
||||
uint64_t avg_accesses = hist_naccess_per_key.Average();
|
||||
uint64_t stdev_accesses = hist_naccess_per_key.StandardDeviation();
|
||||
avg_naccesses_per_key_in_a_data_block.Add(avg_accesses);
|
||||
cf_avg_naccesses_per_key_in_a_data_block[cf_name].Add(avg_accesses);
|
||||
stdev_naccesses_per_key_in_a_data_block.Add(stdev_accesses);
|
||||
cf_stdev_naccesses_per_key_in_a_data_block[cf_name].Add(
|
||||
stdev_accesses);
|
||||
|
||||
existing_keys_stats.Add(percent_referenced_for_existing_keys);
|
||||
cf_existing_keys_stats_map[cf_name].Add(
|
||||
percent_referenced_for_existing_keys);
|
||||
@ -356,7 +396,7 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const {
|
||||
}
|
||||
}
|
||||
fprintf(stdout,
|
||||
"Histogram on percentage of referenced keys existing in a block over "
|
||||
"Histogram on the number of referenced keys existing in a block over "
|
||||
"the total number of keys in a block: \n%s",
|
||||
existing_keys_stats.ToString().c_str());
|
||||
for (auto const& cf_stats : cf_existing_keys_stats_map) {
|
||||
@ -367,7 +407,7 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const {
|
||||
print_break_lines(/*num_break_lines=*/1);
|
||||
fprintf(
|
||||
stdout,
|
||||
"Histogram on percentage of referenced keys DO NOT exist in a block over "
|
||||
"Histogram on the number of referenced keys DO NOT exist in a block over "
|
||||
"the total number of keys in a block: \n%s",
|
||||
non_existing_keys_stats.ToString().c_str());
|
||||
for (auto const& cf_stats : cf_non_existing_keys_stats_map) {
|
||||
@ -377,7 +417,7 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const {
|
||||
}
|
||||
print_break_lines(/*num_break_lines=*/1);
|
||||
fprintf(stdout,
|
||||
"Histogram on percentage of accesses on keys exist in a block over "
|
||||
"Histogram on the number of accesses on keys exist in a block over "
|
||||
"the total number of accesses in a block: \n%s",
|
||||
block_access_stats.ToString().c_str());
|
||||
for (auto const& cf_stats : cf_block_access_info) {
|
||||
@ -385,6 +425,24 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const {
|
||||
fprintf(stdout, "Break down by column family %s: \n%s",
|
||||
cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
|
||||
}
|
||||
print_break_lines(/*num_break_lines=*/1);
|
||||
fprintf(
|
||||
stdout,
|
||||
"Histogram on the average number of accesses per key in a block: \n%s",
|
||||
avg_naccesses_per_key_in_a_data_block.ToString().c_str());
|
||||
for (auto const& cf_stats : cf_avg_naccesses_per_key_in_a_data_block) {
|
||||
fprintf(stdout, "Break down by column family %s: \n%s",
|
||||
cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
|
||||
}
|
||||
print_break_lines(/*num_break_lines=*/1);
|
||||
fprintf(stdout,
|
||||
"Histogram on the standard deviation of the number of accesses per "
|
||||
"key in a block: \n%s",
|
||||
stdev_naccesses_per_key_in_a_data_block.ToString().c_str());
|
||||
for (auto const& cf_stats : cf_stdev_naccesses_per_key_in_a_data_block) {
|
||||
fprintf(stdout, "Break down by column family %s: \n%s",
|
||||
cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
|
||||
@ -456,40 +514,49 @@ void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
|
||||
print_break_lines(/*num_break_lines=*/3);
|
||||
fprintf(stdout, "Statistics for column family %s:\n", cf_name.c_str());
|
||||
fprintf(stdout,
|
||||
"Number of files:%" PRIu64 "Number of blocks: %" PRIu64
|
||||
"Number of accesses: %" PRIu64 "\n",
|
||||
" Number of files:%" PRIu64 " Number of blocks: %" PRIu64
|
||||
" Number of accesses: %" PRIu64 "\n",
|
||||
cf_num_files, cf_num_blocks, cf_num_accesses);
|
||||
for (auto block_type : cf_bt_blocks) {
|
||||
fprintf(stdout, "Number of %s blocks: %" PRIu64 "\n",
|
||||
block_type_to_string(block_type.first).c_str(),
|
||||
block_type.second);
|
||||
fprintf(stdout, "Number of %s blocks: %" PRIu64 " Percent: %.2f\n",
|
||||
block_type_to_string(block_type.first).c_str(), block_type.second,
|
||||
percent(block_type.second, cf_num_blocks));
|
||||
}
|
||||
for (auto caller : cf_caller_num_accesses_map) {
|
||||
const uint64_t naccesses = caller.second;
|
||||
print_break_lines(/*num_break_lines=*/1);
|
||||
fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n",
|
||||
caller_to_string(caller.first).c_str(), caller.second);
|
||||
fprintf(stdout,
|
||||
"Caller %s: Number of accesses %" PRIu64 " Percent: %.2f\n",
|
||||
caller_to_string(caller.first).c_str(), naccesses,
|
||||
percent(naccesses, cf_num_accesses));
|
||||
fprintf(stdout, "Caller %s: Number of accesses per level break down\n",
|
||||
caller_to_string(caller.first).c_str());
|
||||
for (auto naccess_level :
|
||||
cf_caller_level_num_accesses_map[caller.first]) {
|
||||
fprintf(stdout,
|
||||
"\t Level %" PRIu64 ": Number of accesses: %" PRIu64 "\n",
|
||||
naccess_level.first, naccess_level.second);
|
||||
"\t Level %" PRIu64 ": Number of accesses: %" PRIu64
|
||||
" Percent: %.2f\n",
|
||||
naccess_level.first, naccess_level.second,
|
||||
percent(naccess_level.second, naccesses));
|
||||
}
|
||||
fprintf(stdout, "Caller %s: Number of accesses per file break down\n",
|
||||
caller_to_string(caller.first).c_str());
|
||||
for (auto naccess_file : cf_caller_file_num_accesses_map[caller.first]) {
|
||||
fprintf(stdout,
|
||||
"\t File %" PRIu64 ": Number of accesses: %" PRIu64 "\n",
|
||||
naccess_file.first, naccess_file.second);
|
||||
"\t File %" PRIu64 ": Number of accesses: %" PRIu64
|
||||
" Percent: %.2f\n",
|
||||
naccess_file.first, naccess_file.second,
|
||||
percent(naccess_file.second, naccesses));
|
||||
}
|
||||
fprintf(stdout,
|
||||
"Caller %s: Number of accesses per block type break down\n",
|
||||
caller_to_string(caller.first).c_str());
|
||||
for (auto naccess_type : cf_caller_bt_num_accesses_map[caller.first]) {
|
||||
fprintf(stdout, "\t Block Type %s: Number of accesses: %" PRIu64 "\n",
|
||||
fprintf(stdout,
|
||||
"\t Block Type %s: Number of accesses: %" PRIu64
|
||||
" Percent: %.2f\n",
|
||||
block_type_to_string(naccess_type.first).c_str(),
|
||||
naccess_type.second);
|
||||
naccess_type.second, percent(naccess_type.second, naccesses));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -500,25 +567,32 @@ void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
|
||||
" Number of accesses: %" PRIu64 "\n",
|
||||
total_num_files, total_num_blocks, total_num_accesses);
|
||||
for (auto block_type : bt_num_blocks_map) {
|
||||
fprintf(stdout, "Number of %s blocks: %" PRIu64 "\n",
|
||||
block_type_to_string(block_type.first).c_str(), block_type.second);
|
||||
fprintf(stdout, "Number of %s blocks: %" PRIu64 " Percent: %.2f\n",
|
||||
block_type_to_string(block_type.first).c_str(), block_type.second,
|
||||
percent(block_type.second, total_num_blocks));
|
||||
}
|
||||
for (auto caller : caller_num_access_map) {
|
||||
print_break_lines(/*num_break_lines=*/1);
|
||||
fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n",
|
||||
caller_to_string(caller.first).c_str(), caller.second);
|
||||
uint64_t naccesses = caller.second;
|
||||
fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 " Percent: %.2f\n",
|
||||
caller_to_string(caller.first).c_str(), naccesses,
|
||||
percent(naccesses, total_num_accesses));
|
||||
fprintf(stdout, "Caller %s: Number of accesses per level break down\n",
|
||||
caller_to_string(caller.first).c_str());
|
||||
for (auto naccess_level : caller_level_num_access_map[caller.first]) {
|
||||
fprintf(stdout, "\t Level %d: Number of accesses: %" PRIu64 "\n",
|
||||
naccess_level.first, naccess_level.second);
|
||||
fprintf(stdout,
|
||||
"\t Level %d: Number of accesses: %" PRIu64 " Percent: %.2f\n",
|
||||
naccess_level.first, naccess_level.second,
|
||||
percent(naccess_level.second, naccesses));
|
||||
}
|
||||
fprintf(stdout, "Caller %s: Number of accesses per block type break down\n",
|
||||
caller_to_string(caller.first).c_str());
|
||||
for (auto naccess_type : caller_bt_num_access_map[caller.first]) {
|
||||
fprintf(stdout, "\t Block Type %s: Number of accesses: %" PRIu64 "\n",
|
||||
fprintf(stdout,
|
||||
"\t Block Type %s: Number of accesses: %" PRIu64
|
||||
" Percent: %.2f\n",
|
||||
block_type_to_string(naccess_type.first).c_str(),
|
||||
naccess_type.second);
|
||||
naccess_type.second, percent(naccess_type.second, naccesses));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -575,12 +649,15 @@ int block_cache_trace_analyzer_tool(int argc, char** argv) {
|
||||
}
|
||||
uint64_t warmup_seconds =
|
||||
FLAGS_cache_sim_warmup_seconds > 0 ? FLAGS_cache_sim_warmup_seconds : 0;
|
||||
uint32_t downsample_ratio = FLAGS_block_cache_trace_downsample_ratio > 0
|
||||
? FLAGS_block_cache_trace_downsample_ratio
|
||||
: 0;
|
||||
std::vector<CacheConfiguration> cache_configs =
|
||||
parse_cache_config_file(FLAGS_block_cache_sim_config_path);
|
||||
std::unique_ptr<BlockCacheTraceSimulator> cache_simulator;
|
||||
if (!cache_configs.empty()) {
|
||||
cache_simulator.reset(
|
||||
new BlockCacheTraceSimulator(warmup_seconds, cache_configs));
|
||||
cache_simulator.reset(new BlockCacheTraceSimulator(
|
||||
warmup_seconds, downsample_ratio, cache_configs));
|
||||
}
|
||||
BlockCacheTraceAnalyzer analyzer(FLAGS_block_cache_trace_path,
|
||||
FLAGS_output_miss_ratio_curve_path,
|
||||
|
@ -31,7 +31,7 @@ class BlockCacheTraceSimulator {
|
||||
// warmup_seconds: The number of seconds to warmup simulated caches. The
|
||||
// hit/miss counters are reset after the warmup completes.
|
||||
BlockCacheTraceSimulator(
|
||||
uint64_t warmup_seconds,
|
||||
uint64_t warmup_seconds, uint32_t downsample_ratio,
|
||||
const std::vector<CacheConfiguration>& cache_configurations);
|
||||
~BlockCacheTraceSimulator() = default;
|
||||
// No copy and move.
|
||||
@ -52,6 +52,7 @@ class BlockCacheTraceSimulator {
|
||||
|
||||
private:
|
||||
const uint64_t warmup_seconds_;
|
||||
const uint32_t downsample_ratio_;
|
||||
const std::vector<CacheConfiguration> cache_configurations_;
|
||||
|
||||
bool warmup_complete_ = false;
|
||||
|
Loading…
Reference in New Issue
Block a user