diff --git a/tools/block_cache_trace_analyzer.cc b/tools/block_cache_trace_analyzer.cc index 3fd93a023..a8259de71 100644 --- a/tools/block_cache_trace_analyzer.cc +++ b/tools/block_cache_trace_analyzer.cc @@ -27,6 +27,10 @@ DEFINE_string( "cache_name,num_shard_bits,cache_capacity_1,...,cache_capacity_N. " "cache_name is lru. cache_capacity can be xK, xM or xG " "where x is a positive number."); +DEFINE_int32(block_cache_trace_downsample_ratio, 1, + "The trace collected accesses on one in every " + "block_cache_trace_downsample_ratio blocks. We scale " + "down the simulated cache size by this ratio."); DEFINE_bool(print_block_size_stats, false, "Print block size distribution and the distribution break down by " "block type and column family."); @@ -91,18 +95,30 @@ void print_break_lines(uint32_t num_break_lines) { } } +double percent(uint64_t numerator, uint64_t denomenator) { + if (denomenator == 0) { + return -1; + } + return static_cast(numerator * 100.0 / denomenator); +} + } // namespace BlockCacheTraceSimulator::BlockCacheTraceSimulator( - uint64_t warmup_seconds, + uint64_t warmup_seconds, uint32_t downsample_ratio, const std::vector& cache_configurations) : warmup_seconds_(warmup_seconds), + downsample_ratio_(downsample_ratio), cache_configurations_(cache_configurations) { for (auto const& config : cache_configurations_) { for (auto cache_capacity : config.cache_capacities) { - sim_caches_.push_back( - NewSimCache(NewLRUCache(cache_capacity, config.num_shard_bits), - /*real_cache=*/nullptr, config.num_shard_bits)); + // Scale down the cache capacity since the trace contains accesses on + // 1/'downsample_ratio' blocks. + uint64_t simulate_cache_capacity = + cache_capacity / downsample_ratio_; + sim_caches_.push_back(NewSimCache( + NewLRUCache(simulate_cache_capacity, config.num_shard_bits), + /*real_cache=*/nullptr, config.num_shard_bits)); } } } @@ -285,11 +301,12 @@ void BlockCacheTraceAnalyzer::PrintAccessCountStats() const { } } } - fprintf(stdout, "Block access count stats: \n%s", + fprintf(stdout, + "Block access count stats: The number of accesses per block.\n%s", access_stats.ToString().c_str()); for (auto const& bt_stats : bt_stats_map) { print_break_lines(/*num_break_lines=*/1); - fprintf(stdout, "Block access count stats for block type %s: \n%s", + fprintf(stdout, "Break down by block type %s: \n%s", block_type_to_string(bt_stats.first).c_str(), bt_stats.second.ToString().c_str()); } @@ -298,7 +315,7 @@ void BlockCacheTraceAnalyzer::PrintAccessCountStats() const { for (auto const& bt_stats : cf_bt_stats.second) { print_break_lines(/*num_break_lines=*/1); fprintf(stdout, - "Block access count stats for column family %s and block type " + "Break down by column family %s and block type " "%s: \n%s", cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(), bt_stats.second.ToString().c_str()); @@ -313,6 +330,15 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { std::map cf_non_existing_keys_stats_map; HistogramStat block_access_stats; std::map cf_block_access_info; + HistogramStat percent_referenced_bytes; + std::map cf_percent_referenced_bytes; + // Total number of accesses in a data block / number of keys in a data block. + HistogramStat avg_naccesses_per_key_in_a_data_block; + std::map cf_avg_naccesses_per_key_in_a_data_block; + // The standard deviation on the number of accesses of a key in a data block. + HistogramStat stdev_naccesses_per_key_in_a_data_block; + std::map + cf_stdev_naccesses_per_key_in_a_data_block; for (auto const& cf_aggregates : cf_aggregates_map_) { // Stats per column family. @@ -343,6 +369,20 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { block_access_info.second.num_referenced_key_exist_in_block / (double)block_access_info.second.num_accesses) * 10000.0); + + HistogramStat hist_naccess_per_key; + for (auto const& key_access : + block_access_info.second.key_num_access_map) { + hist_naccess_per_key.Add(key_access.second); + } + uint64_t avg_accesses = hist_naccess_per_key.Average(); + uint64_t stdev_accesses = hist_naccess_per_key.StandardDeviation(); + avg_naccesses_per_key_in_a_data_block.Add(avg_accesses); + cf_avg_naccesses_per_key_in_a_data_block[cf_name].Add(avg_accesses); + stdev_naccesses_per_key_in_a_data_block.Add(stdev_accesses); + cf_stdev_naccesses_per_key_in_a_data_block[cf_name].Add( + stdev_accesses); + existing_keys_stats.Add(percent_referenced_for_existing_keys); cf_existing_keys_stats_map[cf_name].Add( percent_referenced_for_existing_keys); @@ -356,7 +396,7 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { } } fprintf(stdout, - "Histogram on percentage of referenced keys existing in a block over " + "Histogram on the number of referenced keys existing in a block over " "the total number of keys in a block: \n%s", existing_keys_stats.ToString().c_str()); for (auto const& cf_stats : cf_existing_keys_stats_map) { @@ -367,7 +407,7 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { print_break_lines(/*num_break_lines=*/1); fprintf( stdout, - "Histogram on percentage of referenced keys DO NOT exist in a block over " + "Histogram on the number of referenced keys DO NOT exist in a block over " "the total number of keys in a block: \n%s", non_existing_keys_stats.ToString().c_str()); for (auto const& cf_stats : cf_non_existing_keys_stats_map) { @@ -377,7 +417,7 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { } print_break_lines(/*num_break_lines=*/1); fprintf(stdout, - "Histogram on percentage of accesses on keys exist in a block over " + "Histogram on the number of accesses on keys exist in a block over " "the total number of accesses in a block: \n%s", block_access_stats.ToString().c_str()); for (auto const& cf_stats : cf_block_access_info) { @@ -385,6 +425,24 @@ void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const { fprintf(stdout, "Break down by column family %s: \n%s", cf_stats.first.c_str(), cf_stats.second.ToString().c_str()); } + print_break_lines(/*num_break_lines=*/1); + fprintf( + stdout, + "Histogram on the average number of accesses per key in a block: \n%s", + avg_naccesses_per_key_in_a_data_block.ToString().c_str()); + for (auto const& cf_stats : cf_avg_naccesses_per_key_in_a_data_block) { + fprintf(stdout, "Break down by column family %s: \n%s", + cf_stats.first.c_str(), cf_stats.second.ToString().c_str()); + } + print_break_lines(/*num_break_lines=*/1); + fprintf(stdout, + "Histogram on the standard deviation of the number of accesses per " + "key in a block: \n%s", + stdev_naccesses_per_key_in_a_data_block.ToString().c_str()); + for (auto const& cf_stats : cf_stdev_naccesses_per_key_in_a_data_block) { + fprintf(stdout, "Break down by column family %s: \n%s", + cf_stats.first.c_str(), cf_stats.second.ToString().c_str()); + } } void BlockCacheTraceAnalyzer::PrintStatsSummary() const { @@ -456,40 +514,49 @@ void BlockCacheTraceAnalyzer::PrintStatsSummary() const { print_break_lines(/*num_break_lines=*/3); fprintf(stdout, "Statistics for column family %s:\n", cf_name.c_str()); fprintf(stdout, - "Number of files:%" PRIu64 "Number of blocks: %" PRIu64 - "Number of accesses: %" PRIu64 "\n", + " Number of files:%" PRIu64 " Number of blocks: %" PRIu64 + " Number of accesses: %" PRIu64 "\n", cf_num_files, cf_num_blocks, cf_num_accesses); for (auto block_type : cf_bt_blocks) { - fprintf(stdout, "Number of %s blocks: %" PRIu64 "\n", - block_type_to_string(block_type.first).c_str(), - block_type.second); + fprintf(stdout, "Number of %s blocks: %" PRIu64 " Percent: %.2f\n", + block_type_to_string(block_type.first).c_str(), block_type.second, + percent(block_type.second, cf_num_blocks)); } for (auto caller : cf_caller_num_accesses_map) { + const uint64_t naccesses = caller.second; print_break_lines(/*num_break_lines=*/1); - fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n", - caller_to_string(caller.first).c_str(), caller.second); + fprintf(stdout, + "Caller %s: Number of accesses %" PRIu64 " Percent: %.2f\n", + caller_to_string(caller.first).c_str(), naccesses, + percent(naccesses, cf_num_accesses)); fprintf(stdout, "Caller %s: Number of accesses per level break down\n", caller_to_string(caller.first).c_str()); for (auto naccess_level : cf_caller_level_num_accesses_map[caller.first]) { fprintf(stdout, - "\t Level %" PRIu64 ": Number of accesses: %" PRIu64 "\n", - naccess_level.first, naccess_level.second); + "\t Level %" PRIu64 ": Number of accesses: %" PRIu64 + " Percent: %.2f\n", + naccess_level.first, naccess_level.second, + percent(naccess_level.second, naccesses)); } fprintf(stdout, "Caller %s: Number of accesses per file break down\n", caller_to_string(caller.first).c_str()); for (auto naccess_file : cf_caller_file_num_accesses_map[caller.first]) { fprintf(stdout, - "\t File %" PRIu64 ": Number of accesses: %" PRIu64 "\n", - naccess_file.first, naccess_file.second); + "\t File %" PRIu64 ": Number of accesses: %" PRIu64 + " Percent: %.2f\n", + naccess_file.first, naccess_file.second, + percent(naccess_file.second, naccesses)); } fprintf(stdout, "Caller %s: Number of accesses per block type break down\n", caller_to_string(caller.first).c_str()); for (auto naccess_type : cf_caller_bt_num_accesses_map[caller.first]) { - fprintf(stdout, "\t Block Type %s: Number of accesses: %" PRIu64 "\n", + fprintf(stdout, + "\t Block Type %s: Number of accesses: %" PRIu64 + " Percent: %.2f\n", block_type_to_string(naccess_type.first).c_str(), - naccess_type.second); + naccess_type.second, percent(naccess_type.second, naccesses)); } } } @@ -500,25 +567,32 @@ void BlockCacheTraceAnalyzer::PrintStatsSummary() const { " Number of accesses: %" PRIu64 "\n", total_num_files, total_num_blocks, total_num_accesses); for (auto block_type : bt_num_blocks_map) { - fprintf(stdout, "Number of %s blocks: %" PRIu64 "\n", - block_type_to_string(block_type.first).c_str(), block_type.second); + fprintf(stdout, "Number of %s blocks: %" PRIu64 " Percent: %.2f\n", + block_type_to_string(block_type.first).c_str(), block_type.second, + percent(block_type.second, total_num_blocks)); } for (auto caller : caller_num_access_map) { print_break_lines(/*num_break_lines=*/1); - fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 "\n", - caller_to_string(caller.first).c_str(), caller.second); + uint64_t naccesses = caller.second; + fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 " Percent: %.2f\n", + caller_to_string(caller.first).c_str(), naccesses, + percent(naccesses, total_num_accesses)); fprintf(stdout, "Caller %s: Number of accesses per level break down\n", caller_to_string(caller.first).c_str()); for (auto naccess_level : caller_level_num_access_map[caller.first]) { - fprintf(stdout, "\t Level %d: Number of accesses: %" PRIu64 "\n", - naccess_level.first, naccess_level.second); + fprintf(stdout, + "\t Level %d: Number of accesses: %" PRIu64 " Percent: %.2f\n", + naccess_level.first, naccess_level.second, + percent(naccess_level.second, naccesses)); } fprintf(stdout, "Caller %s: Number of accesses per block type break down\n", caller_to_string(caller.first).c_str()); for (auto naccess_type : caller_bt_num_access_map[caller.first]) { - fprintf(stdout, "\t Block Type %s: Number of accesses: %" PRIu64 "\n", + fprintf(stdout, + "\t Block Type %s: Number of accesses: %" PRIu64 + " Percent: %.2f\n", block_type_to_string(naccess_type.first).c_str(), - naccess_type.second); + naccess_type.second, percent(naccess_type.second, naccesses)); } } } @@ -575,12 +649,15 @@ int block_cache_trace_analyzer_tool(int argc, char** argv) { } uint64_t warmup_seconds = FLAGS_cache_sim_warmup_seconds > 0 ? FLAGS_cache_sim_warmup_seconds : 0; + uint32_t downsample_ratio = FLAGS_block_cache_trace_downsample_ratio > 0 + ? FLAGS_block_cache_trace_downsample_ratio + : 0; std::vector cache_configs = parse_cache_config_file(FLAGS_block_cache_sim_config_path); std::unique_ptr cache_simulator; if (!cache_configs.empty()) { - cache_simulator.reset( - new BlockCacheTraceSimulator(warmup_seconds, cache_configs)); + cache_simulator.reset(new BlockCacheTraceSimulator( + warmup_seconds, downsample_ratio, cache_configs)); } BlockCacheTraceAnalyzer analyzer(FLAGS_block_cache_trace_path, FLAGS_output_miss_ratio_curve_path, diff --git a/tools/block_cache_trace_analyzer.h b/tools/block_cache_trace_analyzer.h index 1420906f3..0690d14d0 100644 --- a/tools/block_cache_trace_analyzer.h +++ b/tools/block_cache_trace_analyzer.h @@ -31,7 +31,7 @@ class BlockCacheTraceSimulator { // warmup_seconds: The number of seconds to warmup simulated caches. The // hit/miss counters are reset after the warmup completes. BlockCacheTraceSimulator( - uint64_t warmup_seconds, + uint64_t warmup_seconds, uint32_t downsample_ratio, const std::vector& cache_configurations); ~BlockCacheTraceSimulator() = default; // No copy and move. @@ -52,6 +52,7 @@ class BlockCacheTraceSimulator { private: const uint64_t warmup_seconds_; + const uint32_t downsample_ratio_; const std::vector cache_configurations_; bool warmup_complete_ = false;