Block cache analyzer: Add more stats (#5516)
Summary: This PR adds more command-line options to the block cache analyzer to help better understand block cache access patterns: -analyze_bottom_k_access_count_blocks, -analyze_top_k_access_count_blocks, -reuse_lifetime_labels, -reuse_lifetime_buckets, -analyze_callers, -access_count_buckets, and -analyze_blocks_reuse_k_reuse_window. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5516 Test Plan: make clean && COMPILE_WITH_ASAN=1 make check -j32 Differential Revision: D16037440 Pulled By: HaoyuHuang fbshipit-source-id: b9a4ac0d4712053fab910732077a4d4b91400bc8
This commit is contained in:
parent
1a59b6e2a9
commit
3e9c5a3523
File diff suppressed because it is too large
Load Diff
@ -9,13 +9,13 @@
|
|||||||
#include <set>
|
#include <set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "db/dbformat.h"
|
||||||
#include "rocksdb/env.h"
|
#include "rocksdb/env.h"
|
||||||
#include "rocksdb/utilities/sim_cache.h"
|
#include "rocksdb/utilities/sim_cache.h"
|
||||||
#include "trace_replay/block_cache_tracer.h"
|
#include "trace_replay/block_cache_tracer.h"
|
||||||
#include "utilities/simulator_cache/cache_simulator.h"
|
#include "utilities/simulator_cache/cache_simulator.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
// Statistics of a block.
|
// Statistics of a block.
|
||||||
struct BlockAccessInfo {
|
struct BlockAccessInfo {
|
||||||
uint64_t num_accesses = 0;
|
uint64_t num_accesses = 0;
|
||||||
@ -23,11 +23,12 @@ struct BlockAccessInfo {
|
|||||||
uint64_t first_access_time = 0;
|
uint64_t first_access_time = 0;
|
||||||
uint64_t last_access_time = 0;
|
uint64_t last_access_time = 0;
|
||||||
uint64_t num_keys = 0;
|
uint64_t num_keys = 0;
|
||||||
std::map<std::string, uint64_t>
|
std::map<std::string, std::map<TableReaderCaller, uint64_t>>
|
||||||
key_num_access_map; // for keys exist in this block.
|
key_num_access_map; // for keys exist in this block.
|
||||||
std::map<std::string, uint64_t>
|
std::map<std::string, std::map<TableReaderCaller, uint64_t>>
|
||||||
non_exist_key_num_access_map; // for keys do not exist in this block.
|
non_exist_key_num_access_map; // for keys do not exist in this block.
|
||||||
uint64_t num_referenced_key_exist_in_block = 0;
|
uint64_t num_referenced_key_exist_in_block = 0;
|
||||||
|
uint64_t referenced_data_size = 0;
|
||||||
std::map<TableReaderCaller, uint64_t> caller_num_access_map;
|
std::map<TableReaderCaller, uint64_t> caller_num_access_map;
|
||||||
// caller:timestamp:number_of_accesses. The granularity of the timestamp is
|
// caller:timestamp:number_of_accesses. The granularity of the timestamp is
|
||||||
// seconds.
|
// seconds.
|
||||||
@ -39,6 +40,12 @@ struct BlockAccessInfo {
|
|||||||
std::map<uint64_t, uint64_t> reuse_distance_count;
|
std::map<uint64_t, uint64_t> reuse_distance_count;
|
||||||
|
|
||||||
void AddAccess(const BlockCacheTraceRecord& access) {
|
void AddAccess(const BlockCacheTraceRecord& access) {
|
||||||
|
if (block_size != 0 && access.block_size != 0) {
|
||||||
|
assert(block_size == access.block_size);
|
||||||
|
}
|
||||||
|
if (num_keys != 0 && access.num_keys_in_block != 0) {
|
||||||
|
assert(num_keys == access.num_keys_in_block);
|
||||||
|
}
|
||||||
if (first_access_time == 0) {
|
if (first_access_time == 0) {
|
||||||
first_access_time = access.access_timestamp;
|
first_access_time = access.access_timestamp;
|
||||||
}
|
}
|
||||||
@ -54,10 +61,18 @@ struct BlockAccessInfo {
|
|||||||
access.caller)) {
|
access.caller)) {
|
||||||
num_keys = access.num_keys_in_block;
|
num_keys = access.num_keys_in_block;
|
||||||
if (access.referenced_key_exist_in_block == Boolean::kTrue) {
|
if (access.referenced_key_exist_in_block == Boolean::kTrue) {
|
||||||
key_num_access_map[access.referenced_key]++;
|
if (key_num_access_map.find(access.referenced_key) ==
|
||||||
|
key_num_access_map.end()) {
|
||||||
|
referenced_data_size += access.referenced_data_size;
|
||||||
|
}
|
||||||
|
key_num_access_map[access.referenced_key][access.caller]++;
|
||||||
num_referenced_key_exist_in_block++;
|
num_referenced_key_exist_in_block++;
|
||||||
|
if (referenced_data_size > block_size && block_size != 0) {
|
||||||
|
ParsedInternalKey internal_key;
|
||||||
|
ParseInternalKey(access.referenced_key, &internal_key);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
non_exist_key_num_access_map[access.referenced_key]++;
|
non_exist_key_num_access_map[access.referenced_key][access.caller]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -83,6 +98,7 @@ class BlockCacheTraceAnalyzer {
|
|||||||
public:
|
public:
|
||||||
BlockCacheTraceAnalyzer(
|
BlockCacheTraceAnalyzer(
|
||||||
const std::string& trace_file_path, const std::string& output_dir,
|
const std::string& trace_file_path, const std::string& output_dir,
|
||||||
|
bool compute_reuse_distance,
|
||||||
std::unique_ptr<BlockCacheTraceSimulator>&& cache_simulator);
|
std::unique_ptr<BlockCacheTraceSimulator>&& cache_simulator);
|
||||||
~BlockCacheTraceAnalyzer() = default;
|
~BlockCacheTraceAnalyzer() = default;
|
||||||
// No copy and move.
|
// No copy and move.
|
||||||
@ -122,7 +138,8 @@ class BlockCacheTraceAnalyzer {
|
|||||||
|
|
||||||
// Print access count distribution and the distribution break down by block
|
// Print access count distribution and the distribution break down by block
|
||||||
// type and column family.
|
// type and column family.
|
||||||
void PrintAccessCountStats() const;
|
void PrintAccessCountStats(bool user_access_only, uint32_t bottom_k,
|
||||||
|
uint32_t top_k) const;
|
||||||
|
|
||||||
// Print data block accesses by user Get and Multi-Get.
|
// Print data block accesses by user Get and Multi-Get.
|
||||||
// It prints out 1) A histogram on the percentage of keys accessed in a data
|
// It prints out 1) A histogram on the percentage of keys accessed in a data
|
||||||
@ -131,24 +148,93 @@ class BlockCacheTraceAnalyzer {
|
|||||||
// accesses on keys exist in a data block and its break down by column family.
|
// accesses on keys exist in a data block and its break down by column family.
|
||||||
void PrintDataBlockAccessStats() const;
|
void PrintDataBlockAccessStats() const;
|
||||||
|
|
||||||
|
// Write the percentage of accesses break down by column family into a csv
|
||||||
|
// file saved in 'output_dir'.
|
||||||
|
//
|
||||||
|
// The file is named "percentage_of_accesses_summary". The file format is
|
||||||
|
// caller,cf_0,cf_1,...,cf_n where the cf_i is the column family name found in
|
||||||
|
// the trace.
|
||||||
|
void WritePercentAccessSummaryStats() const;
|
||||||
|
|
||||||
|
// Write the percentage of accesses for the given caller break down by column
|
||||||
|
// family, level, and block type into a csv file saved in 'output_dir'.
|
||||||
|
//
|
||||||
|
// It generates two files: 1) caller_level_percentage_of_accesses_summary and
|
||||||
|
// 2) caller_bt_percentage_of_accesses_summary which break down by the level
|
||||||
|
// and block type, respectively. The file format is
|
||||||
|
// level/bt,cf_0,cf_1,...,cf_n where cf_i is the column family name found in
|
||||||
|
// the trace.
|
||||||
|
void WriteDetailedPercentAccessSummaryStats(TableReaderCaller caller) const;
|
||||||
|
|
||||||
|
// Write the access count summary into a csv file saved in 'output_dir'.
|
||||||
|
// It groups blocks by their access count.
|
||||||
|
//
|
||||||
|
// It generates two files: 1) cf_access_count_summary and 2)
|
||||||
|
// bt_access_count_summary which break down the access count by column family
|
||||||
|
// and block type, respectively. The file format is
|
||||||
|
// cf/bt,bucket_0,bucket_1,...,bucket_N.
|
||||||
|
void WriteAccessCountSummaryStats(
|
||||||
|
const std::vector<uint64_t>& access_count_buckets,
|
||||||
|
bool user_access_only) const;
|
||||||
|
|
||||||
// Write miss ratio curves of simulated cache configurations into a csv file
|
// Write miss ratio curves of simulated cache configurations into a csv file
|
||||||
// saved in 'output_dir'.
|
// named "mrc" saved in 'output_dir'.
|
||||||
|
//
|
||||||
|
// The file format is
|
||||||
|
// "cache_name,num_shard_bits,capacity,miss_ratio,total_accesses".
|
||||||
void WriteMissRatioCurves() const;
|
void WriteMissRatioCurves() const;
|
||||||
|
|
||||||
// Write the access timeline into a csv file saved in 'output_dir'.
|
// Write the access timeline into a csv file saved in 'output_dir'.
|
||||||
void WriteAccessTimeline(const std::string& label) const;
|
//
|
||||||
|
// The file is named "label_access_timeline". The file format is
|
||||||
|
// "time,label_1_access_per_second,label_2_access_per_second,...,label_N_access_per_second"
|
||||||
|
// where N is the number of unique labels found in the trace.
|
||||||
|
void WriteAccessTimeline(const std::string& label, uint64_t time_unit,
|
||||||
|
bool user_access_only) const;
|
||||||
|
|
||||||
// Write the reuse distance into a csv file saved in 'output_dir'. Reuse
|
// Write the reuse distance into a csv file saved in 'output_dir'. Reuse
|
||||||
// distance is defined as the cumulated size of unique blocks read between two
|
// distance is defined as the cumulated size of unique blocks read between two
|
||||||
// consecutive accesses on the same block.
|
// consecutive accesses on the same block.
|
||||||
|
//
|
||||||
|
// The file is named "label_reuse_distance". The file format is
|
||||||
|
// bucket,label_1,label_2,...,label_N.
|
||||||
void WriteReuseDistance(const std::string& label_str,
|
void WriteReuseDistance(const std::string& label_str,
|
||||||
const std::set<uint64_t>& distance_buckets) const;
|
const std::vector<uint64_t>& distance_buckets) const;
|
||||||
|
|
||||||
// Write the reuse interval into a csv file saved in 'output_dir'. Reuse
|
// Write the reuse interval into a csv file saved in 'output_dir'. Reuse
|
||||||
// interval is defined as the time between two consecutive accesses on the
|
// interval is defined as the time between two consecutive accesses on the
|
||||||
// same block..
|
// same block.
|
||||||
|
//
|
||||||
|
// The file is named "label_reuse_interval". The file format is
|
||||||
|
// bucket,label_1,label_2,...,label_N.
|
||||||
void WriteReuseInterval(const std::string& label_str,
|
void WriteReuseInterval(const std::string& label_str,
|
||||||
const std::set<uint64_t>& time_buckets) const;
|
const std::vector<uint64_t>& time_buckets) const;
|
||||||
|
|
||||||
|
// Write the reuse lifetime into a csv file saved in 'output_dir'. Reuse
|
||||||
|
// lifetime is defined as the time interval between the first access of a
|
||||||
|
// block and its last access.
|
||||||
|
//
|
||||||
|
// The file is named "label_reuse_lifetime". The file format is
|
||||||
|
// bucket,label_1,label_2,...,label_N.
|
||||||
|
void WriteReuseLifetime(const std::string& label_str,
|
||||||
|
const std::vector<uint64_t>& time_buckets) const;
|
||||||
|
|
||||||
|
// Write the reuse timeline into a csv file saved in 'output_dir'.
|
||||||
|
//
|
||||||
|
// The file is named
|
||||||
|
// "block_type_user_access_only_reuse_window_reuse_timeline". The file format
|
||||||
|
// is start_time,0,1,...,N where N equals trace_duration / reuse_window.
|
||||||
|
void WriteBlockReuseTimeline(uint64_t reuse_window, bool user_access_only,
|
||||||
|
TraceType block_type) const;
|
||||||
|
|
||||||
|
// Write the Get spatial locality into csv files saved in 'output_dir'.
|
||||||
|
//
|
||||||
|
// It generates three csv files. label_percent_ref_keys,
|
||||||
|
// label_percent_accesses_on_ref_keys, and
|
||||||
|
// label_percent_data_size_on_ref_keys.
|
||||||
|
void WriteGetSpatialLocality(
|
||||||
|
const std::string& label_str,
|
||||||
|
const std::vector<uint64_t>& percent_buckets) const;
|
||||||
|
|
||||||
const std::map<std::string, ColumnFamilyAccessInfoAggregate>&
|
const std::map<std::string, ColumnFamilyAccessInfoAggregate>&
|
||||||
TEST_cf_aggregates_map() const {
|
TEST_cf_aggregates_map() const {
|
||||||
@ -161,28 +247,48 @@ class BlockCacheTraceAnalyzer {
|
|||||||
std::string BuildLabel(const std::set<std::string>& labels,
|
std::string BuildLabel(const std::set<std::string>& labels,
|
||||||
const std::string& cf_name, uint64_t fd,
|
const std::string& cf_name, uint64_t fd,
|
||||||
uint32_t level, TraceType type,
|
uint32_t level, TraceType type,
|
||||||
TableReaderCaller caller,
|
TableReaderCaller caller, uint64_t block_key) const;
|
||||||
const std::string& block_key) const;
|
|
||||||
|
|
||||||
void ComputeReuseDistance(BlockAccessInfo* info) const;
|
void ComputeReuseDistance(BlockAccessInfo* info) const;
|
||||||
|
|
||||||
void RecordAccess(const BlockCacheTraceRecord& access);
|
void RecordAccess(const BlockCacheTraceRecord& access);
|
||||||
|
|
||||||
void UpdateReuseIntervalStats(
|
void UpdateReuseIntervalStats(
|
||||||
const std::string& label, const std::set<uint64_t>& time_buckets,
|
const std::string& label, const std::vector<uint64_t>& time_buckets,
|
||||||
const std::map<uint64_t, uint64_t> timeline,
|
const std::map<uint64_t, uint64_t> timeline,
|
||||||
std::map<std::string, std::map<uint64_t, uint64_t>>*
|
std::map<std::string, std::map<uint64_t, uint64_t>>*
|
||||||
label_time_num_reuses,
|
label_time_num_reuses,
|
||||||
uint64_t* total_num_reuses) const;
|
uint64_t* total_num_reuses) const;
|
||||||
|
|
||||||
|
std::string OutputPercentAccessStats(
|
||||||
|
uint64_t total_accesses,
|
||||||
|
const std::map<std::string, uint64_t>& cf_access_count) const;
|
||||||
|
|
||||||
|
void WriteStatsToFile(
|
||||||
|
const std::string& label_str, const std::vector<uint64_t>& time_buckets,
|
||||||
|
const std::string& filename_suffix,
|
||||||
|
const std::map<std::string, std::map<uint64_t, uint64_t>>& label_data,
|
||||||
|
uint64_t ntotal) const;
|
||||||
|
|
||||||
|
void TraverseBlocks(
|
||||||
|
std::function<void(const std::string& /*cf_name*/, uint64_t /*fd*/,
|
||||||
|
uint32_t /*level*/, TraceType /*block_type*/,
|
||||||
|
const std::string& /*block_key*/,
|
||||||
|
uint64_t /*block_key_id*/,
|
||||||
|
const BlockAccessInfo& /*block_access_info*/)>
|
||||||
|
block_callback) const;
|
||||||
|
|
||||||
rocksdb::Env* env_;
|
rocksdb::Env* env_;
|
||||||
const std::string trace_file_path_;
|
const std::string trace_file_path_;
|
||||||
const std::string output_dir_;
|
const std::string output_dir_;
|
||||||
|
const bool compute_reuse_distance_;
|
||||||
|
|
||||||
BlockCacheTraceHeader header_;
|
BlockCacheTraceHeader header_;
|
||||||
std::unique_ptr<BlockCacheTraceSimulator> cache_simulator_;
|
std::unique_ptr<BlockCacheTraceSimulator> cache_simulator_;
|
||||||
std::map<std::string, ColumnFamilyAccessInfoAggregate> cf_aggregates_map_;
|
std::map<std::string, ColumnFamilyAccessInfoAggregate> cf_aggregates_map_;
|
||||||
std::map<std::string, BlockAccessInfo*> block_info_map_;
|
std::map<std::string, BlockAccessInfo*> block_info_map_;
|
||||||
|
uint64_t trace_start_timestamp_in_seconds_ = 0;
|
||||||
|
uint64_t trace_end_timestamp_in_seconds_ = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
int block_cache_trace_analyzer_tool(int argc, char** argv);
|
int block_cache_trace_analyzer_tool(int argc, char** argv);
|
||||||
|
@ -56,6 +56,12 @@ class BlockCacheTracerTest : public testing::Test {
|
|||||||
reuse_distance_buckets_ = "1,1K,1M,1G";
|
reuse_distance_buckets_ = "1,1K,1M,1G";
|
||||||
reuse_interval_labels_ = "block,all,cf,sst,level,bt,cf_sst,cf_level,cf_bt";
|
reuse_interval_labels_ = "block,all,cf,sst,level,bt,cf_sst,cf_level,cf_bt";
|
||||||
reuse_interval_buckets_ = "1,10,100,1000";
|
reuse_interval_buckets_ = "1,10,100,1000";
|
||||||
|
reuse_lifetime_labels_ = "block,all,cf,sst,level,bt,cf_sst,cf_level,cf_bt";
|
||||||
|
reuse_lifetime_buckets_ = "1,10,100,1000";
|
||||||
|
analyzing_callers_ = "Get,Iterator";
|
||||||
|
access_count_buckets_ = "2,3,4,5,10";
|
||||||
|
analyze_get_spatial_locality_labels_ = "all";
|
||||||
|
analyze_get_spatial_locality_buckets_ = "10,20,30,40,50,60,70,80,90,100";
|
||||||
}
|
}
|
||||||
|
|
||||||
~BlockCacheTracerTest() override {
|
~BlockCacheTracerTest() override {
|
||||||
@ -158,12 +164,22 @@ class BlockCacheTracerTest : public testing::Test {
|
|||||||
"-print_access_count_stats",
|
"-print_access_count_stats",
|
||||||
"-print_data_block_access_count_stats",
|
"-print_data_block_access_count_stats",
|
||||||
"-cache_sim_warmup_seconds=0",
|
"-cache_sim_warmup_seconds=0",
|
||||||
|
"-analyze_bottom_k_access_count_blocks=5",
|
||||||
|
"-analyze_top_k_access_count_blocks=5",
|
||||||
|
"-analyze_blocks_reuse_k_reuse_window=5",
|
||||||
"-timeline_labels=" + timeline_labels_,
|
"-timeline_labels=" + timeline_labels_,
|
||||||
"-reuse_distance_labels=" + reuse_distance_labels_,
|
"-reuse_distance_labels=" + reuse_distance_labels_,
|
||||||
"-reuse_distance_buckets=" + reuse_distance_buckets_,
|
"-reuse_distance_buckets=" + reuse_distance_buckets_,
|
||||||
"-reuse_interval_labels=" + reuse_interval_labels_,
|
"-reuse_interval_labels=" + reuse_interval_labels_,
|
||||||
"-reuse_interval_buckets=" + reuse_interval_buckets_,
|
"-reuse_interval_buckets=" + reuse_interval_buckets_,
|
||||||
};
|
"-reuse_lifetime_labels=" + reuse_lifetime_labels_,
|
||||||
|
"-reuse_lifetime_buckets=" + reuse_lifetime_buckets_,
|
||||||
|
"-analyze_callers=" + analyzing_callers_,
|
||||||
|
"-access_count_buckets=" + access_count_buckets_,
|
||||||
|
"-analyze_get_spatial_locality_labels=" +
|
||||||
|
analyze_get_spatial_locality_labels_,
|
||||||
|
"-analyze_get_spatial_locality_buckets=" +
|
||||||
|
analyze_get_spatial_locality_buckets_};
|
||||||
char arg_buffer[kArgBufferSize];
|
char arg_buffer[kArgBufferSize];
|
||||||
char* argv[kMaxArgCount];
|
char* argv[kMaxArgCount];
|
||||||
int argc = 0;
|
int argc = 0;
|
||||||
@ -189,6 +205,12 @@ class BlockCacheTracerTest : public testing::Test {
|
|||||||
std::string reuse_distance_buckets_;
|
std::string reuse_distance_buckets_;
|
||||||
std::string reuse_interval_labels_;
|
std::string reuse_interval_labels_;
|
||||||
std::string reuse_interval_buckets_;
|
std::string reuse_interval_buckets_;
|
||||||
|
std::string reuse_lifetime_labels_;
|
||||||
|
std::string reuse_lifetime_buckets_;
|
||||||
|
std::string analyzing_callers_;
|
||||||
|
std::string access_count_buckets_;
|
||||||
|
std::string analyze_get_spatial_locality_labels_;
|
||||||
|
std::string analyze_get_spatial_locality_buckets_;
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) {
|
TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) {
|
||||||
@ -247,51 +269,65 @@ TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) {
|
|||||||
}
|
}
|
||||||
{
|
{
|
||||||
// Validate the timeline csv files.
|
// Validate the timeline csv files.
|
||||||
const uint32_t expected_num_lines = 50;
|
const std::vector<std::string> time_units{"_60", "_3600"};
|
||||||
|
const std::vector<std::string> user_access_only_flags{"user_access_only_",
|
||||||
|
"all_access_"};
|
||||||
|
for (auto const& user_access_only : user_access_only_flags) {
|
||||||
|
for (auto const& unit : time_units) {
|
||||||
std::stringstream ss(timeline_labels_);
|
std::stringstream ss(timeline_labels_);
|
||||||
while (ss.good()) {
|
while (ss.good()) {
|
||||||
std::string l;
|
std::string l;
|
||||||
ASSERT_TRUE(getline(ss, l, ','));
|
ASSERT_TRUE(getline(ss, l, ','));
|
||||||
const std::string timeline_file =
|
if (l.find("block") == std::string::npos) {
|
||||||
test_path_ + "/" + l + "_access_timeline";
|
if (unit != "_60" || user_access_only != "all_access_") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const std::string timeline_file = test_path_ + "/" +
|
||||||
|
user_access_only + l + unit +
|
||||||
|
"_access_timeline";
|
||||||
std::ifstream infile(timeline_file);
|
std::ifstream infile(timeline_file);
|
||||||
std::string line;
|
std::string line;
|
||||||
uint32_t nlines = 0;
|
const uint64_t expected_naccesses = 50;
|
||||||
ASSERT_TRUE(getline(infile, line));
|
const uint64_t expected_user_accesses = 30;
|
||||||
uint64_t expected_time = 1;
|
ASSERT_TRUE(getline(infile, line)) << timeline_file;
|
||||||
|
uint32_t naccesses = 0;
|
||||||
while (getline(infile, line)) {
|
while (getline(infile, line)) {
|
||||||
std::stringstream ss_naccess(line);
|
std::stringstream ss_naccess(line);
|
||||||
uint32_t naccesses = 0;
|
|
||||||
std::string substr;
|
std::string substr;
|
||||||
uint32_t time = 0;
|
bool read_label = false;
|
||||||
while (ss_naccess.good()) {
|
while (ss_naccess.good()) {
|
||||||
ASSERT_TRUE(getline(ss_naccess, substr, ','));
|
ASSERT_TRUE(getline(ss_naccess, substr, ','));
|
||||||
if (time == 0) {
|
if (!read_label) {
|
||||||
time = ParseUint32(substr);
|
read_label = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
naccesses += ParseUint32(substr);
|
naccesses += ParseUint32(substr);
|
||||||
}
|
}
|
||||||
nlines++;
|
|
||||||
ASSERT_EQ(1, naccesses);
|
|
||||||
ASSERT_EQ(expected_time, time);
|
|
||||||
expected_time += 1;
|
|
||||||
}
|
}
|
||||||
ASSERT_EQ(expected_num_lines, nlines);
|
if (user_access_only == "user_access_only_") {
|
||||||
|
ASSERT_EQ(expected_user_accesses, naccesses) << timeline_file;
|
||||||
|
} else {
|
||||||
|
ASSERT_EQ(expected_naccesses, naccesses) << timeline_file;
|
||||||
|
}
|
||||||
ASSERT_OK(env_->DeleteFile(timeline_file));
|
ASSERT_OK(env_->DeleteFile(timeline_file));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
{
|
{
|
||||||
// Validate the reuse_interval and reuse_distance csv files.
|
// Validate the reuse_interval and reuse_distance csv files.
|
||||||
std::map<std::string, std::string> test_reuse_csv_files;
|
std::map<std::string, std::string> test_reuse_csv_files;
|
||||||
test_reuse_csv_files["_reuse_interval"] = reuse_interval_labels_;
|
test_reuse_csv_files["_access_reuse_interval"] = reuse_interval_labels_;
|
||||||
test_reuse_csv_files["_reuse_distance"] = reuse_distance_labels_;
|
test_reuse_csv_files["_reuse_distance"] = reuse_distance_labels_;
|
||||||
|
test_reuse_csv_files["_reuse_lifetime"] = reuse_lifetime_labels_;
|
||||||
|
test_reuse_csv_files["_avg_reuse_interval"] = reuse_interval_labels_;
|
||||||
|
test_reuse_csv_files["_avg_reuse_interval_naccesses"] =
|
||||||
|
reuse_interval_labels_;
|
||||||
for (auto const& test : test_reuse_csv_files) {
|
for (auto const& test : test_reuse_csv_files) {
|
||||||
const std::string& file_suffix = test.first;
|
const std::string& file_suffix = test.first;
|
||||||
const std::string& labels = test.second;
|
const std::string& labels = test.second;
|
||||||
const uint32_t expected_num_rows = 10;
|
const uint32_t expected_num_rows = 5;
|
||||||
const uint32_t expected_num_rows_absolute_values = 5;
|
|
||||||
const uint32_t expected_reused_blocks = 0;
|
|
||||||
std::stringstream ss(labels);
|
std::stringstream ss(labels);
|
||||||
while (ss.good()) {
|
while (ss.good()) {
|
||||||
std::string l;
|
std::string l;
|
||||||
@ -300,7 +336,6 @@ TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) {
|
|||||||
std::ifstream infile(reuse_csv_file);
|
std::ifstream infile(reuse_csv_file);
|
||||||
std::string line;
|
std::string line;
|
||||||
ASSERT_TRUE(getline(infile, line));
|
ASSERT_TRUE(getline(infile, line));
|
||||||
uint32_t nblocks = 0;
|
|
||||||
double npercentage = 0;
|
double npercentage = 0;
|
||||||
uint32_t nrows = 0;
|
uint32_t nrows = 0;
|
||||||
while (getline(infile, line)) {
|
while (getline(infile, line)) {
|
||||||
@ -314,20 +349,162 @@ TEST_F(BlockCacheTracerTest, BlockCacheAnalyzer) {
|
|||||||
label_read = true;
|
label_read = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (nrows < expected_num_rows_absolute_values) {
|
|
||||||
nblocks += ParseUint32(substr);
|
|
||||||
} else {
|
|
||||||
npercentage += ParseDouble(substr);
|
npercentage += ParseDouble(substr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
ASSERT_EQ(expected_num_rows, nrows);
|
ASSERT_EQ(expected_num_rows, nrows);
|
||||||
ASSERT_EQ(expected_reused_blocks, nblocks);
|
if ("_reuse_lifetime" == test.first ||
|
||||||
|
"_avg_reuse_interval" == test.first ||
|
||||||
|
"_avg_reuse_interval_naccesses" == test.first) {
|
||||||
|
ASSERT_EQ(100, npercentage) << reuse_csv_file;
|
||||||
|
} else {
|
||||||
ASSERT_LT(npercentage, 0);
|
ASSERT_LT(npercentage, 0);
|
||||||
|
}
|
||||||
ASSERT_OK(env_->DeleteFile(reuse_csv_file));
|
ASSERT_OK(env_->DeleteFile(reuse_csv_file));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// Validate the percentage of accesses summary.
|
||||||
|
const std::string percent_access_summary_file =
|
||||||
|
test_path_ + "/percentage_of_accesses_summary";
|
||||||
|
std::ifstream infile(percent_access_summary_file);
|
||||||
|
std::string line;
|
||||||
|
ASSERT_TRUE(getline(infile, line));
|
||||||
|
std::set<std::string> callers;
|
||||||
|
std::set<std::string> expected_callers{"Get", "MultiGet", "Iterator",
|
||||||
|
"Prefetch", "Compaction"};
|
||||||
|
while (getline(infile, line)) {
|
||||||
|
std::stringstream caller_percent(line);
|
||||||
|
std::string caller;
|
||||||
|
ASSERT_TRUE(getline(caller_percent, caller, ','));
|
||||||
|
std::string percent;
|
||||||
|
ASSERT_TRUE(getline(caller_percent, percent, ','));
|
||||||
|
ASSERT_FALSE(caller_percent.good());
|
||||||
|
callers.insert(caller);
|
||||||
|
ASSERT_EQ(20, ParseDouble(percent));
|
||||||
|
}
|
||||||
|
ASSERT_EQ(expected_callers.size(), callers.size());
|
||||||
|
for (auto caller : callers) {
|
||||||
|
ASSERT_TRUE(expected_callers.find(caller) != expected_callers.end());
|
||||||
|
}
|
||||||
|
ASSERT_OK(env_->DeleteFile(percent_access_summary_file));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
// Validate the percentage of accesses summary by analyzing callers.
|
||||||
|
std::stringstream analyzing_callers(analyzing_callers_);
|
||||||
|
while (analyzing_callers.good()) {
|
||||||
|
std::string caller;
|
||||||
|
ASSERT_TRUE(getline(analyzing_callers, caller, ','));
|
||||||
|
std::vector<std::string> breakdowns{"level", "bt"};
|
||||||
|
for (auto breakdown : breakdowns) {
|
||||||
|
const std::string file_name = test_path_ + "/" + caller + "_" +
|
||||||
|
breakdown +
|
||||||
|
"_percentage_of_accesses_summary";
|
||||||
|
std::ifstream infile(file_name);
|
||||||
|
std::string line;
|
||||||
|
ASSERT_TRUE(getline(infile, line));
|
||||||
|
double sum = 0;
|
||||||
|
while (getline(infile, line)) {
|
||||||
|
std::stringstream label_percent(line);
|
||||||
|
std::string label;
|
||||||
|
ASSERT_TRUE(getline(label_percent, label, ','));
|
||||||
|
std::string percent;
|
||||||
|
ASSERT_TRUE(getline(label_percent, percent, ','));
|
||||||
|
ASSERT_FALSE(label_percent.good());
|
||||||
|
sum += ParseDouble(percent);
|
||||||
|
}
|
||||||
|
ASSERT_EQ(100, sum);
|
||||||
|
ASSERT_OK(env_->DeleteFile(file_name));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const std::vector<std::string> access_types{"user_access_only", "all_access"};
|
||||||
|
const std::vector<std::string> prefix{"bt", "cf"};
|
||||||
|
for (auto const& pre : prefix) {
|
||||||
|
for (auto const& access_type : access_types) {
|
||||||
|
{
|
||||||
|
// Validate the access count summary.
|
||||||
|
const std::string bt_access_count_summary = test_path_ + "/" + pre +
|
||||||
|
"_" + access_type +
|
||||||
|
"_access_count_summary";
|
||||||
|
std::ifstream infile(bt_access_count_summary);
|
||||||
|
std::string line;
|
||||||
|
ASSERT_TRUE(getline(infile, line));
|
||||||
|
double sum_percent = 0;
|
||||||
|
while (getline(infile, line)) {
|
||||||
|
std::stringstream bt_percent(line);
|
||||||
|
std::string bt;
|
||||||
|
ASSERT_TRUE(getline(bt_percent, bt, ','));
|
||||||
|
std::string percent;
|
||||||
|
ASSERT_TRUE(getline(bt_percent, percent, ','));
|
||||||
|
sum_percent += ParseDouble(percent);
|
||||||
|
}
|
||||||
|
ASSERT_EQ(100.0, sum_percent);
|
||||||
|
ASSERT_OK(env_->DeleteFile(bt_access_count_summary));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto const& access_type : access_types) {
|
||||||
|
std::vector<std::string> block_types{"Index", "Data", "Filter"};
|
||||||
|
for (auto block_type : block_types) {
|
||||||
|
// Validate reuse block timeline.
|
||||||
|
const std::string reuse_blocks_timeline = test_path_ + "/" + block_type +
|
||||||
|
"_" + access_type +
|
||||||
|
"_5_reuse_blocks_timeline";
|
||||||
|
std::ifstream infile(reuse_blocks_timeline);
|
||||||
|
std::string line;
|
||||||
|
ASSERT_TRUE(getline(infile, line)) << reuse_blocks_timeline;
|
||||||
|
uint32_t index = 0;
|
||||||
|
while (getline(infile, line)) {
|
||||||
|
std::stringstream timeline(line);
|
||||||
|
bool start_time = false;
|
||||||
|
double sum = 0;
|
||||||
|
while (timeline.good()) {
|
||||||
|
std::string value;
|
||||||
|
ASSERT_TRUE(getline(timeline, value, ','));
|
||||||
|
if (!start_time) {
|
||||||
|
start_time = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
sum += ParseDouble(value);
|
||||||
|
}
|
||||||
|
index++;
|
||||||
|
ASSERT_LT(sum, 100.0 * index + 1) << reuse_blocks_timeline;
|
||||||
|
}
|
||||||
|
ASSERT_OK(env_->DeleteFile(reuse_blocks_timeline));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::stringstream ss(analyze_get_spatial_locality_labels_);
|
||||||
|
while (ss.good()) {
|
||||||
|
std::string l;
|
||||||
|
ASSERT_TRUE(getline(ss, l, ','));
|
||||||
|
const std::vector<std::string> spatial_locality_files{
|
||||||
|
"_percent_ref_keys", "_percent_accesses_on_ref_keys",
|
||||||
|
"_percent_data_size_on_ref_keys"};
|
||||||
|
for (auto const& spatial_locality_file : spatial_locality_files) {
|
||||||
|
const std::string filename = test_path_ + "/" + l + spatial_locality_file;
|
||||||
|
std::ifstream infile(filename);
|
||||||
|
std::string line;
|
||||||
|
ASSERT_TRUE(getline(infile, line));
|
||||||
|
double sum_percent = 0;
|
||||||
|
uint32_t nrows = 0;
|
||||||
|
while (getline(infile, line)) {
|
||||||
|
std::stringstream bt_percent(line);
|
||||||
|
std::string bt;
|
||||||
|
ASSERT_TRUE(getline(bt_percent, bt, ','));
|
||||||
|
std::string percent;
|
||||||
|
ASSERT_TRUE(getline(bt_percent, percent, ','));
|
||||||
|
sum_percent += ParseDouble(percent);
|
||||||
|
nrows++;
|
||||||
|
}
|
||||||
|
ASSERT_EQ(11, nrows);
|
||||||
|
ASSERT_EQ(100.0, sum_percent);
|
||||||
|
ASSERT_OK(env_->DeleteFile(filename));
|
||||||
|
}
|
||||||
|
}
|
||||||
ASSERT_OK(env_->DeleteFile(block_cache_sim_config_path_));
|
ASSERT_OK(env_->DeleteFile(block_cache_sim_config_path_));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -366,6 +543,7 @@ TEST_F(BlockCacheTracerTest, MixedBlocks) {
|
|||||||
// Read blocks.
|
// Read blocks.
|
||||||
BlockCacheTraceAnalyzer analyzer(trace_file_path_,
|
BlockCacheTraceAnalyzer analyzer(trace_file_path_,
|
||||||
/*output_miss_ratio_curve_path=*/"",
|
/*output_miss_ratio_curve_path=*/"",
|
||||||
|
/*compute_reuse_distance=*/true,
|
||||||
/*simulator=*/nullptr);
|
/*simulator=*/nullptr);
|
||||||
// The analyzer ends when it detects an incomplete access record.
|
// The analyzer ends when it detects an incomplete access record.
|
||||||
ASSERT_EQ(Status::Incomplete(""), analyzer.Analyze());
|
ASSERT_EQ(Status::Incomplete(""), analyzer.Analyze());
|
||||||
|
@ -29,6 +29,8 @@ bool ShouldTrace(const Slice& block_key, const TraceOptions& trace_options) {
|
|||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
const uint64_t kMicrosInSecond = 1000 * 1000;
|
const uint64_t kMicrosInSecond = 1000 * 1000;
|
||||||
|
const uint64_t kSecondInMinute = 60;
|
||||||
|
const uint64_t kSecondInHour = 3600;
|
||||||
const std::string BlockCacheTraceHelper::kUnknownColumnFamilyName =
|
const std::string BlockCacheTraceHelper::kUnknownColumnFamilyName =
|
||||||
"UnknownColumnFamily";
|
"UnknownColumnFamily";
|
||||||
const uint64_t BlockCacheTraceHelper::kReservedGetId = 0;
|
const uint64_t BlockCacheTraceHelper::kReservedGetId = 0;
|
||||||
|
@ -17,6 +17,9 @@
|
|||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
extern const uint64_t kMicrosInSecond;
|
extern const uint64_t kMicrosInSecond;
|
||||||
|
extern const uint64_t kSecondInMinute;
|
||||||
|
extern const uint64_t kSecondInHour;
|
||||||
|
|
||||||
|
|
||||||
class BlockCacheTraceHelper {
|
class BlockCacheTraceHelper {
|
||||||
public:
|
public:
|
||||||
|
Loading…
Reference in New Issue
Block a user