Add unique key number changing statistics to Trace_analyzer (#4646)
Summary: Changes: 1. in current version, key size distribution is printed out as the result. In this change, the result will be output to a file to make further analyze easier 2. To understand how the unique keys are accessed over time, the total unique key number of each CF of each query type in each second over time is output to a file. In this way, user could know when the unique keys are accessed frequently or accessed rarely. 3. output the total QPS of each CF to a file 4. Add the print result of total queries of each CF of each query type. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4646 Differential Revision: D12968156 Pulled By: zhichao-cao fbshipit-source-id: 6c411c7ec47c7843a70929136efd71a150db0e4c
This commit is contained in:
parent
859dbda6e3
commit
d761857d56
@ -139,7 +139,7 @@ DEFINE_bool(no_key, false,
|
||||
DEFINE_bool(print_overall_stats, true,
|
||||
" Print the stats of the whole trace, "
|
||||
"like total requests, keys, and etc.");
|
||||
DEFINE_bool(print_key_distribution, false, "Print the key size distribution.");
|
||||
DEFINE_bool(output_key_distribution, false, "Print the key size distribution.");
|
||||
DEFINE_bool(
|
||||
output_value_distribution, false,
|
||||
"Out put the value size distribution, only available for Put and Merge.\n"
|
||||
@ -158,6 +158,9 @@ DEFINE_int32(value_interval, 8,
|
||||
"To output the value distribution, we need to set the value "
|
||||
"intervals and make the statistic of the value size distribution "
|
||||
"in different intervals. The default is 8.");
|
||||
DEFINE_double(sample_ratio, 1.0,
|
||||
"If the trace size is extremely huge or user want to sample "
|
||||
"the trace when analyzing, sample ratio can be set (0, 1.0]");
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
@ -280,6 +283,13 @@ TraceAnalyzer::TraceAnalyzer(std::string& trace_path, std::string& output_path,
|
||||
begin_time_ = 0;
|
||||
end_time_ = 0;
|
||||
time_series_start_ = 0;
|
||||
cur_time_sec_ = 0;
|
||||
if (FLAGS_sample_ratio > 1.0 || FLAGS_sample_ratio <= 0) {
|
||||
sample_max_ = 1;
|
||||
} else {
|
||||
sample_max_ = static_cast<uint32_t>(1.0 / FLAGS_sample_ratio);
|
||||
}
|
||||
|
||||
ta_.resize(kTaTypeNum);
|
||||
ta_[0].type_name = "get";
|
||||
if (FLAGS_analyze_get) {
|
||||
@ -329,6 +339,9 @@ TraceAnalyzer::TraceAnalyzer(std::string& trace_path, std::string& output_path,
|
||||
} else {
|
||||
ta_[7].enabled = false;
|
||||
}
|
||||
for (int i = 0; i < kTaTypeNum; i++) {
|
||||
ta_[i].sample_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
TraceAnalyzer::~TraceAnalyzer() {}
|
||||
@ -364,6 +377,13 @@ Status TraceAnalyzer::PrepareProcessing() {
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
qps_stats_name =
|
||||
output_path_ + "/" + FLAGS_output_prefix + "-cf_qps_stats.txt";
|
||||
s = env_->NewWritableFile(qps_stats_name, &cf_qps_f_, env_options_);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -523,7 +543,7 @@ Status TraceAnalyzer::MakeStatistics() {
|
||||
}
|
||||
|
||||
// Generate the key size distribution data
|
||||
if (FLAGS_print_key_distribution) {
|
||||
if (FLAGS_output_key_distribution) {
|
||||
if (stat.second.a_key_size_stats.find(record.first.size()) ==
|
||||
stat.second.a_key_size_stats.end()) {
|
||||
stat.second.a_key_size_stats[record.first.size()] = 1;
|
||||
@ -567,17 +587,31 @@ Status TraceAnalyzer::MakeStatistics() {
|
||||
|
||||
// find the medium of the key size
|
||||
uint64_t k_count = 0;
|
||||
bool get_mid = false;
|
||||
for (auto& record : stat.second.a_key_size_stats) {
|
||||
k_count += record.second;
|
||||
if (k_count >= stat.second.a_key_mid) {
|
||||
if (!get_mid && k_count >= stat.second.a_key_mid) {
|
||||
stat.second.a_key_mid = record.first;
|
||||
break;
|
||||
get_mid = true;
|
||||
}
|
||||
if (FLAGS_output_key_distribution && stat.second.a_key_size_f) {
|
||||
ret = sprintf(buffer_, "%" PRIu64 " %" PRIu64 "\n", record.first,
|
||||
record.second);
|
||||
if (ret < 0) {
|
||||
return Status::IOError("Format output failed");
|
||||
}
|
||||
std::string printout(buffer_);
|
||||
s = stat.second.a_key_size_f->Append(printout);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "Write key size distribution file failed\n");
|
||||
return s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// output the value size distribution
|
||||
uint64_t v_begin = 0, v_end = 0, v_count = 0;
|
||||
bool get_mid = false;
|
||||
get_mid = false;
|
||||
for (auto& record : stat.second.a_value_size_stats) {
|
||||
v_begin = v_end;
|
||||
v_end = (record.first + 1) * FLAGS_value_interval;
|
||||
@ -823,6 +857,32 @@ Status TraceAnalyzer::MakeStatisticQPS() {
|
||||
stat.second.a_ave_qps = (static_cast<double>(cf_qps_sum)) / duration;
|
||||
}
|
||||
|
||||
// Output the accessed unique key number change overtime
|
||||
if (stat.second.a_key_num_f) {
|
||||
uint64_t cur_uni_key =
|
||||
static_cast<uint64_t>(stat.second.a_key_stats.size());
|
||||
double cur_ratio = 0.0;
|
||||
uint64_t cur_num = 0;
|
||||
for (uint32_t i = 0; i < duration; i++) {
|
||||
auto find_time = stat.second.uni_key_num.find(i);
|
||||
if (find_time != stat.second.uni_key_num.end()) {
|
||||
cur_ratio = (static_cast<double>(find_time->second)) / cur_uni_key;
|
||||
cur_num = find_time->second;
|
||||
}
|
||||
ret = sprintf(buffer_, "%" PRIu64 " %.12f\n", cur_num, cur_ratio);
|
||||
if (ret < 0) {
|
||||
return Status::IOError("Format the output failed");
|
||||
}
|
||||
std::string printout(buffer_);
|
||||
s = stat.second.a_key_num_f->Append(printout);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr,
|
||||
"Write accessed unique key number change file failed\n");
|
||||
return s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// output the prefix of top k access peak
|
||||
if (FLAGS_output_prefix_cut > 0 && stat.second.a_top_qps_prefix_f) {
|
||||
while (!stat.second.top_k_qps_sec.empty()) {
|
||||
@ -887,6 +947,33 @@ Status TraceAnalyzer::MakeStatisticQPS() {
|
||||
}
|
||||
}
|
||||
|
||||
if (cf_qps_f_) {
|
||||
int cfs_size = static_cast<uint32_t>(cfs_.size());
|
||||
uint32_t v;
|
||||
for (uint32_t i = 0; i < duration; i++) {
|
||||
for (int cf = 0; cf < cfs_size; cf++) {
|
||||
if (cfs_[cf].cf_qps.find(i) != cfs_[cf].cf_qps.end()) {
|
||||
v = cfs_[cf].cf_qps[i];
|
||||
} else {
|
||||
v = 0;
|
||||
}
|
||||
if (cf < cfs_size - 1) {
|
||||
ret = sprintf(buffer_, "%u ", v);
|
||||
} else {
|
||||
ret = sprintf(buffer_, "%u\n", v);
|
||||
}
|
||||
if (ret < 0) {
|
||||
return Status::IOError("Format the output failed");
|
||||
}
|
||||
std::string printout(buffer_);
|
||||
s = cf_qps_f_->Append(printout);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
qps_peak_ = qps_peak;
|
||||
for (int type = 0; type <= kTaTypeNum; type++) {
|
||||
if (duration == 0) {
|
||||
@ -1015,7 +1102,7 @@ Status TraceAnalyzer::ReProcessing() {
|
||||
}
|
||||
|
||||
// Make the statistics fo the key size distribution
|
||||
if (FLAGS_print_key_distribution) {
|
||||
if (FLAGS_output_key_distribution) {
|
||||
if (cfs_[cf_id].w_key_size_stats.find(input_key.size()) ==
|
||||
cfs_[cf_id].w_key_size_stats.end()) {
|
||||
cfs_[cf_id].w_key_size_stats[input_key.size()] = 1;
|
||||
@ -1134,6 +1221,11 @@ Status TraceAnalyzer::KeyStatsInsertion(const uint32_t& type,
|
||||
tmp_qps_map[prefix] = 1;
|
||||
ta_[type].stats[cf_id].a_qps_prefix_stats[time_in_sec] = tmp_qps_map;
|
||||
}
|
||||
if (time_in_sec != cur_time_sec_) {
|
||||
ta_[type].stats[cf_id].uni_key_num[cur_time_sec_] =
|
||||
static_cast<uint64_t>(ta_[type].stats[cf_id].a_key_stats.size());
|
||||
cur_time_sec_ = time_in_sec;
|
||||
}
|
||||
} else {
|
||||
found_stats->second.a_count++;
|
||||
found_stats->second.a_key_size_sqsum += MultiplyCheckOverflow(
|
||||
@ -1154,6 +1246,11 @@ Status TraceAnalyzer::KeyStatsInsertion(const uint32_t& type,
|
||||
s = StatsUnitCorrelationUpdate(found_key->second, type, ts, key);
|
||||
}
|
||||
}
|
||||
if (time_in_sec != cur_time_sec_) {
|
||||
found_stats->second.uni_key_num[cur_time_sec_] =
|
||||
static_cast<uint64_t>(found_stats->second.a_key_stats.size());
|
||||
cur_time_sec_ = time_in_sec;
|
||||
}
|
||||
|
||||
auto found_value =
|
||||
found_stats->second.a_value_size_stats.find(dist_value_size);
|
||||
@ -1194,6 +1291,10 @@ Status TraceAnalyzer::KeyStatsInsertion(const uint32_t& type,
|
||||
cfs_[cf_id] = cf_unit;
|
||||
}
|
||||
|
||||
if (FLAGS_output_qps_stats) {
|
||||
cfs_[cf_id].cf_qps[time_in_sec]++;
|
||||
}
|
||||
|
||||
if (FLAGS_output_time_series) {
|
||||
TraceUnit trace_u;
|
||||
trace_u.type = type;
|
||||
@ -1256,6 +1357,9 @@ Status TraceAnalyzer::OpenStatsOutputFiles(const std::string& type,
|
||||
if (FLAGS_output_key_stats) {
|
||||
s = CreateOutputFile(type, new_stats.cf_name, "accessed_key_stats.txt",
|
||||
&new_stats.a_key_f);
|
||||
s = CreateOutputFile(type, new_stats.cf_name,
|
||||
"accessed_unique_key_num_change.txt",
|
||||
&new_stats.a_key_num_f);
|
||||
if (!FLAGS_key_space_dir.empty()) {
|
||||
s = CreateOutputFile(type, new_stats.cf_name, "whole_key_stats.txt",
|
||||
&new_stats.w_key_f);
|
||||
@ -1294,6 +1398,12 @@ Status TraceAnalyzer::OpenStatsOutputFiles(const std::string& type,
|
||||
&new_stats.a_value_size_f);
|
||||
}
|
||||
|
||||
if (FLAGS_output_key_distribution) {
|
||||
s = CreateOutputFile(type, new_stats.cf_name,
|
||||
"accessed_key_size_distribution.txt",
|
||||
&new_stats.a_key_size_f);
|
||||
}
|
||||
|
||||
if (FLAGS_output_qps_stats) {
|
||||
s = CreateOutputFile(type, new_stats.cf_name, "qps_stats.txt",
|
||||
&new_stats.a_qps_f);
|
||||
@ -1333,6 +1443,10 @@ void TraceAnalyzer::CloseOutputFiles() {
|
||||
stat.second.a_key_f->Close();
|
||||
}
|
||||
|
||||
if (stat.second.a_key_num_f) {
|
||||
stat.second.a_key_num_f->Close();
|
||||
}
|
||||
|
||||
if (stat.second.a_count_dist_f) {
|
||||
stat.second.a_count_dist_f->Close();
|
||||
}
|
||||
@ -1345,6 +1459,10 @@ void TraceAnalyzer::CloseOutputFiles() {
|
||||
stat.second.a_value_size_f->Close();
|
||||
}
|
||||
|
||||
if (stat.second.a_key_size_f) {
|
||||
stat.second.a_key_size_f->Close();
|
||||
}
|
||||
|
||||
if (stat.second.a_qps_f) {
|
||||
stat.second.a_qps_f->Close();
|
||||
}
|
||||
@ -1378,6 +1496,15 @@ Status TraceAnalyzer::HandleGet(uint32_t column_family_id,
|
||||
}
|
||||
}
|
||||
|
||||
if (ta_[TraceOperationType::kGet].sample_count >= sample_max_) {
|
||||
ta_[TraceOperationType::kGet].sample_count = 0;
|
||||
}
|
||||
if (ta_[TraceOperationType::kGet].sample_count > 0) {
|
||||
ta_[TraceOperationType::kGet].sample_count++;
|
||||
return Status::OK();
|
||||
}
|
||||
ta_[TraceOperationType::kGet].sample_count++;
|
||||
|
||||
if (!ta_[TraceOperationType::kGet].enabled) {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -1405,6 +1532,15 @@ Status TraceAnalyzer::HandlePut(uint32_t column_family_id, const Slice& key,
|
||||
}
|
||||
}
|
||||
|
||||
if (ta_[TraceOperationType::kPut].sample_count >= sample_max_) {
|
||||
ta_[TraceOperationType::kPut].sample_count = 0;
|
||||
}
|
||||
if (ta_[TraceOperationType::kPut].sample_count > 0) {
|
||||
ta_[TraceOperationType::kPut].sample_count++;
|
||||
return Status::OK();
|
||||
}
|
||||
ta_[TraceOperationType::kPut].sample_count++;
|
||||
|
||||
if (!ta_[TraceOperationType::kPut].enabled) {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -1429,6 +1565,15 @@ Status TraceAnalyzer::HandleDelete(uint32_t column_family_id,
|
||||
}
|
||||
}
|
||||
|
||||
if (ta_[TraceOperationType::kDelete].sample_count >= sample_max_) {
|
||||
ta_[TraceOperationType::kDelete].sample_count = 0;
|
||||
}
|
||||
if (ta_[TraceOperationType::kDelete].sample_count > 0) {
|
||||
ta_[TraceOperationType::kDelete].sample_count++;
|
||||
return Status::OK();
|
||||
}
|
||||
ta_[TraceOperationType::kDelete].sample_count++;
|
||||
|
||||
if (!ta_[TraceOperationType::kDelete].enabled) {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -1453,6 +1598,15 @@ Status TraceAnalyzer::HandleSingleDelete(uint32_t column_family_id,
|
||||
}
|
||||
}
|
||||
|
||||
if (ta_[TraceOperationType::kSingleDelete].sample_count >= sample_max_) {
|
||||
ta_[TraceOperationType::kSingleDelete].sample_count = 0;
|
||||
}
|
||||
if (ta_[TraceOperationType::kSingleDelete].sample_count > 0) {
|
||||
ta_[TraceOperationType::kSingleDelete].sample_count++;
|
||||
return Status::OK();
|
||||
}
|
||||
ta_[TraceOperationType::kSingleDelete].sample_count++;
|
||||
|
||||
if (!ta_[TraceOperationType::kSingleDelete].enabled) {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -1478,6 +1632,15 @@ Status TraceAnalyzer::HandleDeleteRange(uint32_t column_family_id,
|
||||
}
|
||||
}
|
||||
|
||||
if (ta_[TraceOperationType::kRangeDelete].sample_count >= sample_max_) {
|
||||
ta_[TraceOperationType::kRangeDelete].sample_count = 0;
|
||||
}
|
||||
if (ta_[TraceOperationType::kRangeDelete].sample_count > 0) {
|
||||
ta_[TraceOperationType::kRangeDelete].sample_count++;
|
||||
return Status::OK();
|
||||
}
|
||||
ta_[TraceOperationType::kRangeDelete].sample_count++;
|
||||
|
||||
if (!ta_[TraceOperationType::kRangeDelete].enabled) {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -1504,6 +1667,15 @@ Status TraceAnalyzer::HandleMerge(uint32_t column_family_id, const Slice& key,
|
||||
}
|
||||
}
|
||||
|
||||
if (ta_[TraceOperationType::kMerge].sample_count >= sample_max_) {
|
||||
ta_[TraceOperationType::kMerge].sample_count = 0;
|
||||
}
|
||||
if (ta_[TraceOperationType::kMerge].sample_count > 0) {
|
||||
ta_[TraceOperationType::kMerge].sample_count++;
|
||||
return Status::OK();
|
||||
}
|
||||
ta_[TraceOperationType::kMerge].sample_count++;
|
||||
|
||||
if (!ta_[TraceOperationType::kMerge].enabled) {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -1540,6 +1712,15 @@ Status TraceAnalyzer::HandleIter(uint32_t column_family_id,
|
||||
}
|
||||
}
|
||||
|
||||
if (ta_[type].sample_count >= sample_max_) {
|
||||
ta_[type].sample_count = 0;
|
||||
}
|
||||
if (ta_[type].sample_count > 0) {
|
||||
ta_[type].sample_count++;
|
||||
return Status::OK();
|
||||
}
|
||||
ta_[type].sample_count++;
|
||||
|
||||
if (!ta_[type].enabled) {
|
||||
return Status::OK();
|
||||
}
|
||||
@ -1601,6 +1782,8 @@ void TraceAnalyzer::PrintStatistics() {
|
||||
ta_[type].total_succ_access += stat.a_succ_count;
|
||||
printf("*********************************************************\n");
|
||||
printf("colume family id: %u\n", stat.cf_id);
|
||||
printf("Total number of queries to this cf by %s: %" PRIu64 "\n",
|
||||
ta_[type].type_name.c_str(), stat.a_count);
|
||||
printf("Total unique keys in this cf: %" PRIu64 "\n", total_a_keys);
|
||||
printf("Average key size: %f key size medium: %" PRIu64
|
||||
" Key size Variation: %f\n",
|
||||
@ -1647,15 +1830,6 @@ void TraceAnalyzer::PrintStatistics() {
|
||||
}
|
||||
}
|
||||
|
||||
// print the key size distribution
|
||||
if (FLAGS_print_key_distribution) {
|
||||
printf("The key size distribution\n");
|
||||
for (auto& record : stat.a_key_size_stats) {
|
||||
printf("key_size %" PRIu64 " nums: %" PRIu64 "\n", record.first,
|
||||
record.second);
|
||||
}
|
||||
}
|
||||
|
||||
// print the operation correlations
|
||||
if (!FLAGS_print_correlation.empty()) {
|
||||
for (int correlation = 0;
|
||||
@ -1705,6 +1879,8 @@ void TraceAnalyzer::PrintStatistics() {
|
||||
printf("Average QPS per second: %f Peak QPS: %u\n", qps_ave_[kTaTypeNum],
|
||||
qps_peak_[kTaTypeNum]);
|
||||
}
|
||||
printf("The statistics related to query number need to times: %u\n",
|
||||
sample_max_);
|
||||
printf("Total_requests: %" PRIu64 " Total_accessed_keys: %" PRIu64
|
||||
" Total_gets: %" PRIu64 " Total_write_batch: %" PRIu64 "\n",
|
||||
total_requests_, total_access_keys_, total_gets_, total_writes_);
|
||||
|
@ -115,12 +115,15 @@ struct TraceStats {
|
||||
top_k_qps_sec;
|
||||
std::list<TraceUnit> time_series;
|
||||
std::vector<std::pair<uint64_t, uint64_t>> correlation_output;
|
||||
std::map<uint32_t, uint64_t> uni_key_num;
|
||||
|
||||
std::unique_ptr<rocksdb::WritableFile> time_series_f;
|
||||
std::unique_ptr<rocksdb::WritableFile> a_key_f;
|
||||
std::unique_ptr<rocksdb::WritableFile> a_count_dist_f;
|
||||
std::unique_ptr<rocksdb::WritableFile> a_prefix_cut_f;
|
||||
std::unique_ptr<rocksdb::WritableFile> a_value_size_f;
|
||||
std::unique_ptr<rocksdb::WritableFile> a_key_size_f;
|
||||
std::unique_ptr<rocksdb::WritableFile> a_key_num_f;
|
||||
std::unique_ptr<rocksdb::WritableFile> a_qps_f;
|
||||
std::unique_ptr<rocksdb::WritableFile> a_top_qps_prefix_f;
|
||||
std::unique_ptr<rocksdb::WritableFile> w_key_f;
|
||||
@ -140,6 +143,7 @@ struct TypeUnit {
|
||||
uint64_t total_keys;
|
||||
uint64_t total_access;
|
||||
uint64_t total_succ_access;
|
||||
uint32_t sample_count;
|
||||
std::map<uint32_t, TraceStats> stats;
|
||||
TypeUnit() = default;
|
||||
~TypeUnit() = default;
|
||||
@ -155,6 +159,7 @@ struct CfUnit {
|
||||
uint64_t a_count; // the total keys in this cf that are accessed
|
||||
std::map<uint64_t, uint64_t> w_key_size_stats; // whole key space key size
|
||||
// statistic this cf
|
||||
std::map<uint32_t, uint32_t> cf_qps;
|
||||
};
|
||||
|
||||
class TraceAnalyzer {
|
||||
@ -208,8 +213,11 @@ class TraceAnalyzer {
|
||||
uint64_t begin_time_;
|
||||
uint64_t end_time_;
|
||||
uint64_t time_series_start_;
|
||||
uint32_t sample_max_;
|
||||
uint32_t cur_time_sec_;
|
||||
std::unique_ptr<rocksdb::WritableFile> trace_sequence_f_; // readable trace
|
||||
std::unique_ptr<rocksdb::WritableFile> qps_f_; // overall qps
|
||||
std::unique_ptr<rocksdb::WritableFile> cf_qps_f_; // The qps of each CF>
|
||||
std::unique_ptr<rocksdb::SequentialFile> wkey_input_f_;
|
||||
std::vector<TypeUnit> ta_; // The main statistic collecting data structure
|
||||
std::map<uint32_t, CfUnit> cfs_; // All the cf_id appears in this trace;
|
||||
|
Loading…
Reference in New Issue
Block a user