Analyze MultiGet in trace_analyzer (#8575)

Summary:
Now we can analyze the MultiGet queries in the trace file and generate a set of statistics and analysis files. Note that when one MultiGet accesses N keys, we count each sub-get-query individually. However, the overall query number still counts the MultiGet queries, not the sub-get-queries.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8575

Test Plan: added new unit test and make check

Reviewed By: anand1976

Differential Revision: D29860633

Pulled By: zhichao-cao

fbshipit-source-id: a132128527f36828d266df8e36e3ec626c2170be
This commit is contained in:
Zhichao Cao 2021-07-22 16:51:19 -07:00 committed by Facebook GitHub Bot
parent 2e5388178f
commit 61c9bd49c1
3 changed files with 194 additions and 32 deletions

View File

@ -212,7 +212,8 @@ TEST_F(TraceAnalyzerTest, Get) {
std::vector<std::string> paras = {
"-analyze_get=true", "-analyze_put=false",
"-analyze_delete=false", "-analyze_single_delete=false",
"-analyze_range_delete=false", "-analyze_iterator=false"};
"-analyze_range_delete=false", "-analyze_iterator=false",
"-analyze_multiget=false"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
@ -229,8 +230,9 @@ TEST_F(TraceAnalyzerTest, Get) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "8",
"8", "8", "8", "8", "8", "8",
"8", "8", "0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -257,7 +259,7 @@ TEST_F(TraceAnalyzerTest, Get) {
CheckFileContent(k_whole_prefix, file_path, true);
// Check the overall qps
std::vector<std::string> all_qps = {"1 0 0 0 0 0 0 0 1"};
std::vector<std::string> all_qps = {"1 0 0 0 0 0 0 0 0 1"};
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
@ -281,7 +283,8 @@ TEST_F(TraceAnalyzerTest, Put) {
std::vector<std::string> paras = {
"-analyze_get=false", "-analyze_put=true",
"-analyze_delete=false", "-analyze_single_delete=false",
"-analyze_range_delete=false", "-analyze_iterator=false"};
"-analyze_range_delete=false", "-analyze_iterator=false",
"-analyze_multiget=false"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
@ -298,8 +301,9 @@ TEST_F(TraceAnalyzerTest, Put) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "8",
"8", "8", "8", "8", "8", "8",
"8", "8", "0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -325,7 +329,7 @@ TEST_F(TraceAnalyzerTest, Put) {
CheckFileContent(k_whole_prefix, file_path, true);
// Check the overall qps
std::vector<std::string> all_qps = {"0 1 0 0 0 0 0 0 1"};
std::vector<std::string> all_qps = {"0 1 0 0 0 0 0 0 0 1"};
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
@ -355,7 +359,8 @@ TEST_F(TraceAnalyzerTest, Delete) {
std::vector<std::string> paras = {
"-analyze_get=false", "-analyze_put=false",
"-analyze_delete=true", "-analyze_single_delete=false",
"-analyze_range_delete=false", "-analyze_iterator=false"};
"-analyze_range_delete=false", "-analyze_iterator=false",
"-analyze_multiget=false"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
@ -373,8 +378,9 @@ TEST_F(TraceAnalyzerTest, Delete) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "8",
"8", "8", "8", "8", "8", "8",
"8", "8", "0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -400,7 +406,7 @@ TEST_F(TraceAnalyzerTest, Delete) {
CheckFileContent(k_whole_prefix, file_path, true);
// Check the overall qps
std::vector<std::string> all_qps = {"0 0 1 0 0 0 0 0 1"};
std::vector<std::string> all_qps = {"0 0 1 0 0 0 0 0 0 1"};
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
@ -425,7 +431,7 @@ TEST_F(TraceAnalyzerTest, Merge) {
"-analyze_get=false", "-analyze_put=false",
"-analyze_delete=false", "-analyze_merge=true",
"-analyze_single_delete=false", "-analyze_range_delete=false",
"-analyze_iterator=false"};
"-analyze_iterator=false", "-analyze_multiget=false"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
@ -442,8 +448,9 @@ TEST_F(TraceAnalyzerTest, Merge) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "8",
"8", "8", "8", "8", "8", "8",
"8", "8", "0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -469,7 +476,7 @@ TEST_F(TraceAnalyzerTest, Merge) {
CheckFileContent(k_whole_prefix, file_path, true);
// Check the overall qps
std::vector<std::string> all_qps = {"0 0 0 0 0 1 0 0 1"};
std::vector<std::string> all_qps = {"0 0 0 0 0 1 0 0 0 1"};
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
@ -501,7 +508,7 @@ TEST_F(TraceAnalyzerTest, SingleDelete) {
"-analyze_get=false", "-analyze_put=false",
"-analyze_delete=false", "-analyze_merge=false",
"-analyze_single_delete=true", "-analyze_range_delete=false",
"-analyze_iterator=false"};
"-analyze_iterator=false", "-analyze_multiget=false"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
@ -519,8 +526,9 @@ TEST_F(TraceAnalyzerTest, SingleDelete) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "8",
"8", "8", "8", "8", "8", "8",
"8", "8", "0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -546,7 +554,7 @@ TEST_F(TraceAnalyzerTest, SingleDelete) {
CheckFileContent(k_whole_prefix, file_path, true);
// Check the overall qps
std::vector<std::string> all_qps = {"0 0 0 1 0 0 0 0 1"};
std::vector<std::string> all_qps = {"0 0 0 1 0 0 0 0 0 1"};
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
@ -572,7 +580,7 @@ TEST_F(TraceAnalyzerTest, DeleteRange) {
"-analyze_get=false", "-analyze_put=false",
"-analyze_delete=false", "-analyze_merge=false",
"-analyze_single_delete=false", "-analyze_range_delete=true",
"-analyze_iterator=false"};
"-analyze_iterator=false", "-analyze_multiget=false"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
@ -590,8 +598,9 @@ TEST_F(TraceAnalyzerTest, DeleteRange) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "8",
"8", "8", "8", "8", "8", "8",
"8", "8", "0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -618,7 +627,7 @@ TEST_F(TraceAnalyzerTest, DeleteRange) {
CheckFileContent(k_whole_prefix, file_path, true);
// Check the overall qps
std::vector<std::string> all_qps = {"0 0 0 0 2 0 0 0 2"};
std::vector<std::string> all_qps = {"0 0 0 0 2 0 0 0 0 2"};
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
@ -645,7 +654,7 @@ TEST_F(TraceAnalyzerTest, Iterator) {
"-analyze_get=false", "-analyze_put=false",
"-analyze_delete=false", "-analyze_merge=false",
"-analyze_single_delete=false", "-analyze_range_delete=false",
"-analyze_iterator=true"};
"-analyze_iterator=true", "-analyze_multiget=false"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
@ -664,8 +673,9 @@ TEST_F(TraceAnalyzerTest, Iterator) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "8",
"8", "8", "8", "8", "8", "8",
"8", "8", "0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -691,7 +701,7 @@ TEST_F(TraceAnalyzerTest, Iterator) {
CheckFileContent(k_whole_prefix, file_path, true);
// Check the overall qps
std::vector<std::string> all_qps = {"0 0 0 0 0 0 1 1 2"};
std::vector<std::string> all_qps = {"0 0 0 0 0 0 1 1 0 2"};
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
@ -755,6 +765,89 @@ TEST_F(TraceAnalyzerTest, Iterator) {
CheckFileContent(top_qps, file_path, true);
}
// Test analyzing of MultiGet. The flags below are handed to AnalyzeTrace()
// and every generated "test-multiget-0-*" output file (plus the shared
// human-readable trace and overall qps files) is then compared against the
// expected lines with CheckFileContent().
TEST_F(TraceAnalyzerTest, MultiGet) {
std::string trace_path = test_path_ + "/trace";
std::string output_path = test_path_ + "/multiget";
std::string file_path;
// NOTE(review): -analyze_range_delete is enabled here in addition to
// -analyze_multiget; presumably that is why the overall qps line below
// reports 2 in the range_delete column -- confirm this is intentional.
std::vector<std::string> paras = {
"-analyze_get=false", "-analyze_put=false",
"-analyze_delete=false", "-analyze_merge=false",
"-analyze_single_delete=false", "-analyze_range_delete=true",
"-analyze_iterator=false", "-analyze_multiget=true"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
AnalyzeTrace(paras, output_path, trace_path);
// Check the key_stats file
std::vector<std::string> k_stats = {"0 0 0 2 0.500000", "0 0 1 2 0.500000",
"0 0 2 1 0.000000", "0 0 3 2 0.500000",
"0 0 4 2 0.500000"};
file_path = output_path + "/test-multiget-0-accessed_key_stats.txt";
CheckFileContent(k_stats, file_path, true);
// Check the access count distribution
std::vector<std::string> k_dist = {"access_count: 1 num: 1",
"access_count: 2 num: 4"};
file_path =
output_path + "/test-multiget-0-accessed_key_count_distribution.txt";
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence. The nine "8" entries are the individual keys
// requested through MultiGet (operation type index 8).
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "8",
"8", "8", "8", "8", "8", "8",
"8", "8", "0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
// Check the prefix
std::vector<std::string> k_prefix = {
"0 0 0 0.000000 0.000000 0x30", "1 2 1 2.000000 0.500000 0x61",
"2 2 1 2.000000 0.500000 0x62", "3 1 1 1.000000 0.000000 0x64",
"4 2 1 2.000000 0.500000 0x67"};
file_path = output_path + "/test-multiget-0-accessed_key_prefix_cut.txt";
CheckFileContent(k_prefix, file_path, true);
// Check the time series: one entry per key requested through MultiGet
std::vector<std::string> k_series = {"8 0 0", "8 0 1", "8 0 2",
"8 0 3", "8 0 4", "8 0 0",
"8 0 1", "8 0 3", "8 0 4"};
file_path = output_path + "/test-multiget-0-time_series.txt";
CheckFileContent(k_series, file_path, false);
// Check the accessed key in whole key space
std::vector<std::string> k_whole_access = {"0 2", "1 2"};
file_path = output_path + "/test-multiget-0-whole_key_stats.txt";
CheckFileContent(k_whole_access, file_path, true);
// Check the whole key prefix cut
std::vector<std::string> k_whole_prefix = {"0 0x61", "1 0x62", "2 0x63",
"3 0x64", "4 0x65", "5 0x66"};
file_path = output_path + "/test-multiget-0-whole_key_prefix_cut.txt";
CheckFileContent(k_whole_prefix, file_path, true);
// Check the overall qps. We have 3 MultiGet queries and they requested 9
// keys in total; the multiget column counts the requested keys.
std::vector<std::string> all_qps = {"0 0 0 0 2 0 0 0 9 11"};
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
// Check the qps of MultiGet
std::vector<std::string> get_qps = {"9"};
file_path = output_path + "/test-multiget-0-qps_stats.txt";
CheckFileContent(get_qps, file_path, true);
// Check the top k qps prefix cut
std::vector<std::string> top_qps = {
"At time: 0 with QPS: 9", "The prefix: 0x61 Access count: 2",
"The prefix: 0x62 Access count: 2", "The prefix: 0x64 Access count: 1",
"The prefix: 0x67 Access count: 2", "The prefix: 0x68 Access count: 2"};
file_path =
output_path + "/test-multiget-0-accessed_top_k_qps_prefix_cut.txt";
CheckFileContent(top_qps, file_path, true);
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {

View File

@ -133,6 +133,12 @@ DEFINE_bool(analyze_range_delete, false, "Analyze the DeleteRange query.");
DEFINE_bool(analyze_merge, false, "Analyze the Merge query.");
DEFINE_bool(analyze_iterator, false,
" Analyze the iterate query like seek() and seekForPrev().");
// Command-line switch that enables analysis of MultiGet queries. The help
// text is assembled from adjacent string literals, so each fragment must
// carry its own separating space.
DEFINE_bool(analyze_multiget, false,
            " Analyze the MultiGet query. NOTE: for"
            " MultiGet, we analyze each KV-pair read in one MultiGet query. "
            "Therefore, the total queries and QPS are calculated based on "
            "the number of KV-pairs being accessed not the number of "
            "MultiGet. It can be improved in the future if needed");
DEFINE_bool(no_key, false,
" Does not output the key to the result files to make smaller.");
DEFINE_bool(print_overall_stats, true,
@ -167,13 +173,15 @@ std::map<std::string, int> taOptToIndex = {
{"get", 0}, {"put", 1},
{"delete", 2}, {"single_delete", 3},
{"range_delete", 4}, {"merge", 5},
{"iterator_Seek", 6}, {"iterator_SeekForPrev", 7}};
{"iterator_Seek", 6}, {"iterator_SeekForPrev", 7},
{"multiget", 8}};
std::map<int, std::string> taIndexToOpt = {
{0, "get"}, {1, "put"},
{2, "delete"}, {3, "single_delete"},
{4, "range_delete"}, {5, "merge"},
{6, "iterator_Seek"}, {7, "iterator_SeekForPrev"}};
{6, "iterator_Seek"}, {7, "iterator_SeekForPrev"},
{8, "multiget"}};
namespace {
@ -340,6 +348,12 @@ TraceAnalyzer::TraceAnalyzer(std::string& trace_path, std::string& output_path,
} else {
ta_[7].enabled = false;
}
ta_[8].type_name = "multiget";
if (FLAGS_analyze_multiget) {
ta_[8].enabled = true;
} else {
ta_[8].enabled = false;
}
for (int i = 0; i < kTaTypeNum; i++) {
ta_[i].sample_count = 0;
}
@ -528,6 +542,7 @@ Status TraceAnalyzer::StartProcessing() {
MultiGetPayload multiget_payload;
assert(trace_file_version_ >= 2);
TracerHelper::DecodeMultiGetPayload(&trace, &multiget_payload);
s = HandleMultiGet(multiget_payload, trace.ts);
} else if (trace.type == kTraceEnd) {
break;
}
@ -1213,7 +1228,9 @@ Status TraceAnalyzer::KeyStatsInsertion(const uint32_t& type,
unit.value_size = value_size;
unit.access_count = 1;
unit.latest_ts = ts;
if (type != TraceOperationType::kGet || value_size > 0) {
if ((type != TraceOperationType::kGet &&
type != TraceOperationType::kMultiGet) ||
value_size > 0) {
unit.succ_count = 1;
} else {
unit.succ_count = 0;
@ -1778,6 +1795,56 @@ Status TraceAnalyzer::HandleIter(uint32_t column_family_id,
return s;
}
// Handle MultiGet queries in the trace.
//
// Every key requested by the MultiGet is treated as an individual access:
// it is written to the human-readable trace sequence (when that output is
// requested and the file is open) and, if MultiGet analysis is enabled and
// the query is selected by the sampling counter, inserted into the key
// statistics.
//
// multiget_payload: decoded payload holding the column family ids and keys.
// ts: timestamp of the MultiGet trace record.
//
// Returns Status::Corruption as soon as writing the trace sequence or
// inserting the key statistics fails for any key; Status::OK() otherwise.
Status TraceAnalyzer::HandleMultiGet(MultiGetPayload& multiget_payload,
                                     const uint64_t& ts) {
  // This function never reads a value size from the payload, so every
  // sub-get is recorded with a value size of 0.
  const size_t value_size = 0;
  if (multiget_payload.cf_ids.size() != multiget_payload.multiget_keys.size()) {
    // A size mismatch is not an error of tracing or analyzing; we just
    // report it to the user. The analyzing continues.
    printf("The CF ID vector size does not match the keys vector size!\n");
  }
  // Only walk the entries that have both a column family id and a key.
  const size_t vector_size = std::min(multiget_payload.cf_ids.size(),
                                      multiget_payload.multiget_keys.size());

  if (FLAGS_convert_to_human_readable_trace && trace_sequence_f_) {
    for (size_t i = 0; i < vector_size; i++) {
      assert(i < multiget_payload.cf_ids.size() &&
             i < multiget_payload.multiget_keys.size());
      Status s = WriteTraceSequence(
          TraceOperationType::kMultiGet, multiget_payload.cf_ids[i],
          multiget_payload.multiget_keys[i], value_size, ts);
      // Check each write: a failure must not be masked by a later success.
      if (!s.ok()) {
        return Status::Corruption("Failed to write the trace sequence to file");
      }
    }
  }

  // Sampling: the counter wraps to 0 once it reaches sample_max_, and only
  // the query that observes a counter of 0 is analyzed. The whole MultiGet
  // (not each key) advances the counter by one.
  auto& multiget_stats = ta_[TraceOperationType::kMultiGet];
  if (multiget_stats.sample_count >= sample_max_) {
    multiget_stats.sample_count = 0;
  }
  if (multiget_stats.sample_count > 0) {
    multiget_stats.sample_count++;
    return Status::OK();
  }
  multiget_stats.sample_count++;

  if (!multiget_stats.enabled) {
    return Status::OK();
  }

  for (size_t i = 0; i < vector_size; i++) {
    assert(i < multiget_payload.cf_ids.size() &&
           i < multiget_payload.multiget_keys.size());
    Status s = KeyStatsInsertion(
        TraceOperationType::kMultiGet, multiget_payload.cf_ids[i],
        multiget_payload.multiget_keys[i], value_size, ts);
    // Check each insertion: a failure must not be masked by a later success.
    if (!s.ok()) {
      return Status::Corruption("Failed to insert key statistics");
    }
  }
  return Status::OK();
}
// Before the analyzer is closed, the requested general statistic results are
// printed out here. In current stage, these information are not output to
// the files.

View File

@ -32,7 +32,8 @@ enum TraceOperationType : int {
kMerge = 5,
kIteratorSeek = 6,
kIteratorSeekForPrev = 7,
kTaTypeNum = 8
kMultiGet = 8,
kTaTypeNum = 9
};
struct TraceUnit {
@ -193,6 +194,7 @@ class TraceAnalyzer {
const Slice& value);
Status HandleIter(uint32_t column_family_id, const std::string& key,
const uint64_t& ts, TraceType& trace_type);
Status HandleMultiGet(MultiGetPayload& multiget_payload, const uint64_t& ts);
std::vector<TypeUnit>& GetTaVector() { return ta_; }
private: