Add the unit test of Iterator to trace_analyzer_test (#4282)

Summary:
Add a unit test for Iterator (Seek and SeekForPrev) to trace_analyzer_test. The output files produced by analyzing the trace file are checked to make sure that the analysis results are correct.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4282

Differential Revision: D9436758

Pulled By: zhichao-cao

fbshipit-source-id: 88d471c9a69e07382d9c6a45eba72773b171e7c2
This commit is contained in:
zhichao-cao 2018-08-23 17:26:19 -07:00 committed by Facebook Github Bot
parent ad789e4e0d
commit cf7150ac2e
2 changed files with 190 additions and 156 deletions

View File

@ -62,6 +62,7 @@ class TraceAnalyzerTest : public testing::Test {
DB* db_ = nullptr;
std::string value;
std::unique_ptr<TraceWriter> trace_writer;
Iterator* single_iter = nullptr;
ASSERT_OK(
NewFileTraceWriter(env_, env_options_, trace_path, &trace_writer));
@ -77,7 +78,12 @@ class TraceAnalyzerTest : public testing::Test {
ASSERT_OK(db_->Write(wo, &batch));
ASSERT_OK(db_->Get(ro, "a", &value));
std::this_thread::sleep_for(std::chrono::seconds(1));
single_iter = db_->NewIterator(ro);
single_iter->Seek("a");
single_iter->SeekForPrev("b");
delete single_iter;
std::this_thread::sleep_for (std::chrono::seconds(1));
db_->Get(ro, "g", &value);
ASSERT_OK(db_->EndTrace());
@ -141,6 +147,30 @@ class TraceAnalyzerTest : public testing::Test {
return;
}
void AnalyzeTrace(std::vector<std::string>& paras_diff,
std::string output_path, std::string trace_path) {
std::vector<std::string> paras = {"./trace_analyzer",
"-convert_to_human_readable_trace",
"-output_key_stats",
"-output_access_count_stats",
"-output_prefix=test",
"-output_prefix_cut=1",
"-output_time_series",
"-output_value_distribution",
"-output_qps_stats",
"-no_key",
"-no_print"};
for (auto& para : paras_diff) {
paras.push_back(para);
}
Status s = env_->FileExists(trace_path);
if (!s.ok()) {
GenerateTrace(trace_path);
}
env_->CreateDir(output_path);
RunTraceAnalyzer(paras);
}
rocksdb::Env* env_;
EnvOptions env_options_;
std::string test_path_;
@ -152,28 +182,11 @@ TEST_F(TraceAnalyzerTest, Get) {
std::string trace_path = test_path_ + "/trace";
std::string output_path = test_path_ + "/get";
std::string file_path;
std::vector<std::string> paras = {"./trace_analyzer",
"-analyze_get",
"-convert_to_human_readable_trace",
"-output_key_stats",
"-output_access_count_stats",
"-output_prefix=test",
"-output_prefix_cut=1",
"-output_time_series",
"-output_value_distribution",
"-output_qps_stats",
"-no_key",
"-no_print"};
Status s = env_->FileExists(trace_path);
if (!s.ok()) {
GenerateTrace(trace_path);
}
std::vector<std::string> paras = {"-analyze_get"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
env_->CreateDir(output_path);
RunTraceAnalyzer(paras);
AnalyzeTrace(paras, output_path, trace_path);
// check the key_stats file
std::vector<std::string> k_stats = {"0 10 0 1 1.000000", "0 10 1 1 1.000000"};
@ -186,7 +199,8 @@ TEST_F(TraceAnalyzerTest, Get) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "0", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -234,29 +248,11 @@ TEST_F(TraceAnalyzerTest, Put) {
std::string trace_path = test_path_ + "/trace";
std::string output_path = test_path_ + "/put";
std::string file_path;
std::vector<std::string> paras = {"./trace_analyzer",
"-analyze_get",
"-analyze_put",
"-convert_to_human_readable_trace",
"-output_key_stats",
"-output_access_count_stats",
"-output_prefix=test",
"-output_prefix_cut=1",
"-output_time_series",
"-output_value_distribution",
"-output_qps_stats",
"-no_key",
"-no_print"};
Status s = env_->FileExists(trace_path);
if (!s.ok()) {
GenerateTrace(trace_path);
}
std::vector<std::string> paras = {"-analyze_put"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
env_->CreateDir(output_path);
RunTraceAnalyzer(paras);
AnalyzeTrace(paras, output_path, trace_path);
// check the key_stats file
std::vector<std::string> k_stats = {"0 9 0 1 1.000000"};
@ -269,7 +265,8 @@ TEST_F(TraceAnalyzerTest, Put) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "0", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -299,7 +296,7 @@ TEST_F(TraceAnalyzerTest, Put) {
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
// Check the qps of get
// Check the qps of Put
std::vector<std::string> get_qps = {"1"};
file_path = output_path + "/test-put-0-qps_stats.txt";
CheckFileContent(get_qps, file_path, true);
@ -322,30 +319,11 @@ TEST_F(TraceAnalyzerTest, Delete) {
std::string trace_path = test_path_ + "/trace";
std::string output_path = test_path_ + "/delete";
std::string file_path;
std::vector<std::string> paras = {"./trace_analyzer",
"-analyze_get",
"-analyze_put",
"-analyze_delete",
"-convert_to_human_readable_trace",
"-output_key_stats",
"-output_access_count_stats",
"-output_prefix=test",
"-output_prefix_cut=1",
"-output_time_series",
"-output_value_distribution",
"-output_qps_stats",
"-no_key",
"-no_print"};
Status s = env_->FileExists(trace_path);
if (!s.ok()) {
GenerateTrace(trace_path);
}
std::vector<std::string> paras = {"-analyze_delete"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
env_->CreateDir(output_path);
RunTraceAnalyzer(paras);
AnalyzeTrace(paras, output_path, trace_path);
// check the key_stats file
std::vector<std::string> k_stats = {"0 0 0 1 1.000000"};
@ -359,7 +337,8 @@ TEST_F(TraceAnalyzerTest, Delete) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "0", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -389,7 +368,7 @@ TEST_F(TraceAnalyzerTest, Delete) {
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
// Check the qps of get
// Check the qps of Delete
std::vector<std::string> get_qps = {"1"};
file_path = output_path + "/test-delete-0-qps_stats.txt";
CheckFileContent(get_qps, file_path, true);
@ -406,31 +385,11 @@ TEST_F(TraceAnalyzerTest, Merge) {
std::string trace_path = test_path_ + "/trace";
std::string output_path = test_path_ + "/merge";
std::string file_path;
std::vector<std::string> paras = {"./trace_analyzer",
"-analyze_get",
"-analyze_put",
"-analyze_delete",
"-analyze_merge",
"-convert_to_human_readable_trace",
"-output_key_stats",
"-output_access_count_stats",
"-output_prefix=test",
"-output_prefix_cut=1",
"-output_time_series",
"-output_value_distribution",
"-output_qps_stats",
"-no_key",
"-no_print"};
Status s = env_->FileExists(trace_path);
if (!s.ok()) {
GenerateTrace(trace_path);
}
std::vector<std::string> paras = {"-analyze_merge"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
env_->CreateDir(output_path);
RunTraceAnalyzer(paras);
AnalyzeTrace(paras, output_path, trace_path);
// check the key_stats file
std::vector<std::string> k_stats = {"0 20 0 1 1.000000"};
@ -443,7 +402,8 @@ TEST_F(TraceAnalyzerTest, Merge) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "0", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -473,7 +433,7 @@ TEST_F(TraceAnalyzerTest, Merge) {
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
// Check the qps of get
// Check the qps of Merge
std::vector<std::string> get_qps = {"1"};
file_path = output_path + "/test-merge-0-qps_stats.txt";
CheckFileContent(get_qps, file_path, true);
@ -497,32 +457,11 @@ TEST_F(TraceAnalyzerTest, SingleDelete) {
std::string trace_path = test_path_ + "/trace";
std::string output_path = test_path_ + "/single_delete";
std::string file_path;
std::vector<std::string> paras = {"./trace_analyzer",
"-analyze_get",
"-analyze_put",
"-analyze_delete",
"-analyze_merge",
"-analyze_single_delete",
"-convert_to_human_readable_trace",
"-output_key_stats",
"-output_access_count_stats",
"-output_prefix=test",
"-output_prefix_cut=1",
"-output_time_series",
"-output_value_distribution",
"-output_qps_stats",
"-no_key",
"-no_print"};
Status s = env_->FileExists(trace_path);
if (!s.ok()) {
GenerateTrace(trace_path);
}
std::vector<std::string> paras = {"-analyze_single_delete"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
env_->CreateDir(output_path);
RunTraceAnalyzer(paras);
AnalyzeTrace(paras, output_path, trace_path);
// check the key_stats file
std::vector<std::string> k_stats = {"0 0 0 1 1.000000"};
@ -536,7 +475,8 @@ TEST_F(TraceAnalyzerTest, SingleDelete) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "0", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -566,7 +506,7 @@ TEST_F(TraceAnalyzerTest, SingleDelete) {
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
// Check the qps of get
// Check the qps of SingleDelete
std::vector<std::string> get_qps = {"1"};
file_path = output_path + "/test-single_delete-0-qps_stats.txt";
CheckFileContent(get_qps, file_path, true);
@ -584,33 +524,11 @@ TEST_F(TraceAnalyzerTest, DeleteRange) {
std::string trace_path = test_path_ + "/trace";
std::string output_path = test_path_ + "/range_delete";
std::string file_path;
std::vector<std::string> paras = {"./trace_analyzer",
"-analyze_get",
"-analyze_put",
"-analyze_delete",
"-analyze_merge",
"-analyze_single_delete",
"-analyze_range_delete",
"-convert_to_human_readable_trace",
"-output_key_stats",
"-output_access_count_stats",
"-output_prefix=test",
"-output_prefix_cut=1",
"-output_time_series",
"-output_value_distribution",
"-output_qps_stats",
"-no_key",
"-no_print"};
Status s = env_->FileExists(trace_path);
if (!s.ok()) {
GenerateTrace(trace_path);
}
std::vector<std::string> paras = {"-analyze_range_delete"};
paras.push_back("-output_dir=" + output_path);
paras.push_back("-trace_path=" + trace_path);
paras.push_back("-key_space_dir=" + test_path_);
env_->CreateDir(output_path);
RunTraceAnalyzer(paras);
AnalyzeTrace(paras, output_path, trace_path);
// check the key_stats file
std::vector<std::string> k_stats = {"0 0 0 1 1.000000", "0 0 1 1 1.000000"};
@ -624,7 +542,8 @@ TEST_F(TraceAnalyzerTest, DeleteRange) {
CheckFileContent(k_dist, file_path, true);
// Check the trace sequence
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4", "0", "0"};
std::vector<std::string> k_sequence = {"1", "5", "2", "3", "4",
"0", "6", "7", "0"};
file_path = output_path + "/test-human_readable_trace.txt";
CheckFileContent(k_sequence, file_path, false);
@ -655,7 +574,7 @@ TEST_F(TraceAnalyzerTest, DeleteRange) {
file_path = output_path + "/test-qps_stats.txt";
CheckFileContent(all_qps, file_path, true);
// Check the qps of get
// Check the qps of DeleteRange
std::vector<std::string> get_qps = {"2"};
file_path = output_path + "/test-range_delete-0-qps_stats.txt";
CheckFileContent(get_qps, file_path, true);
@ -669,6 +588,121 @@ TEST_F(TraceAnalyzerTest, DeleteRange) {
CheckFileContent(top_qps, file_path, true);
}
// Analyzes the trace with -analyze_iterator and verifies the generated output
// files for both iterator query types: Seek and SeekForPrev.
TEST_F(TraceAnalyzerTest, Iterator) {
  const std::string trace_file = test_path_ + "/trace";
  const std::string out_dir = test_path_ + "/iterator";

  std::vector<std::string> analyzer_args = {
      "-analyze_iterator", "-output_dir=" + out_dir,
      "-trace_path=" + trace_file, "-key_space_dir=" + test_path_};
  AnalyzeTrace(analyzer_args, out_dir, trace_file);

  // Reused for every check below: the expected lines and the file to read.
  std::vector<std::string> expected;
  std::string target;

  // ---------------------------------------------------------------------
  // Seek
  // ---------------------------------------------------------------------

  // Statistics of the accessed keys.
  expected = {"0 0 0 1 1.000000"};
  target = out_dir + "/test-iterator_Seek-0-accessed_key_stats.txt";
  CheckFileContent(expected, target, true);

  // Distribution of access counts.
  expected = {"access_count: 1 num: 1"};
  target =
      out_dir + "/test-iterator_Seek-0-accessed_key_count_distribution.txt";
  CheckFileContent(expected, target, true);

  // Human-readable trace: sequence of record types.
  expected = {"1", "5", "2", "3", "4", "0", "6", "7", "0"};
  target = out_dir + "/test-human_readable_trace.txt";
  CheckFileContent(expected, target, false);

  // Prefix cut of the accessed keys.
  expected = {"0 0 0 0.000000 0.000000 0x30"};
  target = out_dir + "/test-iterator_Seek-0-accessed_key_prefix_cut.txt";
  CheckFileContent(expected, target, true);

  // Time series.
  expected = {"6 1 0"};
  target = out_dir + "/test-iterator_Seek-0-time_series.txt";
  CheckFileContent(expected, target, false);

  // Accessed key within the whole key space.
  expected = {"0 1"};
  target = out_dir + "/test-iterator_Seek-0-whole_key_stats.txt";
  CheckFileContent(expected, target, true);

  // Prefix cut of the whole key space.
  expected = {"0 0x61", "1 0x62", "2 0x63", "3 0x64", "4 0x65", "5 0x66"};
  target = out_dir + "/test-iterator_Seek-0-whole_key_prefix_cut.txt";
  CheckFileContent(expected, target, true);

  // Overall QPS across all query types.
  expected = {"1 1 1 1 2 1 1 1 9"};
  target = out_dir + "/test-qps_stats.txt";
  CheckFileContent(expected, target, true);

  // QPS of Seek alone.
  expected = {"1"};
  target = out_dir + "/test-iterator_Seek-0-qps_stats.txt";
  CheckFileContent(expected, target, true);

  // Top-k QPS prefix cut.
  expected = {"At time: 0 with QPS: 1", "The prefix: 0x61 Access count: 1"};
  target =
      out_dir + "/test-iterator_Seek-0-accessed_top_k_qps_prefix_cut.txt";
  CheckFileContent(expected, target, true);

  // ---------------------------------------------------------------------
  // SeekForPrev
  // ---------------------------------------------------------------------

  // Statistics of the accessed keys.
  expected = {"0 0 0 1 1.000000"};
  target = out_dir + "/test-iterator_SeekForPrev-0-accessed_key_stats.txt";
  CheckFileContent(expected, target, true);

  // Distribution of access counts.
  expected = {"access_count: 1 num: 1"};
  target =
      out_dir +
      "/test-iterator_SeekForPrev-0-accessed_key_count_distribution.txt";
  CheckFileContent(expected, target, true);

  // Prefix cut of the accessed keys.
  expected = {"0 0 0 0.000000 0.000000 0x30"};
  target =
      out_dir + "/test-iterator_SeekForPrev-0-accessed_key_prefix_cut.txt";
  CheckFileContent(expected, target, true);

  // Time series.
  expected = {"7 0 0"};
  target = out_dir + "/test-iterator_SeekForPrev-0-time_series.txt";
  CheckFileContent(expected, target, false);

  // Accessed key within the whole key space.
  expected = {"1 1"};
  target = out_dir + "/test-iterator_SeekForPrev-0-whole_key_stats.txt";
  CheckFileContent(expected, target, true);

  // Prefix cut of the whole key space.
  expected = {"0 0x61", "1 0x62", "2 0x63", "3 0x64", "4 0x65", "5 0x66"};
  target = out_dir + "/test-iterator_SeekForPrev-0-whole_key_prefix_cut.txt";
  CheckFileContent(expected, target, true);

  // QPS of SeekForPrev alone.
  expected = {"1"};
  target = out_dir + "/test-iterator_SeekForPrev-0-qps_stats.txt";
  CheckFileContent(expected, target, true);

  // Top-k QPS prefix cut.
  expected = {"At time: 0 with QPS: 1", "The prefix: 0x62 Access count: 1"};
  target = out_dir +
           "/test-iterator_SeekForPrev-0-accessed_top_k_qps_prefix_cut.txt";
  CheckFileContent(expected, target, true);
}
} // namespace rocksdb
int main(int argc, char** argv) {

View File

@ -63,36 +63,36 @@ DEFINE_string(output_prefix, "trace",
DEFINE_bool(output_key_stats, false,
"Output the key access count statistics to file\n"
"for accessed keys:\n"
"file name: <prefix>-<query type>-<cf_id>-accessed_key_stats.txt\n"
"file name: <prefix>-<query_type>-<cf_id>-accessed_key_stats.txt\n"
"Format:[cf_id value_size access_keyid access_count]\n"
"for the whole key space keys:\n"
"File name: <prefix>-<query type>-<cf_id>-whole_key_stats.txt\n"
"File name: <prefix>-<query_type>-<cf_id>-whole_key_stats.txt\n"
"Format:[whole_key_space_keyid access_count]");
DEFINE_bool(output_access_count_stats, false,
"Output the access count distribution statistics to file.\n"
"File name: <prefix>-<query type>-<cf_id>-accessed_"
"File name: <prefix>-<query_type>-<cf_id>-accessed_"
"key_count_distribution.txt \n"
"Format:[access_count number_of_access_count]");
DEFINE_bool(output_time_series, false,
"Output the access time in second of each key, "
"such that we can have the time series data of the queries \n"
"File name: <prefix>-<query type>-<cf_id>-time_series.txt\n"
"File name: <prefix>-<query_type>-<cf_id>-time_series.txt\n"
"Format:[type_id time_in_sec access_keyid].");
DEFINE_int32(output_prefix_cut, 0,
"The number of bytes as prefix to cut the keys.\n"
"if it is enabled, it will generate the following:\n"
"for accessed keys:\n"
"File name: <prefix>-<query type>-<cf_id>-"
"If it is enabled, it will generate the following:\n"
"For accessed keys:\n"
"File name: <prefix>-<query_type>-<cf_id>-"
"accessed_key_prefix_cut.txt \n"
"Format:[acessed_keyid access_count_of_prefix "
"number_of_keys_in_prefix average_key_access "
"prefix_succ_ratio prefix]\n"
"for whole key space keys:\n"
"File name: <prefix>-<query type>-<cf_id>"
"For whole key space keys:\n"
"File name: <prefix>-<query_type>-<cf_id>"
"-whole_key_prefix_cut.txt\n"
"Format:[start_keyid_in_whole_keyspace prefix]\n"
"if 'output_qps_stats' and 'top_k' are enabled, it will output:\n"
"File name: <prefix>-<query type>-<cf_id>"
"File name: <prefix>-<query_type>-<cf_id>"
"-accessed_top_k_qps_prefix_cut.txt\n"
"Format:[the_top_ith_qps_time QPS], [prefix qps_of_this_second].");
DEFINE_bool(convert_to_human_readable_trace, false,
@ -101,7 +101,7 @@ DEFINE_bool(convert_to_human_readable_trace, false,
"This file will be extremely large "
"(similar size as the original binary trace file). "
"You can specify 'no_key' to reduce the size, if key is not "
"needed in the next step\n"
"needed in the next step.\n"
"File name: <prefix>_human_readable_trace.txt\n"
"Format:[type_id cf_id value_size time_in_micorsec <key>].");
DEFINE_bool(output_qps_stats, false,
@ -110,8 +110,8 @@ DEFINE_bool(output_qps_stats, false,
"The time is started from the first trace record\n"
"File name: <prefix>_qps_stats.txt\n"
"Format: [qps_type_1 qps_type_2 ...... overall_qps]\n"
"For each cf and query, it will have its own qps output\n"
"File name: <prefix>-<query type>-<cf_id>_qps_stats.txt \n"
"For each cf and query, it will have its own qps output.\n"
"File name: <prefix>-<query_type>-<cf_id>_qps_stats.txt \n"
"Format:[query_count_in_this_second].");
DEFINE_bool(no_print, false, "Do not print out any result");
DEFINE_string(
@ -122,7 +122,7 @@ DEFINE_string(
"get, put, delete, single_delete, rangle_delete, merge. No space "
"between the pairs separated by commar. Example: =[get,get]... "
"It will print out the number of pairs of 'A after B' and "
"the average time interval between the two query");
"the average time interval between the two query.");
DEFINE_string(key_space_dir, "",
"<the directory stores full key space files> \n"
"The key space files should be: <column family id>.txt");
@ -143,7 +143,7 @@ DEFINE_bool(print_key_distribution, false, "Print the key size distribution.");
DEFINE_bool(
output_value_distribution, false,
"Out put the value size distribution, only available for Put and Merge.\n"
"File name: <prefix>-<query type>-<cf_id>"
"File name: <prefix>-<query_type>-<cf_id>"
"-accessed_value_size_distribution.txt\n"
"Format:[Number_of_value_size_between x and "
"x+value_interval is: <the count>]");