Add 95% confidence intervals to db_bench output (#9882)
Summary: Enhance `db_bench` output with 95% statistical confidence intervals for better performance evaluation. The goal is to make it possible to tell real performance differences apart from random run-to-run variance when a benchmark is run over multiple iterations. The aggregate output now shows the confidence interval half-width in parentheses: ``` $ ./db_bench --benchmarks=fillseq[-X10] Running benchmark for 10 times fillseq : 4.961 micros/op 201578 ops/sec; 22.3 MB/s fillseq : 5.030 micros/op 198824 ops/sec; 22.0 MB/s fillseq [AVG 2 runs] : 200201 (± 2698) ops/sec; 22.1 (± 0.3) MB/sec fillseq : 4.963 micros/op 201471 ops/sec; 22.3 MB/s fillseq [AVG 3 runs] : 200624 (± 1765) ops/sec; 22.2 (± 0.2) MB/sec fillseq : 5.035 micros/op 198625 ops/sec; 22.0 MB/s fillseq [AVG 4 runs] : 200124 (± 1586) ops/sec; 22.1 (± 0.2) MB/sec fillseq : 4.979 micros/op 200861 ops/sec; 22.2 MB/s fillseq [AVG 5 runs] : 200272 (± 1262) ops/sec; 22.2 (± 0.1) MB/sec fillseq : 4.893 micros/op 204367 ops/sec; 22.6 MB/s fillseq [AVG 6 runs] : 200954 (± 1688) ops/sec; 22.2 (± 0.2) MB/sec fillseq : 4.914 micros/op 203502 ops/sec; 22.5 MB/s fillseq [AVG 7 runs] : 201318 (± 1595) ops/sec; 22.3 (± 0.2) MB/sec fillseq : 4.998 micros/op 200074 ops/sec; 22.1 MB/s fillseq [AVG 8 runs] : 201163 (± 1415) ops/sec; 22.3 (± 0.2) MB/sec fillseq : 4.946 micros/op 202188 ops/sec; 22.4 MB/s fillseq [AVG 9 runs] : 201277 (± 1267) ops/sec; 22.3 (± 0.1) MB/sec fillseq : 5.093 micros/op 196331 ops/sec; 21.7 MB/s fillseq [AVG 10 runs] : 200782 (± 1491) ops/sec; 22.2 (± 0.2) MB/sec fillseq [AVG 10 runs] : 200782 (± 1491) ops/sec; 22.2 (± 0.2) MB/sec fillseq [MEDIAN 10 runs] : 201166 ops/sec; 22.3 MB/s ``` To print the interval's lower and upper bounds explicitly instead, use the `--confidence_interval_only` flag: ``` $ ./db_bench --benchmarks=fillseq[-X10] --confidence_interval_only Running benchmark for 10 times fillseq : 4.935 micros/op 202648 ops/sec; 22.4 MB/s fillseq : 5.078 micros/op 196943 ops/sec; 21.8 MB/s fillseq [CI95 2 runs] : (194205, 205385) ops/sec; (21.5, 22.7) MB/sec fillseq : 5.159 micros/op 
193816 ops/sec; 21.4 MB/s fillseq [CI95 3 runs] : (192735, 202869) ops/sec; (21.3, 22.4) MB/sec fillseq : 4.947 micros/op 202158 ops/sec; 22.4 MB/s fillseq [CI95 4 runs] : (194721, 203061) ops/sec; (21.5, 22.5) MB/sec fillseq : 4.908 micros/op 203756 ops/sec; 22.5 MB/s fillseq [CI95 5 runs] : (196113, 203615) ops/sec; (21.7, 22.5) MB/sec fillseq : 5.063 micros/op 197528 ops/sec; 21.9 MB/s fillseq [CI95 6 runs] : (196319, 202631) ops/sec; (21.7, 22.4) MB/sec fillseq : 5.214 micros/op 191799 ops/sec; 21.2 MB/s fillseq [CI95 7 runs] : (194953, 201803) ops/sec; (21.6, 22.3) MB/sec fillseq : 5.260 micros/op 190095 ops/sec; 21.0 MB/s fillseq [CI95 8 runs] : (193749, 200937) ops/sec; (21.4, 22.2) MB/sec fillseq : 5.076 micros/op 196992 ops/sec; 21.8 MB/s fillseq [CI95 9 runs] : (194134, 200474) ops/sec; (21.5, 22.2) MB/sec fillseq : 5.388 micros/op 185603 ops/sec; 20.5 MB/s fillseq [CI95 10 runs] : (192487, 199781) ops/sec; (21.3, 22.1) MB/sec fillseq [AVG 10 runs] : 196134 (± 3647) ops/sec; 21.7 (± 0.4) MB/sec fillseq [MEDIAN 10 runs] : 196968 ops/sec; 21.8 MB/sec ``` Pull Request resolved: https://github.com/facebook/rocksdb/pull/9882 Reviewed By: pdillinger Differential Revision: D35796148 Pulled By: vanekjar fbshipit-source-id: 8313712d16728ff982b8aff28195ee56622385b8
This commit is contained in:
parent
5bd374b392
commit
fb9a167a55
@ -410,6 +410,10 @@ DEFINE_double(read_random_exp_range, 0.0,
|
||||
|
||||
DEFINE_bool(histogram, false, "Print histogram of operation timings");
|
||||
|
||||
// Off by default. When set, the repeat loop reports each iteration's
// aggregate through CombinedStats::ReportWithConfidenceIntervals() (explicit
// lower/upper bounds) instead of CombinedStats::Report() (average ± half-width).
DEFINE_bool(confidence_interval_only, false,
|
||||
"Print 95% confidence interval upper and lower bounds only for "
|
||||
"aggregate stats.");
|
||||
|
||||
DEFINE_bool(enable_numa, false,
|
||||
"Make operations aware of NUMA architecture and bind memory "
|
||||
"and cpus corresponding to nodes together. In NUMA, memory "
|
||||
@ -2319,28 +2323,83 @@ class CombinedStats {
|
||||
}
|
||||
|
||||
void Report(const std::string& bench_name) {
|
||||
if (throughput_ops_.size() < 2) {
|
||||
// skip if there are not enough samples
|
||||
return;
|
||||
}
|
||||
|
||||
const char* name = bench_name.c_str();
|
||||
int num_runs = static_cast<int>(throughput_ops_.size());
|
||||
|
||||
if (throughput_mbs_.size() == throughput_ops_.size()) {
|
||||
fprintf(stdout,
|
||||
"%s [AVG %d runs] : %d ops/sec; %6.1f MB/sec\n"
|
||||
"%s [AVG %d runs] : %d (± %d) ops/sec; %6.1f (± %.1f) MB/sec\n",
|
||||
name, num_runs, static_cast<int>(CalcAvg(throughput_ops_)),
|
||||
static_cast<int>(CalcConfidence95(throughput_ops_)),
|
||||
CalcAvg(throughput_mbs_), CalcConfidence95(throughput_mbs_));
|
||||
} else {
|
||||
fprintf(stdout, "%s [AVG %d runs] : %d (± %d) ops/sec\n", name, num_runs,
|
||||
static_cast<int>(CalcAvg(throughput_ops_)),
|
||||
static_cast<int>(CalcConfidence95(throughput_ops_)));
|
||||
}
|
||||
}
|
||||
|
||||
void ReportWithConfidenceIntervals(const std::string& bench_name) {
|
||||
if (throughput_ops_.size() < 2) {
|
||||
// skip if there are not enough samples
|
||||
return;
|
||||
}
|
||||
|
||||
const char* name = bench_name.c_str();
|
||||
int num_runs = static_cast<int>(throughput_ops_.size());
|
||||
|
||||
int ops_avg = static_cast<int>(CalcAvg(throughput_ops_));
|
||||
int ops_confidence_95 = static_cast<int>(CalcConfidence95(throughput_ops_));
|
||||
|
||||
if (throughput_mbs_.size() == throughput_ops_.size()) {
|
||||
double mbs_avg = CalcAvg(throughput_mbs_);
|
||||
double mbs_confidence_95 = CalcConfidence95(throughput_mbs_);
|
||||
fprintf(stdout,
|
||||
"%s [CI95 %d runs] : (%d, %d) ops/sec; (%.1f, %.1f) MB/sec\n",
|
||||
name, num_runs, ops_avg - ops_confidence_95,
|
||||
ops_avg + ops_confidence_95, mbs_avg - mbs_confidence_95,
|
||||
mbs_avg + mbs_confidence_95);
|
||||
} else {
|
||||
fprintf(stdout, "%s [CI95 %d runs] : (%d, %d) ops/sec\n", name, num_runs,
|
||||
ops_avg - ops_confidence_95, ops_avg + ops_confidence_95);
|
||||
}
|
||||
}
|
||||
|
||||
void ReportFinal(const std::string& bench_name) {
|
||||
if (throughput_ops_.size() < 2) {
|
||||
// skip if there are not enough samples
|
||||
return;
|
||||
}
|
||||
|
||||
const char* name = bench_name.c_str();
|
||||
int num_runs = static_cast<int>(throughput_ops_.size());
|
||||
|
||||
if (throughput_mbs_.size() == throughput_ops_.size()) {
|
||||
fprintf(stdout,
|
||||
"%s [AVG %d runs] : %d (± %d) ops/sec; %6.1f (± %.1f) MB/sec\n"
|
||||
"%s [MEDIAN %d runs] : %d ops/sec; %6.1f MB/sec\n",
|
||||
name, num_runs, static_cast<int>(CalcAvg(throughput_ops_)),
|
||||
CalcAvg(throughput_mbs_), name, num_runs,
|
||||
static_cast<int>(CalcMedian(throughput_ops_)),
|
||||
static_cast<int>(CalcConfidence95(throughput_ops_)),
|
||||
CalcAvg(throughput_mbs_), CalcConfidence95(throughput_mbs_), name,
|
||||
num_runs, static_cast<int>(CalcMedian(throughput_ops_)),
|
||||
CalcMedian(throughput_mbs_));
|
||||
} else {
|
||||
fprintf(stdout,
|
||||
"%s [AVG %d runs] : %d ops/sec\n"
|
||||
"%s [AVG %d runs] : %d (± %d) ops/sec\n"
|
||||
"%s [MEDIAN %d runs] : %d ops/sec\n",
|
||||
name, num_runs, static_cast<int>(CalcAvg(throughput_ops_)), name,
|
||||
name, num_runs, static_cast<int>(CalcAvg(throughput_ops_)),
|
||||
static_cast<int>(CalcConfidence95(throughput_ops_)), name,
|
||||
num_runs, static_cast<int>(CalcMedian(throughput_ops_)));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
double CalcAvg(std::vector<double> data) {
|
||||
double CalcAvg(std::vector<double>& data) {
|
||||
double avg = 0;
|
||||
for (double x : data) {
|
||||
avg += x;
|
||||
@ -2349,7 +2408,20 @@ class CombinedStats {
|
||||
return avg;
|
||||
}
|
||||
|
||||
double CalcMedian(std::vector<double> data) {
|
||||
// Calculates 95% CI assuming a normal distribution of samples.
|
||||
// Samples are not from a normal distribution, but it still
|
||||
// provides useful approximation.
|
||||
double CalcConfidence95(std::vector<double>& data) {
|
||||
assert(data.size() > 1);
|
||||
double avg = CalcAvg(data);
|
||||
double std_error = CalcStdDev(data, avg) / std::sqrt(data.size());
|
||||
|
||||
// Z score for the 97.5 percentile
|
||||
// see https://en.wikipedia.org/wiki/1.96
|
||||
return 1.959964 * std_error;
|
||||
}
|
||||
|
||||
double CalcMedian(std::vector<double>& data) {
|
||||
assert(data.size() > 0);
|
||||
std::sort(data.begin(), data.end());
|
||||
|
||||
@ -2363,6 +2435,18 @@ class CombinedStats {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the sample standard deviation of `data` around `average`, which
// the caller supplies as the mean of `data`. `data` is only read.
// Requires at least two samples, since the divisor is (n - 1).
double CalcStdDev(const std::vector<double>& data, double average) {
  assert(data.size() > 1);

  double squared_sum = 0.0;
  for (double x : data) {
    // plain multiplication instead of std::pow for a fixed exponent of 2
    const double deviation = x - average;
    squared_sum += deviation * deviation;
  }

  // using samples count - 1 following Bessel's correction
  // see https://en.wikipedia.org/wiki/Bessel%27s_correction
  return std::sqrt(squared_sum / static_cast<double>(data.size() - 1));
}
|
||||
|
||||
std::vector<double> throughput_ops_;
|
||||
std::vector<double> throughput_mbs_;
|
||||
};
|
||||
@ -3525,9 +3609,14 @@ class Benchmark {
|
||||
for (int i = 0; i < num_repeat; i++) {
|
||||
Stats stats = RunBenchmark(num_threads, name, method);
|
||||
combined_stats.AddStats(stats);
|
||||
if (FLAGS_confidence_interval_only) {
|
||||
combined_stats.ReportWithConfidenceIntervals(name);
|
||||
} else {
|
||||
combined_stats.Report(name);
|
||||
}
|
||||
}
|
||||
if (num_repeat > 1) {
|
||||
combined_stats.Report(name);
|
||||
combined_stats.ReportFinal(name);
|
||||
}
|
||||
}
|
||||
if (post_process_method != nullptr) {
|
||||
|
Loading…
Reference in New Issue
Block a user