Move benchmark timing to Env::NowNanos()
This commit is contained in:
parent
e316af5f16
commit
ba16c1f410
@ -12,7 +12,6 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <iostream>
|
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
@ -192,21 +191,7 @@ static double RunBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
|
|||||||
// The key here is accuracy; too low numbers means the accuracy was
|
// The key here is accuracy; too low numbers means the accuracy was
|
||||||
// coarse. We up the ante until we get to at least minNanoseconds
|
// coarse. We up the ante until we get to at least minNanoseconds
|
||||||
// timings.
|
// timings.
|
||||||
static uint64_t resolutionInNs = 0;
|
static const auto minNanoseconds = FLAGS_bm_min_usec * 1000UL;
|
||||||
if (!resolutionInNs) {
|
|
||||||
timespec ts;
|
|
||||||
ASSERT_EQ(0, clock_getres(detail::DEFAULT_CLOCK_ID, &ts));
|
|
||||||
ASSERT_EQ(0, ts.tv_sec); // "Clock sucks.";
|
|
||||||
ASSERT_LT(0, ts.tv_nsec); // "Clock too fast for its own good.";
|
|
||||||
ASSERT_EQ(1, ts.tv_nsec); // "Clock too coarse, upgrade your kernel.";
|
|
||||||
resolutionInNs = ts.tv_nsec;
|
|
||||||
}
|
|
||||||
// We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
|
|
||||||
// the clock resolution is worse than that, it will be larger. In
|
|
||||||
// essence we're aiming at making the quantization noise 0.01%.
|
|
||||||
static const auto minNanoseconds =
|
|
||||||
max(FLAGS_bm_min_usec * 1000UL,
|
|
||||||
min<uint64_t>(resolutionInNs * 100000, 1000000000ULL));
|
|
||||||
|
|
||||||
// We do measurements in several epochs and take the minimum, to
|
// We do measurements in several epochs and take the minimum, to
|
||||||
// account for jitter.
|
// account for jitter.
|
||||||
@ -214,8 +199,8 @@ static double RunBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
|
|||||||
// We establish a total time budget as we don't want a measurement
|
// We establish a total time budget as we don't want a measurement
|
||||||
// to take too long. This will curtail the number of actual epochs.
|
// to take too long. This will curtail the number of actual epochs.
|
||||||
const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000;
|
const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000;
|
||||||
timespec global;
|
auto env = Env::Default();
|
||||||
ASSERT_EQ(0, clock_gettime(CLOCK_REALTIME, &global));
|
uint64_t global = env->NowNanos();
|
||||||
|
|
||||||
double epochResults[epochs] = { 0 };
|
double epochResults[epochs] = { 0 };
|
||||||
size_t actualEpochs = 0;
|
size_t actualEpochs = 0;
|
||||||
@ -233,9 +218,8 @@ static double RunBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
|
|||||||
// Done with the current epoch, we got a meaningful timing.
|
// Done with the current epoch, we got a meaningful timing.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
timespec now;
|
uint64_t now = env->NowNanos();
|
||||||
ASSERT_EQ(0, clock_gettime(CLOCK_REALTIME, &now));
|
if ((now - global) >= timeBudgetInNs) {
|
||||||
if (detail::TimespecDiff(now, global) >= timeBudgetInNs) {
|
|
||||||
// No more time budget available.
|
// No more time budget available.
|
||||||
++actualEpochs;
|
++actualEpochs;
|
||||||
break;
|
break;
|
||||||
|
@ -12,11 +12,11 @@
|
|||||||
#include <gflags/gflags.h>
|
#include <gflags/gflags.h>
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <ctime>
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
|
||||||
#include "util/testharness.h"
|
#include "util/testharness.h"
|
||||||
|
#include "rocksdb/env.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
namespace benchmark {
|
namespace benchmark {
|
||||||
@ -28,13 +28,6 @@ void RunBenchmarks();
|
|||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
|
|
||||||
/**
|
|
||||||
* This is the clock ID used for measuring time. On older kernels, the
|
|
||||||
* resolution of this clock will be very coarse, which will cause the
|
|
||||||
* benchmarks to fail.
|
|
||||||
*/
|
|
||||||
enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME };
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a benchmark wrapped in a std::function. Only used
|
* Adds a benchmark wrapped in a std::function. Only used
|
||||||
* internally. Pass by value is intentional.
|
* internally. Pass by value is intentional.
|
||||||
@ -43,45 +36,6 @@ void AddBenchmarkImpl(const char* file,
|
|||||||
const char* name,
|
const char* name,
|
||||||
std::function<uint64_t(unsigned int)>);
|
std::function<uint64_t(unsigned int)>);
|
||||||
|
|
||||||
/**
|
|
||||||
* Takes the difference between two timespec values. end is assumed to
|
|
||||||
* occur after start.
|
|
||||||
*/
|
|
||||||
inline uint64_t TimespecDiff(timespec end, timespec start) {
|
|
||||||
if (end.tv_sec == start.tv_sec) {
|
|
||||||
assert(end.tv_nsec >= start.tv_nsec);
|
|
||||||
return end.tv_nsec - start.tv_nsec;
|
|
||||||
}
|
|
||||||
assert(end.tv_sec > start.tv_sec &&
|
|
||||||
end.tv_sec - start.tv_sec <
|
|
||||||
std::numeric_limits<uint64_t>::max() / 1000000000UL);
|
|
||||||
return (end.tv_sec - start.tv_sec) * 1000000000UL
|
|
||||||
+ end.tv_nsec - start.tv_nsec;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Takes the difference between two sets of timespec values. The first
|
|
||||||
* two come from a high-resolution clock whereas the other two come
|
|
||||||
* from a low-resolution clock. The crux of the matter is that
|
|
||||||
* high-res values may be bogus as documented in
|
|
||||||
* http://linux.die.net/man/3/clock_gettime. The trouble is when the
|
|
||||||
* running process migrates from one CPU to another, which is more
|
|
||||||
* likely for long-running processes. Therefore we watch for high
|
|
||||||
* differences between the two timings.
|
|
||||||
*
|
|
||||||
* This function is subject to further improvements.
|
|
||||||
*/
|
|
||||||
inline uint64_t TimespecDiff(timespec end, timespec start,
|
|
||||||
timespec endCoarse, timespec startCoarse) {
|
|
||||||
auto fine = TimespecDiff(end, start);
|
|
||||||
auto coarse = TimespecDiff(endCoarse, startCoarse);
|
|
||||||
if (coarse - fine >= 1000000) {
|
|
||||||
// The fine time is in all likelihood bogus
|
|
||||||
return coarse;
|
|
||||||
}
|
|
||||||
return fine;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace detail
|
} // namespace detail
|
||||||
|
|
||||||
|
|
||||||
@ -89,42 +43,37 @@ inline uint64_t TimespecDiff(timespec end, timespec start,
|
|||||||
* Supporting type for BENCHMARK_SUSPEND defined below.
|
* Supporting type for BENCHMARK_SUSPEND defined below.
|
||||||
*/
|
*/
|
||||||
struct BenchmarkSuspender {
|
struct BenchmarkSuspender {
|
||||||
BenchmarkSuspender() {
|
BenchmarkSuspender() { start_ = Env::Default()->NowNanos(); }
|
||||||
ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start_));
|
|
||||||
}
|
|
||||||
|
|
||||||
BenchmarkSuspender(const BenchmarkSuspender &) = delete;
|
BenchmarkSuspender(const BenchmarkSuspender&) = delete;
|
||||||
BenchmarkSuspender(BenchmarkSuspender && rhs) {
|
BenchmarkSuspender(BenchmarkSuspender && rhs) {
|
||||||
start_ = rhs.start_;
|
start_ = rhs.start_;
|
||||||
rhs.start_.tv_nsec = rhs.start_.tv_sec = 0;
|
rhs.start_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete;
|
BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete;
|
||||||
BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) {
|
BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) {
|
||||||
if (start_.tv_nsec > 0 || start_.tv_sec > 0) {
|
if (start_ > 0) {
|
||||||
tally();
|
tally();
|
||||||
}
|
}
|
||||||
start_ = rhs.start_;
|
start_ = rhs.start_;
|
||||||
rhs.start_.tv_nsec = rhs.start_.tv_sec = 0;
|
rhs.start_ = 0;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
~BenchmarkSuspender() {
|
~BenchmarkSuspender() {
|
||||||
if (start_.tv_nsec > 0 || start_.tv_sec > 0) {
|
if (start_ > 0) {
|
||||||
tally();
|
tally();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Dismiss() {
|
void Dismiss() {
|
||||||
assert(start_.tv_nsec > 0 || start_.tv_sec > 0);
|
assert(start_ > 0);
|
||||||
tally();
|
tally();
|
||||||
start_.tv_nsec = start_.tv_sec = 0;
|
start_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Rehire() {
|
void Rehire() { start_ = Env::Default()->NowNanos(); }
|
||||||
assert(start_.tv_nsec == 0 || start_.tv_sec == 0);
|
|
||||||
ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start_));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This helps the macro definition. To get around the dangers of
|
* This helps the macro definition. To get around the dangers of
|
||||||
@ -132,9 +81,7 @@ struct BenchmarkSuspender {
|
|||||||
* arithmetic).
|
* arithmetic).
|
||||||
*/
|
*/
|
||||||
/* implicit */
|
/* implicit */
|
||||||
operator int BenchmarkSuspender::*() const {
|
operator int BenchmarkSuspender::*() const { return nullptr; }
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Accumulates nanoseconds spent outside benchmark.
|
* Accumulates nanoseconds spent outside benchmark.
|
||||||
@ -144,13 +91,12 @@ struct BenchmarkSuspender {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
void tally() {
|
void tally() {
|
||||||
timespec end;
|
uint64_t end = Env::Default()->NowNanos();
|
||||||
ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end));
|
nsSpent += end - start_;  // elapsed = end - start_; the reverse order wraps around in uint64_t and inflates the tally
|
||||||
nsSpent += detail::TimespecDiff(end, start_);
|
|
||||||
start_ = end;
|
start_ = end;
|
||||||
}
|
}
|
||||||
|
|
||||||
timespec start_;
|
uint64_t start_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -165,18 +111,15 @@ void
|
|||||||
AddBenchmark_n(const char* file, const char* name, Lambda&& lambda) {
|
AddBenchmark_n(const char* file, const char* name, Lambda&& lambda) {
|
||||||
auto execute = [=](unsigned int times) -> uint64_t {
|
auto execute = [=](unsigned int times) -> uint64_t {
|
||||||
BenchmarkSuspender::nsSpent = 0;
|
BenchmarkSuspender::nsSpent = 0;
|
||||||
timespec start, end;
|
uint64_t start, end;
|
||||||
|
auto env = Env::Default();
|
||||||
|
|
||||||
// CORE MEASUREMENT STARTS
|
// CORE MEASUREMENT STARTS
|
||||||
auto const r1 = clock_gettime(detail::DEFAULT_CLOCK_ID, &start);
|
start = env->NowNanos();
|
||||||
lambda(times);
|
lambda(times);
|
||||||
auto const r2 = clock_gettime(detail::DEFAULT_CLOCK_ID, &end);
|
end = env->NowNanos();
|
||||||
// CORE MEASUREMENT ENDS
|
// CORE MEASUREMENT ENDS
|
||||||
|
return (end - start) - BenchmarkSuspender::nsSpent;
|
||||||
ASSERT_EQ(0, r1);
|
|
||||||
ASSERT_EQ(0, r2);
|
|
||||||
|
|
||||||
return detail::TimespecDiff(end, start) - BenchmarkSuspender::nsSpent;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
detail::AddBenchmarkImpl(file, name,
|
detail::AddBenchmarkImpl(file, name,
|
||||||
|
@ -8,7 +8,6 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
namespace benchmark {
|
|
||||||
|
|
||||||
BENCHMARK(insertFrontVector) {
|
BENCHMARK(insertFrontVector) {
|
||||||
std::vector<int> v;
|
std::vector<int> v;
|
||||||
@ -60,7 +59,6 @@ BENCHMARK_RELATIVE_N(insertFrontEndSuspend_n, n) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace benchmark
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
Loading…
Reference in New Issue
Block a user