Move benchmark timing to Env::NowNanos()

This commit is contained in:
Igor Canadi 2014-04-21 17:43:48 -07:00
parent e316af5f16
commit ba16c1f410
3 changed files with 24 additions and 99 deletions

View File

@ -12,7 +12,6 @@
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include <iostream>
#include <limits> #include <limits>
#include <string> #include <string>
#include <utility> #include <utility>
@ -192,21 +191,7 @@ static double RunBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
// The key here is accuracy; too low numbers means the accuracy was // The key here is accuracy; too low numbers means the accuracy was
// coarse. We up the ante until we get to at least minNanoseconds // coarse. We up the ante until we get to at least minNanoseconds
// timings. // timings.
static uint64_t resolutionInNs = 0; static const auto minNanoseconds = FLAGS_bm_min_usec * 1000UL;
if (!resolutionInNs) {
timespec ts;
ASSERT_EQ(0, clock_getres(detail::DEFAULT_CLOCK_ID, &ts));
ASSERT_EQ(0, ts.tv_sec); // "Clock sucks.";
ASSERT_LT(0, ts.tv_nsec); // "Clock too fast for its own good.";
ASSERT_EQ(1, ts.tv_nsec); // "Clock too coarse, upgrade your kernel.";
resolutionInNs = ts.tv_nsec;
}
// We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
// the clock resolution is worse than that, it will be larger. In
// essence we're aiming at making the quantization noise 0.01%.
static const auto minNanoseconds =
max(FLAGS_bm_min_usec * 1000UL,
min<uint64_t>(resolutionInNs * 100000, 1000000000ULL));
// We do measurements in several epochs and take the minimum, to // We do measurements in several epochs and take the minimum, to
// account for jitter. // account for jitter.
@ -214,8 +199,8 @@ static double RunBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
// We establish a total time budget as we don't want a measurement // We establish a total time budget as we don't want a measurement
// to take too long. This will curtail the number of actual epochs. // to take too long. This will curtail the number of actual epochs.
const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000; const uint64_t timeBudgetInNs = FLAGS_bm_max_secs * 1000000000;
timespec global; auto env = Env::Default();
ASSERT_EQ(0, clock_gettime(CLOCK_REALTIME, &global)); uint64_t global = env->NowNanos();
double epochResults[epochs] = { 0 }; double epochResults[epochs] = { 0 };
size_t actualEpochs = 0; size_t actualEpochs = 0;
@ -233,9 +218,8 @@ static double RunBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
// Done with the current epoch, we got a meaningful timing. // Done with the current epoch, we got a meaningful timing.
break; break;
} }
timespec now; uint64_t now = env->NowNanos();
ASSERT_EQ(0, clock_gettime(CLOCK_REALTIME, &now)); if ((now - global) >= timeBudgetInNs) {
if (detail::TimespecDiff(now, global) >= timeBudgetInNs) {
// No more time budget available. // No more time budget available.
++actualEpochs; ++actualEpochs;
break; break;

View File

@ -12,11 +12,11 @@
#include <gflags/gflags.h> #include <gflags/gflags.h>
#include <cassert> #include <cassert>
#include <ctime>
#include <functional> #include <functional>
#include <limits> #include <limits>
#include "util/testharness.h" #include "util/testharness.h"
#include "rocksdb/env.h"
namespace rocksdb { namespace rocksdb {
namespace benchmark { namespace benchmark {
@ -28,13 +28,6 @@ void RunBenchmarks();
namespace detail { namespace detail {
/**
* This is the clock ID used for measuring time. On older kernels, the
* resolution of this clock will be very coarse, which will cause the
* benchmarks to fail.
*/
enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME };
/** /**
* Adds a benchmark wrapped in a std::function. Only used * Adds a benchmark wrapped in a std::function. Only used
* internally. Pass by value is intentional. * internally. Pass by value is intentional.
@ -43,45 +36,6 @@ void AddBenchmarkImpl(const char* file,
const char* name, const char* name,
std::function<uint64_t(unsigned int)>); std::function<uint64_t(unsigned int)>);
/**
* Takes the difference between two timespec values. end is assumed to
* occur after start.
*/
inline uint64_t TimespecDiff(timespec end, timespec start) {
if (end.tv_sec == start.tv_sec) {
assert(end.tv_nsec >= start.tv_nsec);
return end.tv_nsec - start.tv_nsec;
}
assert(end.tv_sec > start.tv_sec &&
end.tv_sec - start.tv_sec <
std::numeric_limits<uint64_t>::max() / 1000000000UL);
return (end.tv_sec - start.tv_sec) * 1000000000UL
+ end.tv_nsec - start.tv_nsec;
}
/**
* Takes the difference between two sets of timespec values. The first
* two come from a high-resolution clock whereas the other two come
* from a low-resolution clock. The crux of the matter is that
* high-res values may be bogus as documented in
* http://linux.die.net/man/3/clock_gettime. The trouble is when the
* running process migrates from one CPU to another, which is more
* likely for long-running processes. Therefore we watch for high
* differences between the two timings.
*
* This function is subject to further improvements.
*/
inline uint64_t TimespecDiff(timespec end, timespec start,
timespec endCoarse, timespec startCoarse) {
auto fine = TimespecDiff(end, start);
auto coarse = TimespecDiff(endCoarse, startCoarse);
if (coarse - fine >= 1000000) {
// The fine time is in all likelihood bogus
return coarse;
}
return fine;
}
} // namespace detail } // namespace detail
@ -89,42 +43,37 @@ inline uint64_t TimespecDiff(timespec end, timespec start,
* Supporting type for BENCHMARK_SUSPEND defined below. * Supporting type for BENCHMARK_SUSPEND defined below.
*/ */
struct BenchmarkSuspender { struct BenchmarkSuspender {
BenchmarkSuspender() { BenchmarkSuspender() { start_ = Env::Default()->NowNanos(); }
ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start_));
}
BenchmarkSuspender(const BenchmarkSuspender &) = delete; BenchmarkSuspender(const BenchmarkSuspender&) = delete;
BenchmarkSuspender(BenchmarkSuspender && rhs) { BenchmarkSuspender(BenchmarkSuspender && rhs) {
start_ = rhs.start_; start_ = rhs.start_;
rhs.start_.tv_nsec = rhs.start_.tv_sec = 0; rhs.start_ = 0;
} }
BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete; BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete;
BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) { BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) {
if (start_.tv_nsec > 0 || start_.tv_sec > 0) { if (start_ > 0) {
tally(); tally();
} }
start_ = rhs.start_; start_ = rhs.start_;
rhs.start_.tv_nsec = rhs.start_.tv_sec = 0; rhs.start_ = 0;
return *this; return *this;
} }
~BenchmarkSuspender() { ~BenchmarkSuspender() {
if (start_.tv_nsec > 0 || start_.tv_sec > 0) { if (start_ > 0) {
tally(); tally();
} }
} }
void Dismiss() { void Dismiss() {
assert(start_.tv_nsec > 0 || start_.tv_sec > 0); assert(start_ > 0);
tally(); tally();
start_.tv_nsec = start_.tv_sec = 0; start_ = 0;
} }
void Rehire() { void Rehire() { start_ = Env::Default()->NowNanos(); }
assert(start_.tv_nsec == 0 || start_.tv_sec == 0);
ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start_));
}
/** /**
* This helps the macro definition. To get around the dangers of * This helps the macro definition. To get around the dangers of
@ -132,9 +81,7 @@ struct BenchmarkSuspender {
* arithmetic). * arithmetic).
*/ */
/* implicit */ /* implicit */
operator int BenchmarkSuspender::*() const { operator int BenchmarkSuspender::*() const { return nullptr; }
return nullptr;
}
/** /**
* Accumulates nanoseconds spent outside benchmark. * Accumulates nanoseconds spent outside benchmark.
@ -144,13 +91,12 @@ struct BenchmarkSuspender {
private: private:
void tally() { void tally() {
timespec end; uint64_t end = Env::Default()->NowNanos();
ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end)); nsSpent += start_ - end;
nsSpent += detail::TimespecDiff(end, start_);
start_ = end; start_ = end;
} }
timespec start_; uint64_t start_;
}; };
/** /**
@ -165,18 +111,15 @@ void
AddBenchmark_n(const char* file, const char* name, Lambda&& lambda) { AddBenchmark_n(const char* file, const char* name, Lambda&& lambda) {
auto execute = [=](unsigned int times) -> uint64_t { auto execute = [=](unsigned int times) -> uint64_t {
BenchmarkSuspender::nsSpent = 0; BenchmarkSuspender::nsSpent = 0;
timespec start, end; uint64_t start, end;
auto env = Env::Default();
// CORE MEASUREMENT STARTS // CORE MEASUREMENT STARTS
auto const r1 = clock_gettime(detail::DEFAULT_CLOCK_ID, &start); start = env->NowNanos();
lambda(times); lambda(times);
auto const r2 = clock_gettime(detail::DEFAULT_CLOCK_ID, &end); end = env->NowNanos();
// CORE MEASUREMENT ENDS // CORE MEASUREMENT ENDS
return (end - start) - BenchmarkSuspender::nsSpent;
ASSERT_EQ(0, r1);
ASSERT_EQ(0, r2);
return detail::TimespecDiff(end, start) - BenchmarkSuspender::nsSpent;
}; };
detail::AddBenchmarkImpl(file, name, detail::AddBenchmarkImpl(file, name,

View File

@ -8,7 +8,6 @@
#include <vector> #include <vector>
namespace rocksdb { namespace rocksdb {
namespace benchmark {
BENCHMARK(insertFrontVector) { BENCHMARK(insertFrontVector) {
std::vector<int> v; std::vector<int> v;
@ -60,7 +59,6 @@ BENCHMARK_RELATIVE_N(insertFrontEndSuspend_n, n) {
} }
} }
} // namespace benchmark
} // namespace rocksdb } // namespace rocksdb
int main(int argc, char** argv) { int main(int argc, char** argv) {