Added benchmark functionality along the lines of folly/Benchmark.h

Summary: Added benchmark functionality along the lines of folly/Benchmark.h. Test Plan: Added unit tests. Reviewers: igor, haobo, sdong, ljin, yhchiang, dhruba Reviewed By: igor CC: leveldb Differential Revision: https://reviews.facebook.net/D17973
2014-04-21 12:29:55 -07:00 · 2014-04-21 12:29:55 -07:00 · ff1b5df4c6
commit ff1b5df4c6
parent c7076a7a05
10 changed files with 911 additions and 17 deletions
--- a/.gitignore
+++ b/.gitignore
@ -10,7 +10,7 @@ build_config.mk
 *.so
 *.so.*
 *_test
-*_bench
+*_benchmark
 *_stress
 *.out
 *.class
--- a/26
+++ b/26
@ -76,6 +76,7 @@ TESTS = \
 	table_properties_collector_test \
 	arena_test \
 	auto_roll_logger_test \
+	benchmarkharness_test \
 	block_test \
 	bloom_test \
 	dynamic_bloom_test \
@ -119,10 +120,10 @@ TOOLS = \
        db_stress \
        ldb \
 	db_repl_stress \
-	blob_store_bench
+	blob_store_benchmark

-PROGRAMS = db_bench signal_test table_reader_bench $(TOOLS)
-BENCHMARKS = db_bench_sqlite3 db_bench_tree_db table_reader_bench
+PROGRAMS = db_benchmark signal_test table_reader_benchmark $(TOOLS)
+BENCHMARKS = db_bench_sqlite3 db_bench_tree_db table_reader_benchmark

 # The library name is configurable since we are maintaining libraries of both
 # debug/release mode.
@ -245,8 +246,8 @@ $(LIBRARY): $(LIBOBJECTS)
 	rm -f $@
 	$(AR) -rs $@ $(LIBOBJECTS)

-db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
-	$(CXX) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@  $(LDFLAGS) $(COVERAGEFLAGS)
+db_benchmark: db/db_benchmark.o $(LIBOBJECTS) $(TESTUTIL)
+	$(CXX) db/db_benchmark.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@  $(LDFLAGS) $(COVERAGEFLAGS)

 block_hash_index_test: table/block_hash_index_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	 $(CXX) table/block_hash_index_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
@ -260,8 +261,8 @@ db_sanity_test: tools/db_sanity_test.o $(LIBOBJECTS) $(TESTUTIL)
 db_repl_stress: tools/db_repl_stress.o $(LIBOBJECTS) $(TESTUTIL)
 	$(CXX) tools/db_repl_stress.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@  $(LDFLAGS) $(COVERAGEFLAGS)

-blob_store_bench: tools/blob_store_bench.o $(LIBOBJECTS) $(TESTUTIL)
-	$(CXX) tools/blob_store_bench.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@  $(LDFLAGS) $(COVERAGEFLAGS)
+blob_store_benchmark: tools/blob_store_benchmark.o $(LIBOBJECTS) $(TESTUTIL)
+	$(CXX) tools/blob_store_benchmark.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@  $(LDFLAGS) $(COVERAGEFLAGS)

 db_bench_sqlite3: doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL)
 	$(CXX) doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) -lsqlite3 $(COVERAGEFLAGS)
@ -308,6 +309,9 @@ stringappend_test: utilities/merge_operators/string_append/stringappend_test.o $
 redis_test: utilities/redis/redis_lists_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) utilities/redis/redis_lists_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)

+benchmarkharness_test: util/benchmarkharness_test.o $(LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) util/benchmarkharness_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) $(COVERAGEFLAGS)
+
 histogram_test: util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) util/histogram_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o$@ $(LDFLAGS) $(COVERAGEFLAGS)

@ -323,8 +327,8 @@ crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS)
 db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)

-log_write_bench: util/log_write_bench.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) util/log_write_bench.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) -pg
+log_write_benchmark: util/log_write_benchmark.o $(LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) util/log_write_benchmark.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) -pg

 plain_table_db_test: db/plain_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) db/plain_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
@ -332,8 +336,8 @@ plain_table_db_test: db/plain_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
 simple_table_db_test: db/simple_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) db/simple_table_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)

-table_reader_bench: table/table_reader_bench.o $(LIBOBJECTS) $(TESTHARNESS)
-	$(CXX) table/table_reader_bench.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) -pg
+table_reader_benchmark: table/table_reader_benchmark.o $(LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) table/table_reader_benchmark.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) -pg

 perf_context_test: db/perf_context_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) db/perf_context_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS)
--- a/build_tools/build_detect_platform
+++ b/build_tools/build_detect_platform
@ -166,9 +166,9 @@ DIRS="util db table utilities"

 set -f # temporarily disable globbing so that our patterns arent expanded
 PRUNE_TEST="-name *test*.cc -prune"
-PRUNE_BENCH="-name *_bench.cc -prune"
-PORTABLE_FILES=`cd $ROCKSDB_ROOT; find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cc' -print | sort | tr "\n" " "`
-PORTABLE_CPP=`cd $ROCKSDB_ROOT; find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o -name '*.cpp' -print | sort | tr "\n" " "`
+PRUNE_BENCHMARK="-name *_benchmark*.cc -prune"
+PORTABLE_FILES=`cd $ROCKSDB_ROOT; find $DIRS $PRUNE_TEST -o $PRUNE_BENCHMARK -o -name '*.cc' -print | sort | tr "\n" " "`
+PORTABLE_CPP=`cd $ROCKSDB_ROOT; find $DIRS $PRUNE_TEST -o $PRUNE_BENCHMARK -o -name '*.cpp' -print | sort | tr "\n" " "`
 set +f # re-enable globbing

 # The sources consist of the portable files, plus the platform-specific port
--- a/db/db_benchmark.cc
+++ b/db/db_benchmark.cc
--- a/table/table_reader_benchmark.cc
+++ b/table/table_reader_benchmark.cc
--- a/tools/blob_store_benchmark.cc
+++ b/tools/blob_store_benchmark.cc
--- a/util/benchmarkharness.cc
+++ b/util/benchmarkharness.cc
@ -0,0 +1,414 @@
+//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+//
+// This code is derived from Benchmark.cpp implemented in Folly, the opensourced
+// Facebook C++ library available at https://github.com/facebook/folly
+// The code has removed any dependence on other folly and boost libraries
+
+#include "util/benchmarkharness.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <utility>
+#include <vector>
+
+using std::function;
+using std::get;
+using std::make_pair;
+using std::max;
+using std::min;
+using std::pair;
+using std::sort;
+using std::string;
+using std::tuple;
+using std::vector;
+
+DEFINE_bool(benchmark, false, "Run benchmarks.");
+
+DEFINE_int64(bm_min_usec, 100,
+             "Minimum # of microseconds we'll accept for each benchmark.");
+
+DEFINE_int64(bm_min_iters, 1,
+             "Minimum # of iterations we'll try for each benchmark.");
+
+DEFINE_int32(bm_max_secs, 1,
+             "Maximum # of seconds we'll spend on each benchmark.");
+
+
+namespace rocksdb {
+namespace benchmark {
+
+BenchmarkSuspender::NanosecondsSpent BenchmarkSuspender::nsSpent;
+
+// A registered benchmark body: receives the number of iterations to run
+// and returns a nanosecond count for them.
+typedef function<uint64_t(unsigned int)> BenchmarkFun;
+// Registry of (file, name, body) in registration order. RunBenchmarks()
+// treats the first entry as the global baseline.
+// NOTE(review): BENCHMARK() registers through static initializers. The
+// order of such initializers across translation units is not fixed by the
+// language, so a benchmark defined in another file could register before
+// this vector is constructed or before the baseline below -- confirm that
+// link order guarantees this file is initialized first.
+static vector<tuple<const char*, const char*, BenchmarkFun>> benchmarks;
+
+// Add the global baseline: a benchmark with an empty body whose
+// per-iteration cost is subtracted from all other measurements.
+BENCHMARK(globalBenchmarkBaseline) {
+  // Empty volatile asm statement; presumably keeps the compiler from
+  // discarding the otherwise empty iteration.
+  asm volatile("");
+}
+
+// Appends one benchmark to the registry. fun is taken by value and moved
+// into the stored tuple.
+void detail::AddBenchmarkImpl(const char* file, const char* name,
+                              BenchmarkFun fun) {
+  benchmarks.emplace_back(file, name, std::move(fun));
+}
+
+/**
+ * Given a point, gives density at that point as a number 0.0 < x <=
+ * 1.0. The result is 1.0 if all samples are equal to where, and
+ * decreases near 0 if all points are far away from it. The density is
+ * computed with the help of a radial basis function.
+ *
+ * [begin, end) delimits the samples and must be non-empty; where is the
+ * point being evaluated; bandwidth (must be positive) scales the distance
+ * between a sample and where before it is weighted.
+ */
+static double Density(const double * begin, const double *const end,
+                      const double where, const double bandwidth) {
+  assert(begin < end);
+  assert(bandwidth > 0.0);
+  double sum = 0.0;
+  for (auto i = begin; i < end; i++) {
+    // Each sample contributes exp(-d^2): 1 when it coincides with where,
+    // progressively less as it moves away.
+    auto d = (*i - where) / bandwidth;
+    sum += exp(- d * d);
+  }
+  // Average the contributions so the result does not grow with the
+  // number of samples.
+  return sum / (end - begin);
+}
+
+/**
+ * Computes the mean and the spread of a bunch of data points. Despite
+ * the name, the second element of the returned pair is the square root
+ * of the population variance (i.e. the standard deviation), not the
+ * variance itself. Note that mean is currently not being used.
+ *
+ * [begin, end) must be non-empty.
+ */
+static pair<double, double>
+MeanVariance(const double * begin, const double *const end) {
+  assert(begin < end);
+  double sum = 0.0, sum2 = 0.0;
+  for (auto i = begin; i < end; i++) {
+    sum += *i;
+    sum2 += *i * *i;
+  }
+  auto const n = end - begin;
+  // variance = E[x^2] - E[x]^2, written here as (sum2 - sum^2 / n) / n.
+  // NOTE(review): floating-point rounding could make the radicand slightly
+  // negative for near-constant input, which would yield NaN -- confirm
+  // whether a clamp to zero is needed.
+  return make_pair(sum / n, sqrt((sum2 - sum * sum / n) / n));
+}
+
+/**
+ * Computes the mode of a sample set through brute force. Assumes
+ * input is sorted.
+ *
+ * Every sample is tried as a candidate; the one with the highest
+ * Density() wins, with ties going to the earliest candidate. [begin, end)
+ * must be non-empty.
+ */
+static double Mode(const double * begin, const double *const end) {
+  assert(begin < end);
+  // Best candidate found so far and the density measured at it.
+  auto
+    result = 0.0,
+    bestDensity = 0.0;
+
+  // Get the spread (second element of MeanVariance(), which is the square
+  // root of the variance) so we pass it down to Density()
+  auto const sigma = MeanVariance(begin, end).second;
+  if (!sigma) {
+    // No variance means constant signal
+    return *begin;
+  }
+
+  for (auto i = begin; i < end; i++) {
+    // Verifies the "input is sorted" precondition as we go.
+    assert(i == begin || *i >= i[-1]);
+    auto candidate = Density(begin, end, *i, sigma * sqrt(2.0));
+    if (candidate > bestDensity) {
+      // Found a new best
+      bestDensity = candidate;
+      result = *i;
+    } else {
+      // Density is decreasing... we could break here if we definitely
+      // knew this is unimodal.
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Given a bunch of benchmark samples, estimate the actual run time.
+ *
+ * The estimate currently returned is simply the smallest sample in
+ * [begin, end), which must be non-empty. The mode-based estimator that
+ * follows the first return statement is never executed; it is kept only
+ * as a record of the alternative approach.
+ */
+static double EstimateTime(double * begin, double * end) {
+  assert(begin < end);
+
+  // Current state of the art: get the minimum. After some
+  // experimentation, it seems taking the minimum is the best.
+
+  return *std::min_element(begin, end);
+
+  // ---- Unreachable from here on (see the function comment). ----
+
+  // What follows after estimates the time as the mode of the
+  // distribution.
+
+  // Select the awesomest (i.e. most frequent) result. We do this by
+  // sorting and then computing the longest run length.
+  sort(begin, end);
+
+  // Eliminate outliers. A time much larger than the minimum time is
+  // considered an outlier.
+  while (end[-1] > 2.0 * *begin) {
+    --end;
+    if (begin == end) {
+//      LOG(INFO) << *begin;
+    }
+    assert(begin < end);
+  }
+
+  double result = 0;
+
+  /* Code used just for comparison purposes */ {
+    // Finds the value with the longest run of exactly-equal samples. Its
+    // outcome is overwritten by the Mode() call below.
+    unsigned bestFrequency = 0;
+    unsigned candidateFrequency = 1;
+    double candidateValue = *begin;
+    for (auto current = begin + 1; ; ++current) {
+      if (current == end || *current != candidateValue) {
+        // Done with the current run, see if it was best
+        if (candidateFrequency > bestFrequency) {
+          bestFrequency = candidateFrequency;
+          result = candidateValue;
+        }
+        if (current == end) {
+          break;
+        }
+        // Start a new run
+        candidateValue = *current;
+        candidateFrequency = 1;
+      } else {
+        // Cool, inside a run, increase the frequency
+        ++candidateFrequency;
+      }
+    }
+  }
+
+  result = Mode(begin, end);
+
+  return result;
+}
+
+/**
+ * Times fun and returns its estimated cost in nanoseconds per iteration
+ * after subtracting globalBaseline (the per-iteration cost measured for
+ * the empty baseline benchmark). The result is never negative.
+ *
+ * The measurement is repeated for up to 1000 epochs, or until
+ * --bm_max_secs have elapsed, whichever comes first. Within an epoch the
+ * iteration count starts at --bm_min_iters (at least 1) and doubles until
+ * a single call to fun reports at least minNanoseconds.
+ */
+static double RunBenchmarkGetNSPerIteration(const BenchmarkFun& fun,
+                                            const double globalBaseline) {
+  // The key here is accuracy; too low numbers means the accuracy was
+  // coarse. We up the ante until we get to at least minNanoseconds
+  // timings.
+  static uint64_t resolutionInNs = 0;
+  if (!resolutionInNs) {
+    timespec ts;
+    ASSERT_EQ(0, clock_getres(detail::DEFAULT_CLOCK_ID, &ts));
+    ASSERT_EQ(0, ts.tv_sec);  // "Clock sucks.";
+    ASSERT_LT(0, ts.tv_nsec);  // "Clock too fast for its own good.";
+    ASSERT_EQ(1, ts.tv_nsec);  // "Clock too coarse, upgrade your kernel.";
+    resolutionInNs = ts.tv_nsec;
+  }
+  // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
+  // the clock resolution is worse than that, it will be larger. In
+  // essence we're aiming at making the quantization noise 0.01%.
+  // All operands are forced to uint64_t so that max/min deduce a single
+  // type on platforms where unsigned long and uint64_t differ.
+  static const uint64_t minNanoseconds =
+    max<uint64_t>(static_cast<uint64_t>(FLAGS_bm_min_usec) * 1000ULL,
+                  min<uint64_t>(resolutionInNs * 100000, 1000000000ULL));
+
+  // We do measurements in several epochs and take the minimum, to
+  // account for jitter.
+  static const unsigned int epochs = 1000;
+  // We establish a total time budget as we don't want a measurement
+  // to take too long. This will curtail the number of actual epochs.
+  // The multiplication is done in 64 bits: in plain int arithmetic any
+  // --bm_max_secs of 3 or more would overflow.
+  const uint64_t timeBudgetInNs =
+    static_cast<uint64_t>(FLAGS_bm_max_secs) * 1000000000ULL;
+  timespec global;
+  ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &global));
+
+  // An iteration count of 0 can never be doubled into something useful,
+  // so start from at least one iteration.
+  const unsigned int firstIters = static_cast<unsigned int>(
+    max<int64_t>(FLAGS_bm_min_iters, 1));
+
+  double epochResults[epochs] = { 0 };
+  size_t actualEpochs = 0;
+
+  for (; actualEpochs < epochs; ++actualEpochs) {
+    for (unsigned int n = firstIters; n < (1UL << 30); n *= 2) {
+      auto const nsecs = fun(n);
+      if (nsecs < minNanoseconds) {
+        // Too short to be trusted at this clock resolution; try again
+        // with twice as many iterations.
+        continue;
+      }
+      // We got an accurate enough timing; record the per-iteration cost
+      // for this epoch with the baseline removed. If the doubling loop
+      // runs out before this point the epoch keeps its initial 0.
+      epochResults[actualEpochs] = max(0.0,
+          static_cast<double>(nsecs) / n - globalBaseline);
+      // Done with the current epoch, we got a meaningful timing.
+      break;
+    }
+    timespec now;
+    ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &now));
+    if (detail::TimespecDiff(now, global) >= timeBudgetInNs) {
+      // No more time budget available. Count the epoch just finished,
+      // since the loop increment is skipped by the break.
+      ++actualEpochs;
+      break;
+    }
+  }
+
+  // If the benchmark was basically drowned in baseline noise, it's
+  // possible it became negative.
+  return max(0.0, EstimateTime(epochResults, epochResults + actualEpochs));
+}
+
+// One row of a unit table used by HumanReadable(): values whose magnitude
+// is at least boundary are divided by boundary and printed with suffix.
+struct ScaleInfo {
+  double boundary;
+  const char* suffix;
+};
+
+// Units for durations expressed in seconds, largest first. The table is
+// terminated by an entry whose suffix is nullptr.
+static const ScaleInfo kTimeSuffixes[] {
+  { 365.25 * 24 * 3600, "years" },
+  { 24 * 3600, "days" },
+  { 3600, "hr" },
+  { 60, "min" },
+  { 1, "s" },
+  { 1E-3, "ms" },
+  { 1E-6, "us" },
+  { 1E-9, "ns" },
+  { 1E-12, "ps" },
+  { 1E-15, "fs" },
+  { 0, nullptr },
+};
+
+// Metric prefixes for plain quantities, largest first. The table is
+// terminated by an entry whose suffix is nullptr.
+static const ScaleInfo kMetricSuffixes[] {
+  { 1E24, "Y" },  // yotta
+  { 1E21, "Z" },  // zetta
+  { 1E18, "X" },  // "exa" written with suffix 'X' so as to not create
+                  //   confusion with scientific notation
+  { 1E15, "P" },  // peta
+  { 1E12, "T" },  // tera
+  { 1E9, "G" },   // giga
+  { 1E6, "M" },   // mega
+  { 1E3, "K" },   // kilo
+  { 1, "" },
+  { 1E-3, "m" },  // milli
+  { 1E-6, "u" },  // micro
+  { 1E-9, "n" },  // nano
+  { 1E-12, "p" },  // pico
+  { 1E-15, "f" },  // femto
+  { 1E-18, "a" },  // atto
+  { 1E-21, "z" },  // zepto
+  { 1E-24, "y" },  // yocto
+  { 0, nullptr },
+};
+
+// Formats n with the given number of decimals, scaled to the first unit in
+// scales whose boundary does not exceed |n| (or the last real unit when n
+// is smaller than all of them). Infinities and NaN are returned via
+// std::to_string without any unit.
+static string HumanReadable(double n, unsigned int decimals,
+                            const ScaleInfo* scales) {
+  const bool finite = !std::isinf(n) && !std::isnan(n);
+  if (!finite) {
+    return std::to_string(n);
+  }
+
+  const double magnitude = fabs(n);
+  const ScaleInfo* chosen = scales;
+  // Walk down the table while the value is too small for the current unit,
+  // but never step onto the nullptr terminator.
+  for (; magnitude < chosen->boundary && chosen[1].suffix != nullptr;
+       ++chosen) {
+  }
+
+  char text[80];
+  snprintf(text, sizeof(text), "%.*f%s", decimals, n / chosen->boundary,
+           chosen->suffix);
+  return string(text);
+}
+
+// Formats a duration given in seconds using the time-unit table.
+static string ReadableTime(double n, unsigned int decimals) {
+  const ScaleInfo* const table = kTimeSuffixes;
+  return HumanReadable(n, decimals, table);
+}
+
+// Formats a plain quantity using the metric-prefix table.
+static string MetricReadable(double n, unsigned int decimals) {
+  const ScaleInfo* const table = kMetricSuffixes;
+  return HumanReadable(n, decimals, table);
+}
+
+/**
+ * Prints the collected results as a fixed-width table on stdout.
+ *
+ * Each element of data is (file, name, nanoseconds per iteration). A new
+ * header is printed whenever the file changes. An entry named "-" draws a
+ * dashed line. A name starting with '%' is printed relative to the most
+ * recent entry that did not start with '%'.
+ */
+static void PrintBenchmarkResultsAsTable(
+  const vector<tuple<const char*, const char*, double> >& data) {
+  // Width available. Kept as int because it is also passed to printf as a
+  // '*' field width, which must be an int.
+  static const int columns = 76;
+
+  // Print a horizontal rule
+  auto separator = [&](char pad) {
+    puts(string(columns, pad).c_str());
+  };
+
+  // Print header for a file
+  auto header = [&](const char* file) {
+    separator('=');
+    printf("%-*srelative  time/iter  iters/s\n",
+           columns - 28, file);
+    separator('=');
+  };
+
+  double baselineNsPerIter = std::numeric_limits<double>::max();
+  const char* lastFile = "";
+
+  for (auto& datum : data) {
+    auto file = get<0>(datum);
+    if (strcmp(file, lastFile)) {
+      // New file starting
+      header(file);
+      lastFile = file;
+    }
+
+    string s = get<1>(datum);
+    if (s == "-") {
+      separator('-');
+      continue;
+    }
+    // A leading '%' marks a benchmark that is reported relative to the
+    // current baseline; anything else becomes the new baseline.
+    const bool useBaseline = (s[0] == '%');
+    if (useBaseline) {
+      s.erase(0, 1);
+    } else {
+      baselineNsPerIter = get<2>(datum);
+    }
+    // Pad or truncate the name so the numeric columns line up.
+    s.resize(columns - 29, ' ');
+    auto nsPerIter = get<2>(datum);
+    auto secPerIter = nsPerIter / 1E9;
+    auto itersPerSec = 1 / secPerIter;
+    if (!useBaseline) {
+      // Print without baseline
+      printf("%*s           %9s  %7s\n",
+             static_cast<int>(s.size()), s.c_str(),
+             ReadableTime(secPerIter, 2).c_str(),
+             MetricReadable(itersPerSec, 2).c_str());
+    } else {
+      // Print with baseline
+      auto rel = baselineNsPerIter / nsPerIter * 100.0;
+      printf("%*s %7.2f%%  %9s  %7s\n",
+             static_cast<int>(s.size()), s.c_str(),
+             rel,
+             ReadableTime(secPerIter, 2).c_str(),
+             MetricReadable(itersPerSec, 2).c_str());
+    }
+  }
+  separator('=');
+}
+
+/**
+ * Runs every registered benchmark and prints the results as a table.
+ *
+ * The first registered benchmark is measured on its own and used as the
+ * baseline that is subtracted from all the others; it does not appear in
+ * the table. Entries named "-" are separators and are not measured.
+ */
+void RunBenchmarks() {
+  ASSERT_TRUE(!benchmarks.empty());
+
+  vector<tuple<const char*, const char*, double>> results;
+  results.reserve(benchmarks.size() - 1);
+
+  // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
+
+  auto const globalBaseline = RunBenchmarkGetNSPerIteration(
+    get<2>(benchmarks.front()), 0);
+  // size_t index: matches the type of benchmarks.size() and avoids a
+  // signed/unsigned comparison.
+  for (size_t i = 1; i < benchmarks.size(); i++) {
+    double elapsed = 0.0;
+    if (strcmp(get<1>(benchmarks[i]), "-") != 0) {  // skip separators
+      elapsed = RunBenchmarkGetNSPerIteration(get<2>(benchmarks[i]),
+                                              globalBaseline);
+    }
+    results.emplace_back(get<0>(benchmarks[i]),
+                         get<1>(benchmarks[i]), elapsed);
+  }
+
+  // PLEASE MAKE NOISE. MEASUREMENTS DONE.
+
+  PrintBenchmarkResultsAsTable(results);
+}
+
+}  // namespace benchmark
+}  // namespace rocksdb
--- a/util/benchmarkharness.h
+++ b/util/benchmarkharness.h
@ -0,0 +1,407 @@
+//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+//
+// This code is derived from Benchmark.h implemented in Folly, the opensourced
+// Facebook C++ library available at https://github.com/facebook/folly
+// The code has removed any dependence on other folly and boost libraries
+
+#pragma once
+
+#include <gflags/gflags.h>
+
+#include <cassert>
+#include <ctime>
+#include <functional>
+#include <limits>
+
+#include "util/testharness.h"
+
+namespace rocksdb {
+namespace benchmark {
+
+/**
+ * Runs all benchmarks defined. Usually put in main().
+ */
+void RunBenchmarks();
+
+namespace detail {
+
+/**
+ * This is the clock ID used for measuring time. On older kernels, the
+ * resolution of this clock will be very coarse, which will cause the
+ * benchmarks to fail.
+ *
+ * NOTE(review): CLOCK_REALTIME is the system's wall clock, so adjustments
+ * to the system time during a run would presumably distort measurements;
+ * confirm whether a monotonic clock would be preferable here.
+ */
+enum Clock { DEFAULT_CLOCK_ID = CLOCK_REALTIME };
+
+/**
+ * Adds a benchmark wrapped in a std::function. Only used
+ * internally. Pass by value is intentional.
+ */
+void AddBenchmarkImpl(const char* file,
+                      const char* name,
+                      std::function<uint64_t(unsigned int)>);
+
+/**
+ * Takes the difference between two timespec values. end is assumed to
+ * occur after start.
+ *
+ * Returns the elapsed time in nanoseconds. The ordering assumption is
+ * only checked through assert(), i.e. not in builds where assertions are
+ * compiled out.
+ */
+inline uint64_t TimespecDiff(timespec end, timespec start) {
+  if (end.tv_sec == start.tv_sec) {
+    // Same second: only the nanosecond fields differ.
+    assert(end.tv_nsec >= start.tv_nsec);
+    return end.tv_nsec - start.tv_nsec;
+  }
+  // The second half of the assertion guards the multiplication below
+  // against overflowing 64 bits.
+  assert(end.tv_sec > start.tv_sec &&
+         end.tv_sec - start.tv_sec <
+         std::numeric_limits<uint64_t>::max() / 1000000000UL);
+  return (end.tv_sec - start.tv_sec) * 1000000000UL
+    + end.tv_nsec - start.tv_nsec;
+}
+
+/**
+ * Takes the difference between two sets of timespec values. The first
+ * two come from a high-resolution clock whereas the other two come
+ * from a low-resolution clock. The crux of the matter is that
+ * high-res values may be bogus as documented in
+ * http://linux.die.net/man/3/clock_gettime. The trouble is when the
+ * running process migrates from one CPU to another, which is more
+ * likely for long-running processes. Therefore we watch for high
+ * differences between the two timings.
+ *
+ * Returns the high-resolution difference in nanoseconds unless it is at
+ * least one millisecond away (in either direction) from the
+ * low-resolution difference, in which case the low-resolution difference
+ * is returned instead.
+ *
+ * This function is subject to further improvements.
+ */
+inline uint64_t TimespecDiff(timespec end, timespec start,
+                             timespec endCoarse, timespec startCoarse) {
+  auto fine = TimespecDiff(end, start);
+  auto coarse = TimespecDiff(endCoarse, startCoarse);
+  // Both values are unsigned, so subtract the smaller from the larger;
+  // a plain coarse - fine would wrap around whenever fine > coarse and
+  // make every such pair look like a huge discrepancy.
+  const uint64_t gap = coarse > fine ? coarse - fine : fine - coarse;
+  if (gap >= 1000000) {
+    // The fine time is in all likelihood bogus
+    return coarse;
+  }
+  return fine;
+}
+
+}  // namespace detail
+
+
+/**
+ * Supporting type for BENCHMARK_SUSPEND defined below.
+ *
+ * While an instance is active, the time that passes is added to nsSpent
+ * when the instance is dismissed, destroyed or move-assigned over, so
+ * that the benchmark runner can subtract it from the measured time.
+ * An instance whose start_ is all zeros is considered inactive.
+ */
+struct BenchmarkSuspender {
+  // Starts suspending: remembers the current time.
+  BenchmarkSuspender() {
+    ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start_));
+  }
+
+  BenchmarkSuspender(const BenchmarkSuspender &) = delete;
+  // Takes over rhs's start time and leaves rhs inactive.
+  BenchmarkSuspender(BenchmarkSuspender && rhs) {
+    start_ = rhs.start_;
+    rhs.start_.tv_nsec = rhs.start_.tv_sec = 0;
+  }
+
+  BenchmarkSuspender& operator=(const BenchmarkSuspender &) = delete;
+  // Accounts for this instance's own suspended time (if active), then
+  // takes over rhs's start time and leaves rhs inactive.
+  BenchmarkSuspender& operator=(BenchmarkSuspender && rhs) {
+    if (start_.tv_nsec > 0 || start_.tv_sec > 0) {
+      tally();
+    }
+    start_ = rhs.start_;
+    rhs.start_.tv_nsec = rhs.start_.tv_sec = 0;
+    return *this;
+  }
+
+  // Accounts for the suspended time if the instance is still active.
+  ~BenchmarkSuspender() {
+    if (start_.tv_nsec > 0 || start_.tv_sec > 0) {
+      tally();
+    }
+  }
+
+  // Stops suspending early: accounts for the time so far and marks the
+  // instance inactive. Must only be called on an active instance.
+  void Dismiss() {
+    assert(start_.tv_nsec > 0 || start_.tv_sec > 0);
+    tally();
+    start_.tv_nsec = start_.tv_sec = 0;
+  }
+
+  // Starts suspending again. Must only be called on an inactive instance,
+  // i.e. one whose start time has been reset to zero in both fields.
+  void Rehire() {
+    assert(start_.tv_nsec == 0 && start_.tv_sec == 0);
+    ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &start_));
+  }
+
+  /**
+   * This helps the macro definition. To get around the dangers of
+   * operator bool, returns a pointer to member (which allows no
+   * arithmetic). The returned pointer is always null, so the object
+   * always tests as false in a condition.
+   */
+  /* implicit */
+  operator int BenchmarkSuspender::*() const {
+    return nullptr;
+  }
+
+  /**
+   * Accumulates nanoseconds spent outside benchmark.
+   */
+  typedef uint64_t NanosecondsSpent;
+  static NanosecondsSpent nsSpent;
+
+ private:
+  // Adds the time elapsed since start_ to nsSpent and restarts the
+  // interval at the current time.
+  void tally() {
+    timespec end;
+    ASSERT_EQ(0, clock_gettime(detail::DEFAULT_CLOCK_ID, &end));
+    nsSpent += detail::TimespecDiff(end, start_);
+    start_ = end;
+  }
+
+  // Beginning of the current suspended interval; all zeros when inactive.
+  timespec start_;
+};
+
+/**
+ * Adds a benchmark. Usually not called directly but instead through
+ * the macro BENCHMARK defined below. The lambda function involved
+ * must take exactly one parameter of type unsigned, and the benchmark
+ * uses it with counter semantics (iteration occurs inside the
+ * function).
+ *
+ * The registered wrapper returns the time taken by one call to lambda,
+ * in nanoseconds, minus whatever BenchmarkSuspender instances
+ * accumulated in nsSpent during that call.
+ */
+template <typename Lambda>
+void
+AddBenchmark_n(const char* file, const char* name, Lambda&& lambda) {
+  // lambda is captured by copy, so the wrapper stays valid after this
+  // function returns.
+  auto execute = [=](unsigned int times) -> uint64_t {
+    // Reset the suspended-time counter for this measurement.
+    BenchmarkSuspender::nsSpent = 0;
+    timespec start, end;
+
+    // CORE MEASUREMENT STARTS
+    auto const r1 = clock_gettime(detail::DEFAULT_CLOCK_ID, &start);
+    lambda(times);
+    auto const r2 = clock_gettime(detail::DEFAULT_CLOCK_ID, &end);
+    // CORE MEASUREMENT ENDS
+
+    // The return codes are checked only after both timestamps are taken,
+    // keeping the checks out of the measured region.
+    ASSERT_EQ(0, r1);
+    ASSERT_EQ(0, r2);
+
+    return detail::TimespecDiff(end, start) - BenchmarkSuspender::nsSpent;
+  };
+
+  detail::AddBenchmarkImpl(file, name,
+                           std::function<uint64_t(unsigned int)>(execute));
+}
+
+/**
+ * Registers a benchmark whose body takes no arguments. Normally reached
+ * through the BENCHMARK macro rather than called by hand. The framework
+ * supplies the iteration count and this wrapper invokes the body that
+ * many times (the loop lives outside the body).
+ */
+template <typename Lambda>
+void
+AddBenchmark(const char* file, const char* name, Lambda&& lambda) {
+  // Adapter from "call once" to the counter-style signature expected by
+  // AddBenchmark_n.
+  auto repeat = [=](unsigned int times) {
+    for (unsigned int left = times; left != 0; --left) {
+      lambda();
+    }
+  };
+  AddBenchmark_n(file, name, repeat);
+}
+
+}  // namespace benchmark
+}  // namespace rocksdb
+
+/**
+ * FB_ONE_OR_NONE(hello, world) expands to hello and
+ * FB_ONE_OR_NONE(hello) expands to nothing. This macro is used to
+ * insert or eliminate text based on the presence of another argument.
+ *
+ * It relies on the ", ## __VA_ARGS__" form, which drops the preceding
+ * comma when no variadic arguments are given.
+ */
+#define FB_ONE_OR_NONE(a, ...) FB_THIRD(a, ## __VA_ARGS__, a)
+#define FB_THIRD(a, b, ...) __VA_ARGS__
+
+// Token pasting in two steps so that the arguments (for example __LINE__)
+// are macro-expanded before they are joined.
+#define FB_CONCATENATE_IMPL(s1, s2) s1##s2
+#define FB_CONCATENATE(s1, s2) FB_CONCATENATE_IMPL(s1, s2)
+
+// Builds an identifier from str and the current line number.
+#define FB_ANONYMOUS_VARIABLE(str) FB_CONCATENATE(str, __LINE__)
+
+// Turns its argument into a string literal, exactly as written.
+#define FB_STRINGIZE(x) #x
+
+/**
+ * Introduces a benchmark function. Used internally, see BENCHMARK and
+ * friends below.
+ *
+ * Both macros forward-declare funName, register it under stringName from
+ * the initializer of a file-local static bool, and then open the
+ * definition of funName so that the user's braces supply its body.
+ * BENCHMARK_IMPL_N is for bodies that receive the iteration count
+ * (registered through AddBenchmark_n); BENCHMARK_IMPL is for bodies
+ * without parameters (registered through AddBenchmark).
+ */
+#define BENCHMARK_IMPL_N(funName, stringName, paramType, paramName)     \
+  static void funName(paramType);                                       \
+  static bool FB_ANONYMOUS_VARIABLE(rocksdbBenchmarkUnused) = (         \
+    ::rocksdb::benchmark::AddBenchmark_n(__FILE__, stringName,          \
+      [](paramType paramName) { funName(paramName); }),                 \
+    true);                                                              \
+  static void funName(paramType paramName)
+
+#define BENCHMARK_IMPL(funName, stringName)                             \
+  static void funName();                                                \
+  static bool FB_ANONYMOUS_VARIABLE(rocksdbBenchmarkUnused) = (         \
+    ::rocksdb::benchmark::AddBenchmark(__FILE__, stringName,            \
+      []() { funName(); }),                                             \
+    true);                                                              \
+  static void funName()
+
+/**
+ * Introduces a benchmark function. BENCHMARK takes a single argument,
+ * the name of the benchmark; use something descriptive, such as
+ * insertVectorBegin. BENCHMARK_N takes the name plus a symbolic
+ * counter, which dictates how many internal iterations the benchmark
+ * does. Example:
+ *
+ * BENCHMARK(vectorPushBack) {
+ *   vector<int> v;
+ *   v.push_back(42);
+ * }
+ *
+ * BENCHMARK_N(insertVectorBegin, n) {
+ *   vector<int> v;
+ *   for (unsigned i = 0; i < n; ++i) {
+ *     v.insert(v.begin(), 42);
+ *   }
+ * }
+ */
+#define BENCHMARK_N(name, ...)                                  \
+  BENCHMARK_IMPL_N(                                             \
+    name,                                                       \
+    FB_STRINGIZE(name),                                         \
+    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
+    __VA_ARGS__)
+
+#define BENCHMARK(name)                                         \
+  BENCHMARK_IMPL(                                               \
+    name,                                                       \
+    FB_STRINGIZE(name))
+
+/**
+ * Defines a benchmark that passes a parameter to another one. This is
+ * common for benchmarks that need a "problem size" in addition to
+ * "number of iterations". Consider:
+ *
+ * void pushBack(uint n, size_t initialSize) {
+ *   vector<int> v;
+ *   BENCHMARK_SUSPEND {
+ *     v.resize(initialSize);
+ *   }
+ *   FOR_EACH_RANGE (i, 0, n) {
+ *    v.push_back(i);
+ *   }
+ * }
+ * BENCHMARK_PARAM(pushBack, 0)
+ * BENCHMARK_PARAM(pushBack, 1000)
+ * BENCHMARK_PARAM(pushBack, 1000000)
+ *
+ * The benchmark above estimates the speed of push_back at different
+ * initial sizes of the vector. The framework will pass 0, 1000, and
+ * 1000000 for initialSize, and the iteration count for n.
+ *
+ * (FOR_EACH_RANGE in the example is not defined by this header; read it
+ * as a loop of i over [0, n).)
+ */
+#define BENCHMARK_PARAM(name, param)                                    \
+  BENCHMARK_NAMED_PARAM(name, param, param)
+
+/**
+ * Like BENCHMARK_PARAM(), but allows a custom name to be specified for each
+ * parameter, rather than using the parameter value.
+ *
+ * Useful when the parameter value is not a valid token for string pasting,
+ * or when you want to specify multiple parameter arguments.
+ *
+ * For example:
+ *
+ * void addValue(uint n, int64_t bucketSize, int64_t min, int64_t max) {
+ *   Histogram<int64_t> hist(bucketSize, min, max);
+ *   int64_t num = min;
+ *   FOR_EACH_RANGE (i, 0, n) {
+ *     hist.addValue(num);
+ *     ++num;
+ *     if (num > max) { num = min; }
+ *   }
+ * }
+ *
+ * BENCHMARK_NAMED_PARAM(addValue, 0_to_100, 1, 0, 100)
+ * BENCHMARK_NAMED_PARAM(addValue, 0_to_1000, 10, 0, 1000)
+ * BENCHMARK_NAMED_PARAM(addValue, 5k_to_20k, 250, 5000, 20000)
+ *
+ * The generated benchmark function is called name_param_name, receives
+ * the iteration count as "iters", and forwards it together with the
+ * remaining arguments to name(). It is registered under the display
+ * name "name(param_name)".
+ */
+// Uses BENCHMARK_IMPL_N, the variant that takes a parameter type and
+// name; BENCHMARK_IMPL accepts only two arguments.
+#define BENCHMARK_NAMED_PARAM(name, param_name, ...)                    \
+  BENCHMARK_IMPL_N(                                                     \
+      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),              \
+      FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")",              \
+      unsigned,                                                         \
+      iters) {                                                          \
+    name(iters, ## __VA_ARGS__);                                        \
+  }
+
+/**
+ * Just like BENCHMARK, but prints the time relative to a
+ * baseline. The baseline is the most recent BENCHMARK() seen in
+ * lexical order. Example:
+ *
+ * // This is the baseline
+ * BENCHMARK_N(insertVectorBegin, n) {
+ *   vector<int> v;
+ *   for (unsigned i = 0; i < n; ++i) {
+ *     v.insert(v.begin(), 42);
+ *   }
+ * }
+ *
+ * BENCHMARK_RELATIVE_N(insertListBegin, n) {
+ *   list<int> s;
+ *   for (unsigned i = 0; i < n; ++i) {
+ *     s.insert(s.begin(), 42);
+ *   }
+ * }
+ *
+ * Any number of relative benchmarks can be associated with a
+ * baseline. Another BENCHMARK() occurrence effectively establishes a
+ * new baseline.
+ *
+ * The relative variants are marked by registering the benchmark under a
+ * name that starts with '%'.
+ */
+#define BENCHMARK_RELATIVE_N(name, ...)                         \
+  BENCHMARK_IMPL_N(                                             \
+    name,                                                       \
+    "%" FB_STRINGIZE(name),                                     \
+    FB_ONE_OR_NONE(unsigned, ## __VA_ARGS__),                   \
+    __VA_ARGS__)
+
+#define BENCHMARK_RELATIVE(name)                                \
+  BENCHMARK_IMPL(                                               \
+    name,                                                       \
+    "%" FB_STRINGIZE(name))
+
+/**
+ * A combination of BENCHMARK_RELATIVE and BENCHMARK_PARAM. The parameter
+ * value doubles as the name suffix, so it has to be usable as part of an
+ * identifier.
+ */
+#define BENCHMARK_RELATIVE_PARAM(name, param)                           \
+  BENCHMARK_RELATIVE_NAMED_PARAM(name, param, param)
+
+/**
+ * A combination of BENCHMARK_RELATIVE and BENCHMARK_NAMED_PARAM.
+ *
+ * The generated benchmark function is called name_param_name, receives
+ * the iteration count as "iters", and forwards it together with the
+ * remaining arguments to name(). It is registered under the display
+ * name "%name(param_name)"; the leading '%' marks it as relative.
+ */
+// Uses BENCHMARK_IMPL_N, the variant that takes a parameter type and
+// name; BENCHMARK_IMPL accepts only two arguments.
+#define BENCHMARK_RELATIVE_NAMED_PARAM(name, param_name, ...)           \
+  BENCHMARK_IMPL_N(                                                     \
+      FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),              \
+      "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")",          \
+      unsigned,                                                         \
+      iters) {                                                          \
+    name(iters, ## __VA_ARGS__);                                        \
+  }
+
+/**
+ * Draws a line of dashes.
+ *
+ * Implemented by registering a benchmark with an empty body under the
+ * name "-". Note that the expansion already ends with a semicolon.
+ */
+#define BENCHMARK_DRAW_LINE()                                       \
+  static bool FB_ANONYMOUS_VARIABLE(rocksdbBenchmarkUnused) = (     \
+    ::rocksdb::benchmark::AddBenchmark(__FILE__, "-", []() { }),               \
+    true);
+
+/**
+ * Allows execution of code that doesn't count toward the benchmark's
+ * time budget. Example:
+ *
+ * BENCHMARK_N(insertVectorBegin, n) {
+ *   vector<int> v;
+ *   BENCHMARK_SUSPEND {
+ *     v.reserve(n);
+ *   }
+ *   for (unsigned i = 0; i < n; ++i) {
+ *     v.insert(v.begin(), 42);
+ *   }
+ * }
+ *
+ * The macro declares a BenchmarkSuspender in the condition of an if
+ * statement. The suspender always converts to a null pointer-to-member,
+ * so the user's block runs as the else branch while the suspender is
+ * alive.
+ */
+#define BENCHMARK_SUSPEND                               \
+  if (auto FB_ANONYMOUS_VARIABLE(BENCHMARK_SUSPEND) =   \
+      ::rocksdb::benchmark::BenchmarkSuspender()) {}               \
+  else
--- a/util/benchmarkharness_test.cc
+++ b/util/benchmarkharness_test.cc
@ -0,0 +1,69 @@
+//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree. An additional grant
+//  of patent rights can be found in the PATENTS file in the same directory.
+//
+
+#include "util/benchmarkharness.h"
+#include <vector>
+
+namespace rocksdb {
+namespace benchmark {
+
+BENCHMARK(insertFrontVector) {
+  std::vector<int> v;
+  for (int i = 0; i < 100; i++) {
+    v.insert(v.begin(), i);
+  }
+}
+
+BENCHMARK_RELATIVE(insertBackVector) {
+  std::vector<int> v;
+  for (int i = 0; i < 100; i++) {
+    v.insert(v.end(), i);
+  }
+}
+
+BENCHMARK_N(insertFrontVector_n, n) {
+  std::vector<int> v;
+  for (int i = 0; i < n; i++) {
+    v.insert(v.begin(), i);
+  }
+}
+
+BENCHMARK_RELATIVE_N(insertBackVector_n, n) {
+  std::vector<int> v;
+  for (int i = 0; i < n; i++) {
+    v.insert(v.end(), i);
+  }
+}
+
+BENCHMARK_N(insertFrontEnd_n, n) {
+  std::vector<int> v;
+  for (int i = 0; i < n; i++) {
+    v.insert(v.begin(), i);
+  }
+  for (int i = 0; i < n; i++) {
+    v.insert(v.end(), i);
+  }
+}
+
+BENCHMARK_RELATIVE_N(insertFrontEndSuspend_n, n) {
+  std::vector<int> v;
+  for (int i = 0; i < n; i++) {
+    v.insert(v.begin(), i);
+  }
+  BENCHMARK_SUSPEND {
+    for (int i = 0; i < n; i++) {
+      v.insert(v.end(), i);
+    }
+  }
+}
+
+}  // namespace benchmark
+}  // namespace rocksdb
+
+// Runs every registered benchmark and prints the result table.
+// argc and argv are not inspected here.
+int main(int argc, char** argv) {
+  rocksdb::benchmark::RunBenchmarks();
+  return 0;
+}
--- a/util/log_write_benchmark.cc
+++ b/util/log_write_benchmark.cc
@ -12,7 +12,7 @@

 // A simple benchmark to simulate transactional logs

-DEFINE_int32(num_records, 6000, "Size of each record.");
+DEFINE_int32(num_records, 6000, "Number of records.");
 DEFINE_int32(record_size, 249, "Size of each record.");
 DEFINE_int32(record_interval, 10000, "Interval between records (microSec)");
 DEFINE_int32(bytes_per_sync, 0, "bytes_per_sync parameter in EnvOptions");
@ -20,7 +20,7 @@ DEFINE_bool(enable_sync, false, "sync after each write.");

 namespace rocksdb {
 void RunBenchmark() {
-  std::string file_name = test::TmpDir() + "/log_write_bench.log";
+  std::string file_name = test::TmpDir() + "/log_write_benchmark.log";
  Env* env = Env::Default();
  EnvOptions env_options;
  env_options.use_mmap_writes = false;