db_bench should use a good seed when --seed is not set or set to 0 (#9740)

Summary:
This is for https://github.com/facebook/rocksdb/issues/9737

I have wasted more than a few hours running db_bench benchmarks where --seed was not set
and getting better than expected results because cache hit rates are great because
multiple invocations of db_bench used the same value for --seed or did not set it,
and then all used 0. The result is that all see the same sequence of keys.

Others have done the same. The problem is worse in that it is easy to miss and the result is a benchmark with results that are misleading.

A good way to avoid this is to set it to the equivalent of gettimeofday() when either
--seed is not set or it is set to 0 (the default).

With this change the actual seed is printed when it was 0 at process start:
  Set seed to 1647992570365606 because --seed was 0

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9740

Test Plan:
Perf results:

./db_bench --benchmarks=fillseq,readrandom --num=1000000 --reads=4000000
  readrandom   :       6.469 micros/op 154583 ops/sec;   17.1 MB/s (4000000 of 4000000 found)

./db_bench --benchmarks=fillseq,readrandom --num=1000000 --reads=4000000 --seed=0
  readrandom   :       6.565 micros/op 152321 ops/sec;   16.9 MB/s (4000000 of 4000000 found)

./db_bench --benchmarks=fillseq,readrandom --num=1000000 --reads=4000000 --seed=1
  readrandom   :       6.461 micros/op 154777 ops/sec;   17.1 MB/s (4000000 of 4000000 found)

./db_bench --benchmarks=fillseq,readrandom --num=1000000 --reads=4000000 --seed=2
  readrandom   :       6.525 micros/op 153244 ops/sec;   17.0 MB/s (4000000 of 4000000 found)

Reviewed By: jay-zhuang

Differential Revision: D35145361

Pulled By: mdcallag

fbshipit-source-id: 2b35b153ccec46b27d7c9405997523555fc51267
This commit is contained in:
Mark Callaghan 2022-03-25 10:12:27 -07:00 committed by Facebook GitHub Bot
parent 98130c5a26
commit 1a130fa3c1

View File

@ -285,8 +285,10 @@ DEFINE_int64(deletes, -1, "Number of delete operations to do. "
DEFINE_int32(bloom_locality, 0, "Control bloom filter probes locality"); DEFINE_int32(bloom_locality, 0, "Control bloom filter probes locality");
DEFINE_int64(seed, 0, "Seed base for random number generators. " DEFINE_int64(seed, 0,
"When 0 it is deterministic."); "Seed base for random number generators. "
"When 0 it is derived from the current time.");
static int64_t seed_base;
DEFINE_int32(threads, 1, "Number of concurrent threads to run."); DEFINE_int32(threads, 1, "Number of concurrent threads to run.");
@ -2425,8 +2427,7 @@ struct ThreadState {
Stats stats; Stats stats;
SharedState* shared; SharedState* shared;
explicit ThreadState(int index) explicit ThreadState(int index) : tid(index), rand(seed_base + index) {}
: tid(index), rand((FLAGS_seed ? FLAGS_seed : 1000) + index) {}
}; };
class Duration { class Duration {
@ -4604,7 +4605,7 @@ class Benchmark {
values_[i] = i; values_[i] = i;
} }
RandomShuffle(values_.begin(), values_.end(), RandomShuffle(values_.begin(), values_.end(),
static_cast<uint32_t>(FLAGS_seed)); static_cast<uint32_t>(seed_base));
} }
} }
@ -4717,7 +4718,7 @@ class Benchmark {
// Default_random_engine provides slightly // Default_random_engine provides slightly
// improved throughput over mt19937. // improved throughput over mt19937.
std::default_random_engine overwrite_gen{ std::default_random_engine overwrite_gen{
static_cast<unsigned int>(FLAGS_seed)}; static_cast<unsigned int>(seed_base)};
std::bernoulli_distribution overwrite_decider(p); std::bernoulli_distribution overwrite_decider(p);
// Inserted key window is filled with the last N // Inserted key window is filled with the last N
@ -4727,7 +4728,7 @@ class Benchmark {
// - random access is O(1) // - random access is O(1)
// - insertion/removal at beginning/end is also O(1). // - insertion/removal at beginning/end is also O(1).
std::deque<int64_t> inserted_key_window; std::deque<int64_t> inserted_key_window;
Random64 reservoir_id_gen(FLAGS_seed); Random64 reservoir_id_gen(seed_base);
// --- Variables used in disposable/persistent keys simulation: // --- Variables used in disposable/persistent keys simulation:
// The following variables are used when // The following variables are used when
@ -4764,7 +4765,7 @@ class Benchmark {
ErrorExit(); ErrorExit();
} }
} }
Random rnd_disposable_entry(static_cast<uint32_t>(FLAGS_seed)); Random rnd_disposable_entry(static_cast<uint32_t>(seed_base));
std::string random_value; std::string random_value;
// Queue that stores scheduled timestamp of disposable entries deletes, // Queue that stores scheduled timestamp of disposable entries deletes,
// along with starting index of disposable entry keys to delete. // along with starting index of disposable entry keys to delete.
@ -8105,6 +8106,15 @@ int db_bench_tool(int argc, char** argv) {
FLAGS_use_existing_db |= FLAGS_readonly; FLAGS_use_existing_db |= FLAGS_readonly;
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
if (!FLAGS_seed) {
uint64_t now = FLAGS_env->GetSystemClock()->NowMicros();
seed_base = static_cast<int64_t>(now);
fprintf(stdout, "Set seed to %" PRIu64 " because --seed was 0\n",
seed_base);
} else {
seed_base = FLAGS_seed;
}
if (FLAGS_use_existing_keys && !FLAGS_use_existing_db) { if (FLAGS_use_existing_keys && !FLAGS_use_existing_db) {
fprintf(stderr, fprintf(stderr,
"`-use_existing_db` must be true for `-use_existing_keys` to be " "`-use_existing_db` must be true for `-use_existing_keys` to be "