Add overwrite_probability for filluniquerandom benchmark in db_bench (#8569)

Summary:
Add the `overwrite_probability` and `overwrite_window_size` flags to `db_bench`.
Add the possibility of performing a `filluniquerandom` benchmark with an overwrite probability.
For each write operation, there is a probability _p_ that the write is an overwrite (_p_=`overwrite_probability`).
When an overwrite is decided, the key is randomly chosen from the last _N_ keys previously inserted into the DB (with _N_=`overwrite_window_size`).
When a pure write is decided, the key inserted into the DB is unique and therefore will not be an overwrite.
The `overwrite_window_size` is used so that the user can decide whether the overwrites mostly target recently inserted keys (when `overwrite_window_size` is small compared to the total number of writes), or can also target keys inserted "a long time ago" (when `overwrite_window_size` is comparable to the total number of writes).
Note that total number of writes = # of unique insertions + # of overwrites.
No unit test specifically added.
Local testing shows the following **throughputs** for `filluniquerandom` with 1M total writes:
- bypassing the newly inserted code (no `overwrite_probability` flag specified): ~14.0MB/s
- `overwrite_probability=0.99`, `overwrite_window_size=10`: ~17.0MB/s
- `overwrite_probability=0.10`, `overwrite_window_size=10`: ~14.0MB/s
- `overwrite_probability=0.99`, `overwrite_window_size=1M`: ~14.5MB/s
- `overwrite_probability=0.10`, `overwrite_window_size=1M`: ~14.0MB/s

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8569

Reviewed By: pdillinger

Differential Revision: D29818631

Pulled By: bjlemaire

fbshipit-source-id: d472b4ea4e457a4da7c4ee4f14b40cccd6a4587a
This commit is contained in:
Baptiste Lemaire 2021-07-21 11:32:36 -07:00 committed by Facebook GitHub Bot
parent 87e82a41a9
commit 6b4cdacf41

View File

@ -446,6 +446,26 @@ DEFINE_int32(num_multi_db, 0,
// Fraction of the original size that generated values should shrink to once
// compressed; defaults to 0.5.
DEFINE_double(compression_ratio, 0.5,
              "Arrange to generate values that shrink to this fraction of "
              "their original size after compression");
// Per-write probability that a 'filluniquerandom' write overwrites one of the
// recently inserted keys instead of inserting a new one. Defaults to 0.0.
DEFINE_double(overwrite_probability, 0.0,
              "Used in 'filluniquerandom' benchmark: for each write "
              "operation, we give a probability to perform an overwrite "
              "instead. The key used for the overwrite is randomly chosen "
              "from the last 'overwrite_window_size' keys previously "
              "inserted into the DB. Valid overwrite_probability values: "
              "[0.0, 1.0].");
// Number of most recently inserted keys that an overwrite may pick from in
// the 'filluniquerandom' benchmark. Defaults to 1.
DEFINE_uint32(overwrite_window_size, 1,
              "Used in 'filluniquerandom' benchmark. For each write "
              "operation, when the overwrite_probability flag is set by the "
              "user, the key used to perform an overwrite is randomly chosen "
              "from the last 'overwrite_window_size' keys previously "
              "inserted into the DB. Warning: large values can affect "
              "throughput. Valid overwrite_window_size values: "
              "[1, kMaxUint32].");
DEFINE_double(read_random_exp_range, 0.0,
"Read random's key will be generated using distribution of "
"num * exp(-r) where r is uniform number from 0 to this value. "
@ -4807,6 +4827,36 @@ class Benchmark {
Slice begin_key = AllocateKey(&begin_key_guard);
std::unique_ptr<const char[]> end_key_guard;
Slice end_key = AllocateKey(&end_key_guard);
double p = 0.0;
uint64_t num_overwrites = 0, num_unique_keys = 0;
// If user set overwrite_probability flag,
// check if value is in [0.0,1.0].
if (FLAGS_overwrite_probability > 0.0) {
p = FLAGS_overwrite_probability > 1.0 ? 1.0 : FLAGS_overwrite_probability;
// If overwrite set by user, and UNIQUE_RANDOM mode on,
// the overwrite_window_size must be > 0.
if (write_mode == UNIQUE_RANDOM && FLAGS_overwrite_window_size == 0) {
fprintf(stderr,
"Overwrite_window_size must be strictly greater than 0.\n");
ErrorExit();
}
}
// Default_random_engine provides slightly
// improved throughput over mt19937.
std::default_random_engine overwrite_gen{
static_cast<unsigned int>(FLAGS_seed)};
std::bernoulli_distribution overwrite_decider(p);
// Inserted key window is filled with the last N
// keys previously inserted into the DB (with
// N=FLAGS_overwrite_window_size).
// We use a deque struct because:
// - random access is O(1)
// - insertion/removal at beginning/end is also O(1).
std::deque<int64_t> inserted_key_window;
Random64 reservoir_id_gen(FLAGS_seed);
std::vector<std::unique_ptr<const char[]>> expanded_key_guards;
std::vector<Slice> expanded_keys;
if (FLAGS_expand_range_tombstones) {
@ -4841,7 +4891,26 @@ class Benchmark {
int64_t batch_bytes = 0;
for (int64_t j = 0; j < entries_per_batch_; j++) {
int64_t rand_num = key_gens[id]->Next();
int64_t rand_num = 0;
if ((write_mode == UNIQUE_RANDOM) && (p > 0.0)) {
if ((inserted_key_window.size() > 0) &&
overwrite_decider(overwrite_gen)) {
num_overwrites++;
rand_num = inserted_key_window[reservoir_id_gen.Next() %
inserted_key_window.size()];
} else {
num_unique_keys++;
rand_num = key_gens[id]->Next();
if (inserted_key_window.size() < FLAGS_overwrite_window_size) {
inserted_key_window.push_back(rand_num);
} else {
inserted_key_window.pop_front();
inserted_key_window.push_back(rand_num);
}
}
} else {
rand_num = key_gens[id]->Next();
}
GenerateKeyFromInt(rand_num, FLAGS_num, &key);
Slice val = gen.Generate();
if (use_blob_db_) {
@ -4969,6 +5038,12 @@ class Benchmark {
ErrorExit();
}
}
// Report how the writes were split between new unique keys and overwrites.
// Only printed when overwrite mode was active (UNIQUE_RANDOM with p > 0),
// since the two counters are only updated on that path.
if ((write_mode == UNIQUE_RANDOM) && (p > 0.0)) {
  fprintf(stdout,
          "Number of unique keys inserted: %" PRIu64
          ".\nNumber of overwrites: %" PRIu64 "\n",
          num_unique_keys, num_overwrites);
}
thread->stats.AddBytes(bytes);
}