For db_bench --benchmarks=fillseq with --num_multi_db load databases … (#9713)

Summary:
…in order

This fixes https://github.com/facebook/rocksdb/issues/9650
For db_bench --benchmarks=fillseq --num_multi_db=X it loads databases in sequence
rather than randomly choosing a database per Put. The benefits are:
1) avoids long delays between flushing memtables
2) avoids flushing memtables for all of them at the same point in time
3) puts same number of keys per database so that query tests will find keys as expected

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9713

Test Plan:
Using db_bench.1 without the change and db_bench.2 with the change:

for i in 1 2; do rm -rf /data/m/rx/* ; time ./db_bench.$i --db=/data/m/rx --benchmarks=fillseq --num_multi_db=4 --num=10000000; du -hs /data/m/rx ; done

 --- without the change
    fillseq      :       3.188 micros/op 313682 ops/sec;   34.7 MB/s
    real    2m7.787s
    user    1m52.776s
    sys     0m46.549s
    2.7G    /data/m/rx

 --- with the change

    fillseq      :       3.149 micros/op 317563 ops/sec;   35.1 MB/s
    real    2m6.196s
    user    1m51.482s
    sys     0m46.003s
    2.7G    /data/m/rx

    Also, temporarily added a printf to confirm that the code switches to the next database at the right time
    ZZ switch to db 1 at 10000000
    ZZ switch to db 2 at 20000000
    ZZ switch to db 3 at 30000000

for i in 1 2; do rm -rf /data/m/rx/* ; time ./db_bench.$i --db=/data/m/rx --benchmarks=fillseq,readrandom --num_multi_db=4 --num=100000; du -hs /data/m/rx ; done

 --- without the change, smaller database, note that not all keys are found by readrandom because databases have < and > --num keys

    fillseq      :       3.176 micros/op 314805 ops/sec;   34.8 MB/s
    readrandom   :       1.913 micros/op 522616 ops/sec;   57.7 MB/s (99873 of 100000 found)

 --- with the change, smaller database, note that all keys are found by readrandom

    fillseq      :       3.110 micros/op 321566 ops/sec;   35.6 MB/s
    readrandom   :       1.714 micros/op 583257 ops/sec;   64.5 MB/s (100000 of 100000 found)

Reviewed By: jay-zhuang

Differential Revision: D35030168

Pulled By: mdcallag

fbshipit-source-id: 2a18c4ec571d954cf5a57b00a11802a3608823ee
This commit is contained in:
Mark Callaghan 2022-03-22 10:36:24 -07:00 committed by Facebook GitHub Bot
parent 8102690a52
commit 63a284a6ad

View File

@ -4769,6 +4769,9 @@ class Benchmark {
int64_t stage = 0;
int64_t num_written = 0;
int64_t next_seq_db_at = num_ops;
size_t id = 0;
while ((num_per_key_gen != 0) && !duration.Done(entries_per_batch_)) {
if (duration.GetStage() != stage) {
stage = duration.GetStage();
@ -4781,8 +4784,25 @@ class Benchmark {
}
}
size_t id = thread->rand.Next() % num_key_gens;
if (write_mode != SEQUENTIAL) {
id = thread->rand.Next() % num_key_gens;
} else {
// When doing a sequential load with multiple databases, load them in
// order rather than all at the same time to avoid:
// 1) long delays between flushing memtables
// 2) flushing memtables for all of them at the same point in time
// 3) not putting the same number of keys in each database
if (num_written >= next_seq_db_at) {
next_seq_db_at += num_ops;
id++;
if (id >= num_key_gens) {
fprintf(stderr, "Logic error. Filled all databases\n");
ErrorExit();
}
}
}
DBWithColumnFamilies* db_with_cfh = SelectDBWithCfh(id);
batch.Clear();
int64_t batch_bytes = 0;