Make mixgraph easier to use (#9711)

Summary:
Changes:
* improves monitoring by displaying average size of a Put value and average scan length
* forces the minimum value size to be 10. Before this change it was 0 if you didn't set the distribution parameters.
* uses reasonable defaults for the distribution parameters that determine value size and scan length
* includes seeks in the "reads ... found" message; before this change they were missing

This is for https://github.com/facebook/rocksdb/issues/9672

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9711

Test Plan:
Before this change:

./db_bench --benchmarks=fillseq,mixgraph --mix_get_ratio=50 --mix_put_ratio=25 --mix_seek_ratio=25 --num=100000 --value_k=0.2615 --value_sigma=25.45 --iter_k=2.517 --iter_sigma=14.236
fillseq      :       4.289 micros/op 233138 ops/sec;   25.8 MB/s
mixgraph     :      18.461 micros/op 54166 ops/sec;  755.0 MB/s ( Gets:50164 Puts:24919 Seek:24917 of 50164 in 75081 found)

After this change:

./db_bench --benchmarks=fillseq,mixgraph --mix_get_ratio=50 --mix_put_ratio=25 --mix_seek_ratio=25 --num=100000 --value_k=0.2615 --value_sigma=25.45 --iter_k=2.517 --iter_sigma=14.236
fillseq      :       3.974 micros/op 251553 ops/sec;   27.8 MB/s
mixgraph     :      16.722 micros/op 59795 ops/sec;  833.5 MB/s ( Gets:50164 Puts:24919 Seek:24917, reads 75081 in 75081 found, avg size: 36.0 value, 504.9 scan)

Reviewed By: jay-zhuang

Differential Revision: D35030190

Pulled By: mdcallag

fbshipit-source-id: d8f555f28d869f752ddb674a524108884511b151
This commit is contained in:
Mark Callaghan 2022-03-21 17:30:51 -07:00 committed by Facebook GitHub Bot
parent cb4d188a34
commit 1ca1562e35

View File

@ -1357,19 +1357,23 @@ DEFINE_double(key_dist_b, 0.0,
DEFINE_double(value_theta, 0.0,
"The parameter 'theta' of Generized Pareto Distribution "
"f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
DEFINE_double(value_k, 0.0,
// Use reasonable defaults based on the mixgraph paper
DEFINE_double(value_k, 0.2615,
"The parameter 'k' of Generized Pareto Distribution "
"f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
DEFINE_double(value_sigma, 0.0,
// Use reasonable defaults based on the mixgraph paper
DEFINE_double(value_sigma, 25.45,
"The parameter 'theta' of Generized Pareto Distribution "
"f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
DEFINE_double(iter_theta, 0.0,
"The parameter 'theta' of Generized Pareto Distribution "
"f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
DEFINE_double(iter_k, 0.0,
// Use reasonable defaults based on the mixgraph paper
DEFINE_double(iter_k, 2.517,
"The parameter 'k' of Generized Pareto Distribution "
"f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
DEFINE_double(iter_sigma, 0.0,
// Use reasonable defaults based on the mixgraph paper
DEFINE_double(iter_sigma, 14.236,
"The parameter 'sigma' of Generized Pareto Distribution "
"f(x)=(1/sigma)*(1+k*(x-theta)/sigma)^-(1/k+1)");
DEFINE_double(mix_get_ratio, 1.0,
@ -6026,13 +6030,14 @@ class Benchmark {
// needs to decide the ratio between Get, Put, Iterator queries before
// starting the benchmark.
void MixGraph(ThreadState* thread) {
int64_t read = 0; // including single gets and Next of iterators
int64_t gets = 0;
int64_t puts = 0;
int64_t found = 0;
int64_t get_found = 0;
int64_t seek = 0;
int64_t seek_found = 0;
int64_t bytes = 0;
double total_scan_length = 0;
double total_val_size = 0;
const int64_t default_value_max = 1 * 1024 * 1024;
int64_t value_max = default_value_max;
int64_t scan_len_max = FLAGS_mix_max_scan_len;
@ -6131,7 +6136,6 @@ class Benchmark {
if (query_type == 0) {
// the Get query
gets++;
read++;
if (FLAGS_num_column_families > 1) {
s = db_with_cfh->db->Get(read_options_, db_with_cfh->GetCfh(key_rand),
key, &pinnable_val);
@ -6143,14 +6147,14 @@ class Benchmark {
}
if (s.ok()) {
found++;
get_found++;
bytes += key.size() + pinnable_val.size();
} else if (!s.IsNotFound()) {
fprintf(stderr, "Get returned an error: %s\n", s.ToString().c_str());
abort();
}
if (thread->shared->read_rate_limiter && read % 100 == 0) {
if (thread->shared->read_rate_limiter && (gets + seek) % 100 == 0) {
thread->shared->read_rate_limiter->Request(100, Env::IO_HIGH,
nullptr /*stats*/);
}
@ -6160,11 +6164,13 @@ class Benchmark {
puts++;
int64_t val_size = ParetoCdfInversion(
u, FLAGS_value_theta, FLAGS_value_k, FLAGS_value_sigma);
if (val_size < 0) {
if (val_size < 10) {
val_size = 10;
} else if (val_size > value_max) {
val_size = val_size % value_max;
}
total_val_size += val_size;
s = db_with_cfh->db->Put(
write_options_, key,
gen.Generate(static_cast<unsigned int>(val_size)));
@ -6186,7 +6192,6 @@ class Benchmark {
if (single_iter != nullptr) {
single_iter->Seek(key);
seek++;
read++;
if (single_iter->Valid() && single_iter->key().compare(key) == 0) {
seek_found++;
}
@ -6201,6 +6206,7 @@ class Benchmark {
bytes += single_iter->key().size() + single_iter->value().size();
single_iter->Next();
assert(single_iter->status().ok());
total_scan_length++;
}
}
delete single_iter;
@ -6210,9 +6216,12 @@ class Benchmark {
}
char msg[256];
snprintf(msg, sizeof(msg),
"( Gets:%" PRIu64 " Puts:%" PRIu64 " Seek:%" PRIu64 " of %" PRIu64
" in %" PRIu64 " found)\n",
gets, puts, seek, found, read);
"( Gets:%" PRIu64 " Puts:%" PRIu64 " Seek:%" PRIu64
", reads %" PRIu64 " in %" PRIu64
" found, "
"avg size: %.1f value, %.1f scan)\n",
gets, puts, seek, get_found + seek_found, gets + seek,
total_val_size / puts, total_scan_length / seek);
thread->stats.AddBytes(bytes);
thread->stats.AddMessage(msg);