rocksdb/tools/benchmark.sh
Mark Callaghan 99ec2412e5 Make the benchmark scripts configurable and add tests
Summary:
This makes run_flash_bench.sh configurable. Previously it was hardwired for 1B keys and tests
ran for 12 hours each. That kept me from using it. This makes it configurable, adds more tests,
makes the duration per-test configurable and refactors the test scripts.

Adds the seekrandomwhilemerging test to db_bench which is the same as seekrandomwhilewriting except
the writer thread does Merge rather than Put.

Forces the stall-time column in compaction IO stats to use a fixed format (H:M:S) which makes
it easier to scrape and parse. Also adds an option to AppendHumanMicros to force a fixed format.
Sometimes automation and humans want different formats.

Calls thread->stats.AddBytes(bytes); in db_bench for more tests to get the MB/sec summary
stats in the output at test end.

Adds the average ingest rate to compaction IO stats. Output now looks like:
https://gist.github.com/mdcallag/2bd64d18be1b93adc494

More information on the benchmark output is at https://gist.github.com/mdcallag/db43a58bd5ac624f01e1

For benchmark.sh changes default RocksDB configuration to reduce stalls:
* min_level_to_compress from 2 to 3
* hard_rate_limit from 2 to 3
* max_grandparent_overlap_factor and max_bytes_for_level_multiplier from 10 to 8
* L0 file count triggers from 4,8,12 to 4,12,20 for (start,stall,stop)

Task ID: #6596829

Blame Rev:

Test Plan:
run tools/run_flash_bench.sh

Revert Plan:

Database Impact:

Memcache Impact:

Other Notes:

EImportant:

- begin *PUBLIC* platform impact section -
Bugzilla: #
- end platform impact -

Reviewers: igor

Reviewed By: igor

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D36075
2015-03-30 11:28:25 -07:00

323 lines
9.8 KiB
Bash
Executable File

#!/bin/bash
# REQUIRE: db_bench binary exists in the current directory
#
# Usage: ./benchmark.sh job[,job...]
#
# Required environment:
#   DB_DIR      passed to db_bench as --db
#   WAL_DIR     passed to db_bench as --wal_dir
# Optional environment:
#   OUTPUT_DIR  directory for the logs, report.txt and schedule.txt
#               (default /tmp/, created when missing)
#
# Usage and configuration errors are written to stderr and exit with a
# non-zero status so that wrapper scripts can detect them.

if [ $# -ne 1 ]; then
  {
    echo "usage: ./benchmark.sh job[,job...]"
    echo "jobs: bulkload fillseq overwrite updaterandom mergerandom"
    echo "      filluniquerandom readrandom fwdrange revrange"
    echo "      readwhilewriting readwhilemerging"
    echo "      fwdrangewhilewriting revrangewhilewriting"
    echo "      fwdrangewhilemerging revrangewhilemerging debug"
  } >&2
  exit 1
fi

# size constants
K=1024
M=$((1024 * K))
G=$((1024 * M))

if [ -z "$DB_DIR" ]; then
  echo "DB_DIR is not defined" >&2
  exit 1
fi

if [ -z "$WAL_DIR" ]; then
  echo "WAL_DIR is not defined" >&2
  exit 1
fi

output_dir=${OUTPUT_DIR:-/tmp/}
if [ ! -d "$output_dir" ]; then
  # Without the output directory every later tee/report write would fail,
  # so stop here (mkdir prints its own diagnostic).
  mkdir -p "$output_dir" || exit 1
fi
# all multithreaded tests run with sync=1 unless
# $DB_BENCH_NO_SYNC is defined
syncval="1"
if [ -n "$DB_BENCH_NO_SYNC" ]; then
  echo "Turning sync off for all multithreaded tests"
  syncval="0"
fi

# Tunables: each one can be overridden through the upper-case environment
# variable named in its default expansion.
num_threads=${NUM_THREADS:-16}
# Only for *whilewriting, *whilemerging
writes_per_second=${WRITES_PER_SECOND:-$((10 * K))}
# Only for tests that do range scans
num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10}
cache_size=${CACHE_SIZE:-$((1 * G))}
duration=${DURATION:-0}
num_keys=${NUM_KEYS:-$((1 * G))}
key_size=20
value_size=${VALUE_SIZE:-400}

# db_bench flags shared by every test. This is kept as one flat string
# because the run_* functions splice it into a command line that is later
# word-split and passed through eval; whitespace inside it is not
# significant. Values are expanded here, once, so later changes to the
# variables above do not alter the flags.
const_params="
  --db=$DB_DIR \
  --wal_dir=$WAL_DIR \
  --disable_data_sync=0 \
  --num=$num_keys \
  --num_levels=6 \
  --key_size=$key_size \
  --value_size=$value_size \
  --block_size=4096 \
  --cache_size=$cache_size \
  --cache_numshardbits=6 \
  --compression_type=zlib \
  --min_level_to_compress=3 \
  --compression_ratio=0.5 \
  --hard_rate_limit=3 \
  --rate_limit_delay_max_milliseconds=1000000 \
  --write_buffer_size=$((128 * M)) \
  --max_write_buffer_number=3 \
  --target_file_size_base=$((128 * M)) \
  --max_bytes_for_level_base=$((1 * G)) \
  --verify_checksum=1 \
  --delete_obsolete_files_period_micros=$((60 * M)) \
  --max_grandparent_overlap_factor=8 \
  --max_bytes_for_level_multiplier=8 \
  --statistics=1 \
  --stats_per_interval=1 \
  --stats_interval=$((1 * M)) \
  --histogram=1 \
  --memtablerep=skip_list \
  --bloom_bits=10 \
  --open_files=$((20 * K))"

# Level-0 file-count triggers used by every test except the bulk load,
# which substitutes $((10 * M)) for each of the three (see params_bulkload).
l0_config="
  --level0_file_num_compaction_trigger=4 \
  --level0_slowdown_writes_trigger=12 \
  --level0_stop_writes_trigger=20"

# --duration is only passed when a positive DURATION was requested.
if [ "$duration" -gt 0 ]; then
  const_params="$const_params --duration=$duration"
fi

params_w="$const_params $l0_config --max_background_compactions=16 --max_background_flushes=16"

params_bulkload="$const_params --max_background_compactions=16 --max_background_flushes=16 \
  --level0_file_num_compaction_trigger=$((10 * M)) \
  --level0_slowdown_writes_trigger=$((10 * M)) \
  --level0_stop_writes_trigger=$((10 * M))"
# Scrapes one db_bench log and appends a tab-separated summary row to
# $output_dir/report.txt.
#
# Globals:   output_dir (read)
# Arguments: $1 - db_bench log file to scrape
#            $2 - label written in the last column of the row
#            $3 - benchmark name that starts the db_bench result line
# Outputs:   one row appended to report.txt, in this column order:
#            ops/sec, MB/sec, L0 write, Sum write, write-amp, write MB/s,
#            usec/op, p50, p75, p99, uptime, stall time, stall %, label
#
# Write-amp (Sum write / L0 write) and write MB/s (Sum write * 1024 /
# uptime) are left empty when an operand is missing or the divisor is
# zero, e.g. for a read-only test. That is the value bc produced in those
# cases anyway, but without the divide-by-zero / syntax errors on stderr.
function summarize_result {
  local test_out=$1
  local test_name=$2
  local bench_name=$3
  local uptime stall_time stall_pct ops_sec mb_sec lo_wgb sum_wgb
  local usecs_op p50 p75 p99
  local wamp="" wmb_ps=""
  # A plain non-negative decimal such as 12, 0.5 or .5
  local number_re='^([0-9]+\.?[0-9]*|\.[0-9]+)$'

  uptime=$( grep '^Uptime(secs' "$test_out" | tail -1 | awk '{ printf "%.0f", $2 }' )
  stall_time=$( grep "^Cumulative stall" "$test_out" | tail -1 | awk '{ print $3 }' )
  stall_pct=$( grep "^Cumulative stall" "$test_out" | tail -1 | awk '{ print $5 }' )
  ops_sec=$( grep "^${bench_name}" "$test_out" | awk '{ print $5 }' )
  mb_sec=$( grep "^${bench_name}" "$test_out" | awk '{ print $7 }' )
  lo_wgb=$( grep "^ L0" "$test_out" | tail -1 | awk '{ print $8 }' )
  sum_wgb=$( grep "^ Sum" "$test_out" | tail -1 | awk '{ print $8 }' )

  if [[ $sum_wgb =~ $number_re ]]; then
    # The second match ("contains a non-zero digit") rejects 0, 0.0, ...
    if [[ $lo_wgb =~ $number_re && $lo_wgb =~ [1-9] ]]; then
      wamp=$( echo "scale=1; $sum_wgb / $lo_wgb" | bc )
    fi
    if [[ $uptime =~ $number_re && $uptime =~ [1-9] ]]; then
      wmb_ps=$( echo "scale=1; ( $sum_wgb * 1024.0 ) / $uptime" | bc )
    fi
  fi

  usecs_op=$( grep "^${bench_name}" "$test_out" | awk '{ printf "%.1f", $3 }' )
  p50=$( grep "^Percentiles:" "$test_out" | awk '{ printf "%.1f", $3 }' )
  p75=$( grep "^Percentiles:" "$test_out" | awk '{ printf "%.1f", $5 }' )
  p99=$( grep "^Percentiles:" "$test_out" | awk '{ printf "%.1f", $7 }' )

  printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
    "$ops_sec" "$mb_sec" "$lo_wgb" "$sum_wgb" "$wamp" "$wmb_ps" \
    "$usecs_op" "$p50" "$p75" "$p99" "$uptime" "$stall_time" "$stall_pct" \
    "$test_name" \
    >> "$output_dir/report.txt"
}
# Runs the bulk load: a single-threaded fillrandom with
# --disable_auto_compactions=1 and $params_bulkload, followed by a
# db_bench "compact" pass with $params_w. Only the fillrandom step is
# summarized into the report.
#
# Globals:   num_keys, output_dir, params_bulkload, params_w (read)
# Outputs:   benchmark_bulkload_fillrandom.log and
#            benchmark_bulkload_compact.log in $output_dir; each starts
#            with the command line, followed by the db_bench output.
function run_bulkload {
  local load_log="$output_dir/benchmark_bulkload_fillrandom.log"
  local compact_log="$output_dir/benchmark_bulkload_compact.log"
  local cmd

  echo "Bulk loading $num_keys random keys"
  cmd="./db_bench --benchmarks=fillrandom \
       --use_existing_db=0 \
       --disable_auto_compactions=1 \
       --sync=0 \
       $params_bulkload \
       --threads=1 \
       2>&1 | tee -a $load_log"
  # $cmd must stay unquoted: the parameter strings contain newlines, and
  # word splitting is what flattens them into a single command line. A
  # quoted eval would run each line as a separate command.
  # shellcheck disable=SC2086
  echo $cmd | tee "$load_log"
  # shellcheck disable=SC2086
  eval $cmd
  summarize_result "$load_log" bulkload fillrandom

  echo "Compacting..."
  cmd="./db_bench --benchmarks=compact \
       --use_existing_db=1 \
       --disable_auto_compactions=1 \
       --sync=0 \
       $params_w \
       --threads=1 \
       2>&1 | tee -a $compact_log"
  # shellcheck disable=SC2086
  echo $cmd | tee "$compact_log"
  # shellcheck disable=SC2086
  eval $cmd
}
# Runs a single-threaded db_bench fillseq with --use_existing_db=0 and
# $params_w, then summarizes the log into the report.
#
# Globals:   num_keys, output_dir, params_w (read)
# Outputs:   $output_dir/benchmark_fillseq.log (command line first, then
#            the db_bench output)
function run_fillseq {
  local log_file="$output_dir/benchmark_fillseq.log"
  local cmd

  echo "Loading $num_keys keys sequentially"
  cmd="./db_bench --benchmarks=fillseq \
       --use_existing_db=0 \
       --sync=0 \
       $params_w \
       --threads=1 \
       2>&1 | tee -a $log_file"
  # $cmd must stay unquoted: $params_w contains newlines, and word
  # splitting is what flattens them into a single command line. A quoted
  # eval would run each line as a separate command.
  # shellcheck disable=SC2086
  echo $cmd | tee "$log_file"
  # shellcheck disable=SC2086
  eval $cmd
  summarize_result "$log_file" fillseq fillseq
}
# Runs one db_bench benchmark against the existing database
# (--use_existing_db=1) with $num_threads threads, --sync=$syncval and
# --merge_operator="put", then summarizes the log into the report.
#
# Globals:   num_keys, num_threads, syncval, output_dir, params_w (read)
# Arguments: $1 - db_bench benchmark name (the dispatcher passes
#                 overwrite, updaterandom or mergerandom)
# Outputs:   $output_dir/benchmark_<op>.t<threads>.s<sync>.log
function run_change {
  local operation=$1
  local run_tag="${operation}.t${num_threads}.s${syncval}"
  local log_file="$output_dir/benchmark_${run_tag}.log"
  local cmd

  echo "Do $num_keys random $operation"
  cmd="./db_bench --benchmarks=$operation \
       --use_existing_db=1 \
       --sync=$syncval \
       $params_w \
       --threads=$num_threads \
       --merge_operator=\"put\" \
       2>&1 | tee -a $log_file"
  # $cmd must stay unquoted: $params_w contains newlines, and word
  # splitting is what flattens them into a single command line. A quoted
  # eval would run each line as a separate command.
  # shellcheck disable=SC2086
  echo $cmd | tee "$log_file"
  # shellcheck disable=SC2086
  eval $cmd
  summarize_result "$log_file" "$run_tag" "$operation"
}
# Runs a single-threaded db_bench filluniquerandom with
# --use_existing_db=0 and $params_w, then summarizes the log into the
# report.
#
# Globals:   num_keys, output_dir, params_w (read)
# Outputs:   $output_dir/benchmark_filluniquerandom.log (command line
#            first, then the db_bench output)
function run_filluniquerandom {
  local log_file="$output_dir/benchmark_filluniquerandom.log"
  local cmd

  echo "Loading $num_keys unique keys randomly"
  cmd="./db_bench --benchmarks=filluniquerandom \
       --use_existing_db=0 \
       --sync=0 \
       $params_w \
       --threads=1 \
       2>&1 | tee -a $log_file"
  # $cmd must stay unquoted: $params_w contains newlines, and word
  # splitting is what flattens them into a single command line. A quoted
  # eval would run each line as a separate command.
  # shellcheck disable=SC2086
  echo $cmd | tee "$log_file"
  # shellcheck disable=SC2086
  eval $cmd
  summarize_result "$log_file" filluniquerandom filluniquerandom
}
# Runs db_bench readrandom against the existing database
# (--use_existing_db=1) with $num_threads threads, then summarizes the
# log into the report.
#
# Globals:   num_keys, num_threads, output_dir, params_w (read)
# Outputs:   $output_dir/benchmark_readrandom.t<threads>.log
function run_readrandom {
  local run_tag="readrandom.t${num_threads}"
  local log_file="$output_dir/benchmark_${run_tag}.log"
  local cmd

  echo "Reading $num_keys random keys"
  cmd="./db_bench --benchmarks=readrandom \
       --use_existing_db=1 \
       $params_w \
       --threads=$num_threads \
       2>&1 | tee -a $log_file"
  # $cmd must stay unquoted: $params_w contains newlines, and word
  # splitting is what flattens them into a single command line. A quoted
  # eval would run each line as a separate command.
  # shellcheck disable=SC2086
  echo $cmd | tee "$log_file"
  # shellcheck disable=SC2086
  eval $cmd
  summarize_result "$log_file" "$run_tag" readrandom
}
# Runs db_bench readwhile<op> against the existing database
# (--use_existing_db=1) with $num_threads threads, --sync=$syncval,
# --writes_per_second=$writes_per_second and --merge_operator="put",
# then summarizes the log into the report.
#
# Globals:   num_keys, num_threads, syncval, writes_per_second,
#            output_dir, params_w (read)
# Arguments: $1 - suffix appended to "readwhile" to form the benchmark
#                 name (the dispatcher passes writing or merging)
# Outputs:   $output_dir/benchmark_readwhile<op>.t<threads>.log
function run_readwhile {
  local operation=$1
  local bench="readwhile${operation}"
  local run_tag="${bench}.t${num_threads}"
  local log_file="$output_dir/benchmark_${run_tag}.log"
  local cmd

  echo "Reading $num_keys random keys while $operation"
  cmd="./db_bench --benchmarks=$bench \
       --use_existing_db=1 \
       --sync=$syncval \
       $params_w \
       --threads=$num_threads \
       --writes_per_second=$writes_per_second \
       --merge_operator=\"put\" \
       2>&1 | tee -a $log_file"
  # $cmd must stay unquoted: $params_w contains newlines, and word
  # splitting is what flattens them into a single command line. A quoted
  # eval would run each line as a separate command.
  # shellcheck disable=SC2086
  echo $cmd | tee "$log_file"
  # shellcheck disable=SC2086
  eval $cmd
  summarize_result "$log_file" "$run_tag" "$bench"
}
# Runs db_bench seekrandomwhile<op> against the existing database
# (--use_existing_db=1) with $num_threads threads, --sync=$syncval,
# --writes_per_second=$writes_per_second, --merge_operator="put",
# --seek_nexts=$num_nexts_per_seek and the requested --reverse_iterator
# value, then summarizes the log into the report.
#
# Globals:   num_keys, num_threads, syncval, writes_per_second,
#            num_nexts_per_seek, output_dir, params_w (read)
# Arguments: $1 - suffix appended to "seekrandomwhile" to form the
#                 benchmark name (the dispatcher passes writing or merging)
#            $2 - job name used for the log file and the report label
#            $3 - value for --reverse_iterator (the dispatcher passes
#                 true or false)
# Outputs:   $output_dir/benchmark_<job>.t<threads>.log
function run_rangewhile {
  local operation=$1
  local full_name=$2
  local reverse_arg=$3
  local bench="seekrandomwhile${operation}"
  local run_tag="${full_name}.t${num_threads}"
  local log_file="$output_dir/benchmark_${run_tag}.log"
  local cmd

  echo "Range scan $num_keys random keys while ${operation} for reverse_iter=${reverse_arg}"
  cmd="./db_bench --benchmarks=$bench \
       --use_existing_db=1 \
       --sync=$syncval \
       $params_w \
       --threads=$num_threads \
       --writes_per_second=$writes_per_second \
       --merge_operator=\"put\" \
       --seek_nexts=$num_nexts_per_seek \
       --reverse_iterator=$reverse_arg \
       2>&1 | tee -a $log_file"
  # $cmd must stay unquoted: $params_w contains newlines, and word
  # splitting is what flattens them into a single command line. A quoted
  # eval would run each line as a separate command.
  # shellcheck disable=SC2086
  echo $cmd | tee "$log_file"
  # shellcheck disable=SC2086
  eval $cmd
  summarize_result "$log_file" "$run_tag" "$bench"
}
# Runs db_bench seekrandom against the existing database
# (--use_existing_db=1) with $num_threads threads,
# --seek_nexts=$num_nexts_per_seek and the requested --reverse_iterator
# value, then summarizes the log into the report.
#
# Globals:   num_keys, num_threads, num_nexts_per_seek, output_dir,
#            params_w (read)
# Arguments: $1 - job name used for the log file and the report label
#            $2 - value for --reverse_iterator (the dispatcher passes
#                 true or false)
# Outputs:   $output_dir/benchmark_<job>.t<threads>.log
function run_range {
  local full_name=$1
  local reverse_arg=$2
  local run_tag="${full_name}.t${num_threads}"
  local log_file="$output_dir/benchmark_${run_tag}.log"
  local cmd

  echo "Range scan $num_keys random keys for reverse_iter=${reverse_arg}"
  cmd="./db_bench --benchmarks=seekrandom \
       --use_existing_db=1 \
       $params_w \
       --threads=$num_threads \
       --seek_nexts=$num_nexts_per_seek \
       --reverse_iterator=$reverse_arg \
       2>&1 | tee -a $log_file"
  # $cmd must stay unquoted: $params_w contains newlines, and word
  # splitting is what flattens them into a single command line. A quoted
  # eval would run each line as a separate command.
  # shellcheck disable=SC2086
  echo $cmd | tee "$log_file"
  # shellcheck disable=SC2086
  eval $cmd
  summarize_result "$log_file" "$run_tag" seekrandom
}
# Prints the output of `date +%s` on a single line; the main loop
# subtracts two such readings to report how long a job took.
function now() {
  local stamp
  stamp=$(date +"%s")
  echo $stamp
}
report="$output_dir/report.txt"
schedule="$output_dir/schedule.txt"

echo "===== Benchmark ====="

# Run!!!
# $1 is a comma-separated job list; the jobs run in the order given.
IFS=',' read -r -a jobs <<< "$1"

for job in "${jobs[@]}"; do
  # Empty list elements (as in "a,,b") are skipped rather than reported as
  # unknown jobs.
  [ -n "$job" ] || continue

  if [ "$job" != debug ]; then
    echo "Start $job at $(date)" | tee -a "$schedule"
  fi

  start=$(now)
  case "$job" in
    bulkload)             run_bulkload ;;
    fillseq)              run_fillseq ;;
    overwrite)            run_change overwrite ;;
    updaterandom)         run_change updaterandom ;;
    mergerandom)          run_change mergerandom ;;
    filluniquerandom)     run_filluniquerandom ;;
    readrandom)           run_readrandom ;;
    fwdrange)             run_range "$job" false ;;
    revrange)             run_range "$job" true ;;
    readwhilewriting)     run_readwhile writing ;;
    readwhilemerging)     run_readwhile merging ;;
    fwdrangewhilewriting) run_rangewhile writing "$job" false ;;
    revrangewhilewriting) run_rangewhile writing "$job" true ;;
    fwdrangewhilemerging) run_rangewhile merging "$job" false ;;
    revrangewhilemerging) run_rangewhile merging "$job" true ;;
    debug)
      # NOTE(review): --num=$num_keys was already expanded into
      # const_params before this point, so this assignment only changes
      # the key count shown in later progress messages, not the value
      # passed to db_bench. Confirm whether that is intended.
      num_keys=1000 # debug
      echo "Setting num_keys to $num_keys"
      ;;
    *)
      # Jobs listed before the unknown one have already run; report the
      # failure through the exit status as well as the message.
      echo "unknown job $job" >&2
      exit 1
      ;;
  esac
  end=$(now)

  if [ "$job" != debug ]; then
    echo "Complete $job in $((end-start)) seconds" | tee -a "$schedule"
  fi

  # The L0 and Sum write columns are read from the same column of the
  # db_bench stats table, so they share a unit; the L0 header is labelled
  # GB to match Sum_GB.
  echo -e "ops/sec\tmb/sec\tL0_GB\tSum_GB\tW-Amp\tW-MB/s\tusec/op\tp50\tp75\tp99\tUptime\tStall-time\tStall%\tTest"
  # No report exists until the first summarized job has finished (for
  # example after a debug-only run), so only show the last row if there
  # is one.
  if [ -f "$report" ]; then
    tail -1 "$report"
  fi
done