From 1f9d58b2e2d1f7617db3a288e0f48ca42b5ad359 Mon Sep 17 00:00:00 2001 From: Mark Callaghan Date: Wed, 16 Mar 2022 16:51:47 -0700 Subject: [PATCH] Enhance to support more tuning options, and universal and integrated BlobDB for all tests Summary: This does two big things: * provides more tuning options * supports universal and integrated BlobDB for all of the benchmarks that were previously leveled-only It does several smaller things, and I will list a few * sets l0_slowdown_writes_trigger which wasn't set before this diff. * improves readability in report.tsv by using smaller field names in the header * adds more columns to report.tsv report.tsv before this diff: ops_sec mb_sec total_size_gb level0_size_gb sum_gb write_amplification write_mbps usec_op percentile_50 percentile_75 percentile_99 percentile_99.9 percentile_99.99 uptime stall_time stall_percent test_name test_date rocksdb_version job_id 823294 329.8 0.0 21.5 21.5 1.0 183.4 1.2 1.0 1.0 3 6 14 120 00:00:0.000 0.0 fillseq.wal_disabled.v400 2022-03-16T15:46:45.000-07:00 7.0 326520 130.8 0.0 0.0 0.0 0.0 0 12.2 139.8 155.1 170 234 250 60 00:00:0.000 0.0 multireadrandom.t4 2022-03-16T15:48:47.000-07:00 7.0 86313 345.7 0.0 0.0 0.0 0.0 0 46.3 44.8 50.6 75 84 108 60 00:00:0.000 0.0 revrangewhilewriting.t4 2022-03-16T15:50:48.000-07:00 7.0 101294 405.7 0.0 0.1 0.1 1.0 1.6 39.5 40.4 45.9 64 75 103 62 00:00:0.000 0.0 fwdrangewhilewriting.t4 2022-03-16T15:52:50.000-07:00 7.0 258141 103.4 0.0 0.1 1.2 18.2 19.8 15.5 14.3 18.1 28 34 48 62 00:00:0.000 0.0 readwhilewriting.t4 2022-03-16T15:54:51.000-07:00 7.0 334690 134.1 0.0 7.6 18.7 4.2 308.8 12.0 11.8 13.7 21 30 62 62 00:00:0.000 0.0 overwrite.t4.s0 2022-03-16T15:56:53.000-07:00 7.0 report.tsv with this diff: ops_sec mb_sec lsm_sz blob_sz c_wgb w_amp c_mbps c_wsecs c_csecs b_rgb b_wgb usec_op p50 p99 p99.9 p99.99 pmax uptime stall% Nstall u_cpu s_cpu rss test date version job_id 831144 332.9 22GB 0.0GB, 21.7 1.0 185.1 264 262 0 0 1.2 1.0 3 6 14 9198 120 0.0 0 0.4 0.0 0.7 
fillseq.wal_disabled.v400 2022-03-16T16:21:23 7.0 325229 130.3 22GB 0.0GB, 0.0 0.0 0 0 0 0 12.3 139.8 170 237 249 572 60 0.0 0 0.4 0.1 1.2 multireadrandom.t4 2022-03-16T16:23:25 7.0 312920 125.3 26GB 0.0GB, 11.1 2.6 189.3 115 113 0 0 12.8 11.8 21 34 1255 6442 60 0.2 1 0.7 0.1 0.6 overwritesome.t4.s0 2022-03-16T16:25:27 7.0 81698 327.2 25GB 0.0GB, 0.0 0.0 0 0 0 0 48.9 46.2 79 246 369 9445 60 0.0 0 0.4 0.1 1.4 revrangewhilewriting.t4 2022-03-16T16:30:21 7.0 92484 370.4 25GB 0.0GB, 0.1 1.5 1.1 1 0 0 0 43.2 42.3 75 103 110 9512 62 0.0 0 0.4 0.1 1.4 fwdrangewhilewriting.t4 2022-03-16T16:32:24 7.0 241661 96.8 25GB 0.0GB, 0.1 1.5 1.1 1 0 0 0 16.5 17.1 30 34 49 9092 62 0.0 0 0.4 0.1 1.4 readwhilewriting.t4 2022-03-16T16:34:27 7.0 305234 122.3 30GB 0.0GB, 12.1 2.7 201.7 127 124 0 0 13.1 11.8 21 128 1934 6339 62 0.0 0 0.7 0.1 0.7 overwrite.t4.s0 2022-03-16T16:36:30 7.0 Test Plan: run it Reviewers: Subscribers: Tasks: Tags: --- tools/benchmark.sh | 491 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 428 insertions(+), 63 deletions(-) diff --git a/tools/benchmark.sh b/tools/benchmark.sh index 2381be2ff..a6330385b 100755 --- a/tools/benchmark.sh +++ b/tools/benchmark.sh @@ -48,13 +48,47 @@ function display_usage() { echo -e "\tVALUE_SIZE\t\t\tThe size of the values to use in the benchmark (default: 400 bytes)" echo -e "\tBLOCK_SIZE\t\t\tThe size of the database blocks in the benchmark (default: 8 KB)" echo -e "\tDB_BENCH_NO_SYNC\t\tDisable fsync on the WAL" + echo -e "\tNUMACTL\t\tWhen defined use numactl --interleave=all" echo -e "\tNUM_THREADS\t\t\tThe number of threads to use (default: 64)" echo -e "\tMB_WRITE_PER_SEC" echo -e "\tNUM_NEXTS_PER_SEEK\t\t(default: 10)" echo -e "\tCACHE_SIZE\t\t\t(default: 16GB)" echo -e "\tCOMPRESSION_MAX_DICT_BYTES" echo -e "\tCOMPRESSION_TYPE\t\t(default: zstd)" - echo -e "\tDURATION" + echo -e "\tBOTTOMMOST_COMPRESSION\t\t(default: none)" + echo -e "\tMIN_LEVEL_TO_COMPRESS\t\tValue for min_level_to_compress for Leveled" + echo 
-e "\tCOMPRESSION_SIZE_PERCENT\t\tValue for compression_size_percent for Universal" + echo -e "\tDURATION\t\t\tNumber of seconds for which the test runs" + echo -e "\tWRITES\t\t\tNumber of writes for which the test runs" + echo -e "\tWRITE_BUFFER_SIZE_MB\t\tThe size of the write buffer in MB (default: 128)" + echo -e "\tTARGET_FILE_SIZE_BASE_MB\t\tThe value for target_file_size_base in MB (default: 128)" + echo -e "\tMAX_BYTES_FOR_LEVEL_BASE_MB\t\tThe value for max_bytes_for_level_base in MB (default: 128)" + echo -e "\tMAX_BACKGROUND_JOBS\t\t\tThe value for max_background_jobs (default: 16)" + echo -e "\tCACHE_INDEX_AND_FILTER_BLOCKS\t\tThe value for cache_index_and_filter_blocks (default: 0)" + echo -e "\tUSE_O_DIRECT\t\tUse O_DIRECT for user reads and compaction" + echo -e "\tSOFT_PENDING_COMPACTION_BYTES_LIMIT_IN_GB\tThe value for soft_pending_compaction_bytes_limit in GB" + echo -e "\tHARD_PENDING_COMPACTION_BYTES_LIMIT_IN_GB\tThe value for hard_pending_compaction_bytes_limit in GB" + echo -e "\tSTATS_INTERVAL_SECONDS\tValue for stats_interval_seconds" + echo -e "\tREPORT_INTERVAL_SECONDS\tValue for report_interval_seconds" + echo -e "\tSUBCOMPACTIONS\t\tValue for subcompactions" + echo -e "\tLEVEL0_FILE_NUM_COMPACTION_TRIGGER\tValue for level0_file_num_compaction_trigger" + echo -e "\tLEVEL0_SLOWDOWN_WRITES_TRIGGER\tValue for level0_slowdown_writes_trigger" + echo -e "\tLEVEL0_STOP_WRITES_TRIGGER\tValue for level0_stop_writes_trigger" + echo -e "\tPER_LEVEL_FANOUT\tValue for max_bytes_for_level_multiplier" + echo -e "\tOptions for universal compaction:" + echo -e "\tUNIVERSAL\t\tUse universal compaction when set to anything, otherwise use leveled" + echo -e "\tUNIVERSAL_MIN_MERGE_WIDTH\tValue of min_merge_width option for universal" + echo -e "\tUNIVERSAL_MAX_MERGE_WIDTH\tValue of min_merge_width option for universal" + echo -e "\tUNIVERSAL_SIZE_RATIO\tValue of size_ratio option for universal" + echo -e 
"\tUNIVERSAL_MAX_SIZE_AMP\tmax_size_amplification_percent for universal" + echo -e "\tUNIVERSAL_ALLOW_TRIVIAL_MOVE\tSet allow_trivial_move to true for universal, default is false" + echo -e "\tOptions for integrated BlobDB" + echo -e "\tMIN_BLOB_SIZE\tValue for min_blob_size" + echo -e "\tBLOB_FILE_SIZE\tValue for blob_file_size" + echo -e "\tBLOB_COMPRESSION_TYPE\tValue for blob_compression_type" + echo -e "\tBLOB_GC_AGE_CUTOFF\tValue for blob_garbage_collection_age_cutoff" + echo -e "\tBLOB_GC_FORCE_THRESHOLD\tValue for blob_garbage_collection_force_threshold" + echo -e "\tCOMPACTION_STYLE\tOne of leveled, universal, blob. Default is leveled." } if [ $# -lt 1 ]; then @@ -106,6 +140,18 @@ if [ ! -z $DB_BENCH_NO_SYNC ]; then syncval="0"; fi +compaction_style=${COMPACTION_STYLE:-leveled} +if [ $compaction_style = "leveled" ]; then + echo Use leveled compaction +elif [ $compaction_style = "universal" ]; then + echo Use universal compaction +elif [ $compaction_style = "blob" ]; then + echo Use blob compaction +else + echo COMPACTION_STYLE is :: $COMPACTION_STYLE :: and must be one of leveled, universal, blob + exit $EXIT_INVALID_ARGS +fi + num_threads=${NUM_THREADS:-64} mb_written_per_sec=${MB_WRITE_PER_SEC:-0} # Only for tests that do range scans @@ -113,19 +159,81 @@ num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10} cache_size=${CACHE_SIZE:-$((17179869184))} compression_max_dict_bytes=${COMPRESSION_MAX_DICT_BYTES:-0} compression_type=${COMPRESSION_TYPE:-zstd} +min_level_to_compress=${MIN_LEVEL_TO_COMPRESS:-"-1"} +compression_size_percent=${COMPRESSION_SIZE_PERCENT:-"-1"} + duration=${DURATION:-0} +writes=${WRITES:-0} num_keys=${NUM_KEYS:-8000000000} key_size=${KEY_SIZE:-20} value_size=${VALUE_SIZE:-400} block_size=${BLOCK_SIZE:-8192} +write_buffer_mb=${WRITE_BUFFER_SIZE_MB:-128} +target_file_mb=${TARGET_FILE_SIZE_BASE_MB:-128} +l1_mb=${MAX_BYTES_FOR_LEVEL_BASE_MB:-1024} +max_background_jobs=${MAX_BACKGROUND_JOBS:-16} +stats_interval_seconds=${STATS_INTERVAL_SECONDS:-60} 
+report_interval_seconds=${REPORT_INTERVAL_SECONDS:-5} +subcompactions=${SUBCOMPACTIONS:-1} +per_level_fanout=${PER_LEVEL_FANOUT:-8} -const_params=" +cache_index_and_filter=${CACHE_INDEX_AND_FILTER_BLOCKS:-0} +if [[ $cache_index_and_filter -eq 0 ]]; then + cache_meta_flags="" +elif [[ $cache_index_and_filter -eq 1 ]]; then + cache_meta_flags="\ + --cache_index_and_filter_blocks=$cache_index_and_filter \ + --cache_high_pri_pool_ratio=0.5" +else + echo CACHE_INDEX_AND_FILTER_BLOCKS was $CACHE_INDEX_AND_FILTER_BLOCKS but most be 0 or 1 + exit $EXIT_INVALID_ARGS +fi + +soft_pending_arg="" +if [ ! -z $SOFT_PENDING_COMPACTION_BYTES_LIMIT_IN_GB ]; then + soft_pending_bytes=$( echo $SOFT_PENDING_COMPACTION_BYTES_LIMIT_IN_GB | \ + awk '{ printf "%.0f", $1 * 1024 * 1024 * 1024 }' ) + soft_pending_arg="--soft_pending_compaction_bytes_limit=$soft_pending_bytes" +fi + +hard_pending_arg="" +if [ ! -z $HARD_PENDING_COMPACTION_BYTES_LIMIT_IN_GB ]; then + hard_pending_bytes=$( echo $HARD_PENDING_COMPACTION_BYTES_LIMIT_IN_GB | \ + awk '{ printf "%.0f", $1 * 1024 * 1024 * 1024 }' ) + hard_pending_arg="--hard_pending_compaction_bytes_limit=$hard_pending_bytes" +fi + +o_direct_flags="" +if [ ! -z $USE_O_DIRECT ]; then + # TODO: deal with flags only supported in new versions, like prepopulate_block_cache + #o_direct_flags="--use_direct_reads --use_direct_io_for_flush_and_compaction --prepopulate_block_cache=1" + o_direct_flags="--use_direct_reads --use_direct_io_for_flush_and_compaction" +fi + +univ_min_merge_width=${UNIVERSAL_MIN_MERGE_WIDTH:-2} +univ_max_merge_width=${UNIVERSAL_MAX_MERGE_WIDTH:-20} +univ_size_ratio=${UNIVERSAL_SIZE_RATIO:-1} +univ_max_size_amp=${UNIVERSAL_MAX_SIZE_AMP:-200} + +if [ ! 
-z $UNIVERSAL_ALLOW_TRIVIAL_MOVE ]; then + univ_allow_trivial_move=1 +else + univ_allow_trivial_move=0 +fi + +min_blob_size=${MIN_BLOB_SIZE:-0} +blob_file_size=${BLOB_FILE_SIZE:-$(( 256 * $M ))} +blob_compression_type=${BLOB_COMPRESSION_TYPE:-lz4} +blob_gc_age_cutoff=${BLOB_GC_AGE_CUTOFF:-"0.25"} +blob_gc_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1} + +const_params_base=" --db=$DB_DIR \ --wal_dir=$WAL_DIR \ \ --num=$num_keys \ - --num_levels=6 \ + --num_levels=8 \ --key_size=$key_size \ --value_size=$value_size \ --block_size=$block_size \ @@ -134,49 +242,107 @@ const_params=" --compression_max_dict_bytes=$compression_max_dict_bytes \ --compression_ratio=0.5 \ --compression_type=$compression_type \ - --level_compaction_dynamic_level_bytes=true \ --bytes_per_sync=$((8 * M)) \ - --cache_index_and_filter_blocks=0 \ - --pin_l0_filter_and_index_blocks_in_cache=1 \ + $cache_meta_flags \ + $o_direct_flags \ --benchmark_write_rate_limit=$(( 1024 * 1024 * $mb_written_per_sec )) \ \ - --write_buffer_size=$((128 * M)) \ - --target_file_size_base=$((128 * M)) \ - --max_bytes_for_level_base=$((1 * G)) \ + --write_buffer_size=$(( $write_buffer_mb * M)) \ + --target_file_size_base=$(( $target_file_mb * M)) \ + --max_bytes_for_level_base=$(( $l1_mb * M)) \ \ --verify_checksum=1 \ --delete_obsolete_files_period_micros=$((60 * M)) \ - --max_bytes_for_level_multiplier=8 \ + --max_bytes_for_level_multiplier=$per_level_fanout \ \ --statistics=0 \ --stats_per_interval=1 \ - --stats_interval_seconds=60 \ + --stats_interval_seconds=$stats_interval_seconds \ + --report_interval_seconds=$report_interval_seconds \ --histogram=1 \ \ --memtablerep=skip_list \ --bloom_bits=10 \ --open_files=-1 \ + --subcompactions=$subcompactions \ \ $bench_args" -l0_config=" - --level0_file_num_compaction_trigger=4 \ - --level0_stop_writes_trigger=20" +level_const_params=" + $const_params_base \ + --compaction_style=0 \ + --min_level_to_compress=$min_level_to_compress \ + 
--level_compaction_dynamic_level_bytes=true \ + --pin_l0_filter_and_index_blocks_in_cache=1 \ + $soft_pending_arg \ + $hard_pending_arg \ +" +# TODO: these inherit level_const_params because the non-blob LSM tree uses leveled compaction +blob_const_params=" + $level_const_params \ + --enable_blob_files=true \ + --min_blob_size=$min_blob_size \ + --blob_file_size=$blob_file_size \ + --blob_compression_type=$blob_compression_type \ + --enable_blob_garbage_collection=true \ + --blob_garbage_collection_age_cutoff=$blob_gc_age_cutoff \ + --blob_garbage_collection_force_threshold=$blob_gc_force_threshold \ +" + +# TODO: +# pin_l0_filter_and..., is this OK? +univ_const_params=" + $const_params_base \ + --compaction_style=1 \ + --universal_compression_size_percent=$compression_size_percent \ + --pin_l0_filter_and_index_blocks_in_cache=1 \ + --universal_min_merge_width=$univ_min_merge_width \ + --universal_max_merge_width=$univ_max_merge_width \ + --universal_size_ratio=$univ_size_ratio \ + --universal_max_size_amplification_percent=$univ_max_size_amp \ + --universal_allow_trivial_move=$univ_allow_trivial_move \ +" + +if [ $compaction_style == "leveled" ]; then + const_params="$level_const_params" + l0_file_num_compaction_trigger=${LEVEL0_FILE_NUM_COMPACTION_TRIGGER:-4} + l0_slowdown_writes_trigger=${LEVEL0_SLOWDOWN_WRITES_TRIGGER:-20} + l0_stop_writes_trigger=${LEVEL0_STOP_WRITES_TRIGGER:-30} +elif [ $compaction_style == "universal" ]; then + const_params="$univ_const_params" + l0_file_num_compaction_trigger=${LEVEL0_FILE_NUM_COMPACTION_TRIGGER:-8} + l0_slowdown_writes_trigger=${LEVEL0_SLOWDOWN_WRITES_TRIGGER:-20} + l0_stop_writes_trigger=${LEVEL0_STOP_WRITES_TRIGGER:-30} +else + const_params="$blob_const_params" + l0_file_num_compaction_trigger=${LEVEL0_FILE_NUM_COMPACTION_TRIGGER:-4} + l0_slowdown_writes_trigger=${LEVEL0_SLOWDOWN_WRITES_TRIGGER:-20} + l0_stop_writes_trigger=${LEVEL0_STOP_WRITES_TRIGGER:-30} +fi + +l0_config=" + 
--level0_file_num_compaction_trigger=$l0_file_num_compaction_trigger \ + --level0_slowdown_writes_trigger=$l0_slowdown_writes_trigger \ + --level0_stop_writes_trigger=$l0_stop_writes_trigger" + +# You probably don't want to set both --writes and --duration if [ $duration -gt 0 ]; then const_params="$const_params --duration=$duration" fi +if [ $writes -gt 0 ]; then + const_params="$const_params --writes=$writes" +fi params_w="$l0_config \ - --max_background_compactions=16 \ + --max_background_jobs=$max_background_jobs \ --max_write_buffer_number=8 \ - --max_background_flushes=7 \ + $compact_bytes_limit \ $const_params" -params_bulkload="--max_background_compactions=16 \ +params_bulkload="--max_background_jobs=$max_background_jobs \ --max_write_buffer_number=8 \ --allow_concurrent_memtable_write=false \ - --max_background_flushes=7 \ --level0_file_num_compaction_trigger=$((10 * M)) \ --level0_slowdown_writes_trigger=$((10 * M)) \ --level0_stop_writes_trigger=$((10 * M)) \ @@ -204,6 +370,26 @@ params_univ_compact="$const_params \ --level0_slowdown_writes_trigger=16 \ --level0_stop_writes_trigger=20" +function get_cmd() { + output=$1 + + numa="" + if [ ! -z $NUMACTL ]; then + numa="numactl --interleave=all " + fi + + # Try to use timeout when duration is set because some tests (revrange*) hang + # for some versions (v6.10, v6.11). 
+ timeout_cmd="" + if [ $duration -gt 0 ]; then + if hash timeout ; then + timeout_cmd="timeout $(( $duration + 600 ))" + fi + fi + + echo "/usr/bin/time -f '%e %U %S' -o $output $numa $timeout_cmd" +} + function month_to_num() { local date_str=$1 date_str="${date_str/Jan/01}" @@ -221,6 +407,45 @@ function month_to_num() { echo $date_str } +function start_stats { + output=$1 + iostat -y -mx 1 >& $output.io & + vmstat 1 >& $output.vm & + # tail -1 because "ps | grep db_bench" returns 2 entries and we want the second + while :; do ps aux | grep db_bench | grep -v grep | tail -1; sleep 10; done >& $output.ps & + # This sets a global value + pspid=$! + + while :; do + b_gb=$( ls -l $DB_DIR 2> /dev/null | grep blob | awk '{ c += 1; b += $5 } END { printf "%.1f", b / (1024*1024*1024) }' ) + s_gb=$( ls -l $DB_DIR 2> /dev/null | grep sst | awk '{ c += 1; b += $5 } END { printf "%.1f", b / (1024*1024*1024) }' ) + l_gb=$( ls -l $WAL_DIR 2> /dev/null | grep log | awk '{ c += 1; b += $5 } END { printf "%.1f", b / (1024*1024*1024) }' ) + a_gb=$( ls -l $DB_DIR 2> /dev/null | awk '{ c += 1; b += $5 } END { printf "%.1f", b / (1024*1024*1024) }' ) + ts=$( date +%H%M%S ) + echo -e "${a_gb}\t${s_gb}\t${l_gb}\t${b_gb}\t${ts}" + sleep 10 + done >& $output.sizes & + # This sets a global value + szpid=$! 
+} + +function stop_stats { + output=$1 + kill $pspid + kill $szpid + killall iostat + killall vmstat + sleep 1 + gzip $output.io + gzip $output.vm + + am=$( sort -nk 1,1 $output.sizes | tail -1 | awk '{ print $1 }' ) + sm=$( sort -nk 2,2 $output.sizes | tail -1 | awk '{ print $2 }' ) + lm=$( sort -nk 3,3 $output.sizes | tail -1 | awk '{ print $3 }' ) + bm=$( sort -nk 4,4 $output.sizes | tail -1 | awk '{ print $4 }' ) + echo -e "max sizes (GB): $am all, $sm sst, $lm log, $bm blob" >> $output.sizes +} + function summarize_result { test_out=$1 test_name=$2 @@ -231,39 +456,78 @@ function summarize_result { # happen then empty output from grep when searching for "Sum" will cause # syntax errors. version=$( grep ^RocksDB: $test_out | awk '{ print $3 }' ) - date=$( grep ^Date: $test_out | awk '{ print $6 "-" $3 "-" $4 "T" $5 ".000" }' ) - iso_date=$( month_to_num $date ) - tz=$( date "+%z" ) - iso_tz="${tz:0:3}:${tz:3:2}" - iso_date="$iso_date$iso_tz" + date=$( grep ^Date: $test_out | awk '{ print $6 "-" $3 "-" $4 "T" $5 }' ) + my_date=$( month_to_num $date ) uptime=$( grep ^Uptime\(secs $test_out | tail -1 | awk '{ printf "%.0f", $2 }' ) - stall_time=$( grep "^Cumulative stall" $test_out | tail -1 | awk '{ print $3 }' ) stall_pct=$( grep "^Cumulative stall" $test_out| tail -1 | awk '{ print $5 }' ) + nstall=$( grep ^Stalls\(count\): $test_out | tail -1 | awk '{ print $2 + $6 + $10 + $14 + $18 + $20 }' ) ops_sec=$( grep ^${bench_name} $test_out | awk '{ print $5 }' ) mb_sec=$( grep ^${bench_name} $test_out | awk '{ print $7 }' ) - l0_wgb=$( grep "^ L0" $test_out | tail -1 | awk '{ print $9 }' ) - sum_wgb=$( grep "^ Sum" $test_out | tail -1 | awk '{ print $9 }' ) - sum_size=$( grep "^ Sum" $test_out | tail -1 | awk '{ printf "%.1f", $3 / 1024.0 }' ) - wamp=$( grep "^ Sum" $test_out | tail -1 | awk '{ printf "%.1f", $12 }' ) - if [[ "$sum_wgb" == "" ]]; then - wmb_ps="" + + flush_wgb=$( grep "^Flush(GB)" $test_out | tail -1 | awk '{ print $3 }' | tr ',' ' ' | awk '{ print 
$1 }' ) + sum_wgb=$( grep "^Cumulative compaction" $test_out | tail -1 | awk '{ printf "%.1f", $3 }' ) + cmb_ps=$( grep "^Cumulative compaction" $test_out | tail -1 | awk '{ printf "%.1f", $6 }' ) + if [[ "$sum_wgb" == "" || "$flush_wgb" == "" || "$flush_wgb" == "0.000" ]]; then + wamp="" else - wmb_ps=$( echo "scale=1; ( $sum_wgb * 1024.0 ) / $uptime" | bc ) + wamp=$( echo "$sum_wgb / $flush_wgb" | bc -l | awk '{ printf "%.1f", $1 }' ) fi + c_wsecs=$( grep "^ Sum" $test_out | tail -1 | awk '{ printf "%.0f", $15 }' ) + c_csecs=$( grep "^ Sum" $test_out | tail -1 | awk '{ printf "%.0f", $16 }' ) + + lsm_size=$( grep "^ Sum" $test_out | tail -1 | awk '{ printf "%.0f%s", $3, $4 }' ) + blob_size=$( grep "^Blob file count:" $test_out | tail -1 | awk '{ printf "%s%s", $7, $8 }' ) + + b_rgb=$( grep "^ Sum" $test_out | tail -1 | awk '{ printf "%.0f", $21 }' ) + b_wgb=$( grep "^ Sum" $test_out | tail -1 | awk '{ printf "%.0f", $22 }' ) + usecs_op=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $3 }' ) p50=$( grep "^Percentiles:" $test_out | tail -1 | awk '{ printf "%.1f", $3 }' ) - p75=$( grep "^Percentiles:" $test_out | tail -1 | awk '{ printf "%.1f", $5 }' ) p99=$( grep "^Percentiles:" $test_out | tail -1 | awk '{ printf "%.0f", $7 }' ) p999=$( grep "^Percentiles:" $test_out | tail -1 | awk '{ printf "%.0f", $9 }' ) p9999=$( grep "^Percentiles:" $test_out | tail -1 | awk '{ printf "%.0f", $11 }' ) + pmax=$( grep "^Min: " $test_out | grep Median: | grep Max: | awk '{ printf "%.0f", $6 }' ) + + time_out=$test_out.time + u_cpu=$( awk '{ printf "%.1f", $2 / 1000.0 }' $time_out ) + s_cpu=$( awk '{ printf "%.1f", $3 / 1000.0 }' $time_out ) + + rss="na" + if [ -f $test_out.stats.ps ]; then + rss=$( tail -1 $test_out.stats.ps | awk '{ printf "%.1f\n", $6 / (1024 * 1024) }' ) + fi # if the report TSV (Tab Separate Values) file does not yet exist, create it and write the header row to it if [ ! 
-f "$report" ]; then - echo -e "ops_sec\tmb_sec\ttotal_size_gb\tlevel0_size_gb\tsum_gb\twrite_amplification\twrite_mbps\tusec_op\tpercentile_50\tpercentile_75\tpercentile_99\tpercentile_99.9\tpercentile_99.99\tuptime\tstall_time\tstall_percent\ttest_name\ttest_date\trocksdb_version\tjob_id" \ + echo -e "# ops_sec - operations per second" >> $report + echo -e "# mb_sec - ops_sec * size-of-operation-in-MB" >> $report + echo -e "# lsm_sz - size of LSM tree" >> $report + echo -e "# blob_sz - size of BlobDB logs" >> $report + echo -e "# c_wgb - GB written by compaction" >> $report + echo -e "# w_amp - Write-amplification as (bytes written by compaction / bytes written by memtable flush)" >> $report + echo -e "# c_mbps - Average write rate for compaction" >> $report + echo -e "# c_wsecs - Wall clock seconds doing compaction" >> $report + echo -e "# c_csecs - CPU seconds doing compaction" >> $report + echo -e "# b_rgb - Blob compaction read GB" >> $report + echo -e "# b_wgb - Blob compaction write GB" >> $report + echo -e "# usec_op - Microseconds per operation" >> $report + echo -e "# p50, p99, p99.9, p99.99 - 50th, 99th, 99.9th, 99.99th percentile response time in usecs" >> $report + echo -e "# pmax - max response time in usecs" >> $report + echo -e "# uptime - RocksDB uptime in seconds" >> $report + echo -e "# stall% - Percentage of time writes are stalled" >> $report + echo -e "# Nstall - Number of stalls" >> $report + echo -e "# u_cpu - #seconds/1000 of user CPU" >> $report + echo -e "# s_cpu - #seconds/1000 of system CPU" >> $report + echo -e "# rss - max RSS in GB for db_bench process" >> $report + echo -e "# test - Name of test" >> $report + echo -e "# date - Date/time of test" >> $report + echo -e "# version - RocksDB version" >> $report + echo -e "# job_id - User-provided job ID" >> $report + echo -e 
"ops_sec\tmb_sec\tlsm_sz\tblob_sz\tc_wgb\tw_amp\tc_mbps\tc_wsecs\tc_csecs\tb_rgb\tb_wgb\tusec_op\tp50\tp99\tp99.9\tp99.99\tpmax\tuptime\tstall%\tNstall\tu_cpu\ts_cpu\trss\ttest\tdate\tversion\tjob_id" \ >> $report fi - echo -e "$ops_sec\t$mb_sec\t$sum_size\t$l0_wgb\t$sum_wgb\t$wamp\t$wmb_ps\t$usecs_op\t$p50\t$p75\t$p99\t$p999\t$p9999\t$uptime\t$stall_time\t$stall_pct\t$test_name\t$iso_date\t$version\t$job_id" \ + echo -e "$ops_sec\t$mb_sec\t$lsm_size\t$blob_size\t$sum_wgb\t$wamp\t$cmb_ps\t$c_wsecs\t$c_csecs\t$b_rgb\t$b_wgb\t$usecs_op\t$p50\t$p99\t$p999\t$p9999\t$pmax\t$uptime\t$stall_pct\t$nstall\t$u_cpu\t$s_cpu\t$rss\t$test_name\t$my_date\t$version\t$job_id" \ >> $report } @@ -272,7 +536,8 @@ function run_bulkload { # client can discover where to restart a load after a crash. I think this is a good way to load. echo "Bulk loading $num_keys random keys" log_file_name=$output_dir/benchmark_bulkload_fillrandom.log - cmd="./db_bench --benchmarks=fillrandom \ + time_cmd=$( get_cmd $log_file_name.time ) + cmd="$time_cmd ./db_bench --benchmarks=fillrandom \ --use_existing_db=0 \ --disable_auto_compactions=1 \ --sync=0 \ @@ -282,6 +547,7 @@ function run_bulkload { --allow_concurrent_memtable_write=false \ --disable_wal=1 \ --seed=$( date +%s ) \ + --report_file=${log_file_name}.r.csv \ 2>&1 | tee -a $log_file_name" if [[ "$job_id" != "" ]]; then echo "Job ID: ${job_id}" > $log_file_name @@ -294,7 +560,8 @@ function run_bulkload { echo "Compacting..." log_file_name=$output_dir/benchmark_bulkload_compact.log - cmd="./db_bench --benchmarks=compact \ + time_cmd=$( get_cmd $log_file_name.time ) + cmd="$time_cmd ./db_bench --benchmarks=compact \ --use_existing_db=1 \ --disable_auto_compactions=1 \ --sync=0 \ @@ -333,7 +600,8 @@ function run_manual_compaction_worker { fi # Make sure that fillrandom uses the same compaction options as compact. 
- cmd="./db_bench --benchmarks=fillrandom \ + time_cmd=$( get_cmd $log_file_name.time ) + cmd="$time_cmd ./db_bench --benchmarks=fillrandom \ --use_existing_db=0 \ --disable_auto_compactions=0 \ --sync=0 \ @@ -432,42 +700,106 @@ function run_fillseq { test_name=fillseq.wal_enabled.v${value_size} fi + # For Leveled compaction hardwire this to 0 so that data that is trivial-moved + # to larger levels (3, 4, etc) will be compressed. + if [ $compaction_style == "leveled" ]; then + comp_arg="--min_level_to_compress=0" + elif [ $compaction_style == "universal" ]; then + if [ ! -z $UNIVERSAL_ALLOW_TRIVIAL_MOVE ]; then + # See GetCompressionFlush where compression_size_percent < 1 means use the default + # compression which is needed because trivial moves are enabled + comp_arg="--universal_compression_size_percent=-1" + else + # See GetCompressionFlush where compression_size_percent > 0 means no compression. + # Don't set anything here because compression_size_percent is set in univ_const_params + comp_arg="" + fi + else + # TODO: try to match what is done for leveled, although compression might not be needed + comp_arg="--min_level_to_compress=0" + fi + echo "Loading $num_keys keys sequentially" - cmd="./db_bench --benchmarks=fillseq \ + time_cmd=$( get_cmd $log_file_name.time ) + cmd="$time_cmd ./db_bench --benchmarks=fillseq \ + $params_fillseq \ + $comp_arg \ --use_existing_db=0 \ --sync=0 \ - $params_fillseq \ - --min_level_to_compress=0 \ --threads=1 \ --memtablerep=vector \ --allow_concurrent_memtable_write=false \ --disable_wal=$1 \ --seed=$( date +%s ) \ + --report_file=${log_file_name}.r.csv \ 2>&1 | tee -a $log_file_name" - if [[ "$job_id" != "" ]]; then echo "Job ID: ${job_id}" > $log_file_name echo $cmd | tee -a $log_file_name else echo $cmd | tee $log_file_name fi + start_stats $log_file_name.stats eval $cmd + stop_stats $log_file_name.stats # The constant "fillseq" which we pass to db_bench is the benchmark name. 
summarize_result $log_file_name $test_name fillseq } +function run_lsm { + # This flushes the memtable and L0 to get the LSM tree into a deterministic + # state for read-only tests that will follow. + echo "Flush memtable, wait, compact L0, wait" + job=$1 + + if [ $job = flush_mt_l0 ]; then + benchmarks=levelstats,flush,waitforcompaction,compact0,waitforcompaction,memstats,levelstats + elif [ $job = waitforcompaction ]; then + benchmarks=levelstats,waitforcompaction,memstats,levelstats + else + echo Job unknown: $job + exit $EXIT_NOT_COMPACTION_TEST + fi + + log_file_name=$output_dir/benchmark_${job}.log + time_cmd=$( get_cmd $log_file_name.time ) + cmd="$time_cmd ./db_bench --benchmarks=$benchmarks \ + --use_existing_db=1 \ + --sync=0 \ + $params_w \ + --threads=1 \ + --seed=$( date +%s ) \ + 2>&1 | tee -a $log_file_name" + if [[ "$job_id" != "" ]]; then + echo "Job ID: ${job_id}" > $log_file_name + echo $cmd | tee -a $log_file_name + else + echo $cmd | tee $log_file_name + fi + start_stats $log_file_name.stats + # waitforcompaction can hang with universal (compaction_style=1) + # see bug https://github.com/facebook/rocksdb/issues/9275 + eval $cmd + stop_stats $log_file_name.stats + # Don't summarize, the log doesn't have the output needed for it +} + function run_change { - operation=$1 - echo "Do $num_keys random $operation" - log_file_name="$output_dir/benchmark_${operation}.t${num_threads}.s${syncval}.log" - cmd="./db_bench --benchmarks=$operation \ + output_name=$1 + grep_name=$2 + benchmarks=$3 + echo "Do $num_keys random $output_name" + log_file_name="$output_dir/benchmark_${output_name}.t${num_threads}.s${syncval}.log" + time_cmd=$( get_cmd $log_file_name.time ) + cmd="$time_cmd ./db_bench --benchmarks=$benchmarks \ --use_existing_db=1 \ --sync=$syncval \ $params_w \ --threads=$num_threads \ --merge_operator=\"put\" \ --seed=$( date +%s ) \ + --report_file=${log_file_name}.r.csv \ 2>&1 | tee -a $log_file_name" if [[ "$job_id" != "" ]]; then echo "Job ID: 
${job_id}" > $log_file_name @@ -475,19 +807,23 @@ function run_change { else echo $cmd | tee $log_file_name fi + start_stats $log_file_name.stats eval $cmd - summarize_result $log_file_name ${operation}.t${num_threads}.s${syncval} $operation + stop_stats $log_file_name.stats + summarize_result $log_file_name ${output_name}.t${num_threads}.s${syncval} $grep_name } function run_filluniquerandom { echo "Loading $num_keys unique keys randomly" log_file_name=$output_dir/benchmark_filluniquerandom.log - cmd="./db_bench --benchmarks=filluniquerandom \ + time_cmd=$( get_cmd $log_file_name.time ) + cmd="$time_cmd ./db_bench --benchmarks=filluniquerandom \ --use_existing_db=0 \ --sync=0 \ $params_w \ --threads=1 \ --seed=$( date +%s ) \ + --report_file=${log_file_name}.r.csv \ 2>&1 | tee -a $log_file_name" if [[ "$job_id" != "" ]]; then echo "Job ID: ${job_id}" > $log_file_name @@ -495,18 +831,22 @@ function run_filluniquerandom { else echo $cmd | tee $log_file_name fi + start_stats $log_file_name.stats eval $cmd + stop_stats $log_file_name.stats summarize_result $log_file_name filluniquerandom filluniquerandom } function run_readrandom { echo "Reading $num_keys random keys" log_file_name="${output_dir}/benchmark_readrandom.t${num_threads}.log" - cmd="./db_bench --benchmarks=readrandom \ + time_cmd=$( get_cmd $log_file_name.time ) + cmd="$time_cmd ./db_bench --benchmarks=readrandom \ --use_existing_db=1 \ $params_w \ --threads=$num_threads \ --seed=$( date +%s ) \ + --report_file=${log_file_name}.r.csv \ 2>&1 | tee -a $log_file_name" if [[ "$job_id" != "" ]]; then echo "Job ID: ${job_id}" > $log_file_name @@ -514,19 +854,23 @@ function run_readrandom { else echo $cmd | tee $log_file_name fi + start_stats $log_file_name.stats eval $cmd + stop_stats $log_file_name.stats summarize_result $log_file_name readrandom.t${num_threads} readrandom } function run_multireadrandom { echo "Multi-Reading $num_keys random keys" 
log_file_name="${output_dir}/benchmark_multireadrandom.t${num_threads}.log" - cmd="./db_bench --benchmarks=multireadrandom \ + time_cmd=$( get_cmd $log_file_name.time ) + cmd="$time_cmd ./db_bench --benchmarks=multireadrandom \ --use_existing_db=1 \ --threads=$num_threads \ --batch_size=10 \ $params_w \ --seed=$( date +%s ) \ + --report_file=${log_file_name}.r.csv \ 2>&1 | tee -a $log_file_name" if [[ "$job_id" != "" ]]; then echo "Job ID: ${job_id}" > $log_file_name @@ -534,7 +878,9 @@ function run_multireadrandom { else echo $cmd | tee $log_file_name fi + start_stats $log_file_name.stats eval $cmd + stop_stats $log_file_name.stats summarize_result $log_file_name multireadrandom.t${num_threads} multireadrandom } @@ -542,13 +888,15 @@ function run_readwhile { operation=$1 echo "Reading $num_keys random keys while $operation" log_file_name="${output_dir}/benchmark_readwhile${operation}.t${num_threads}.log" - cmd="./db_bench --benchmarks=readwhile${operation} \ + time_cmd=$( get_cmd $log_file_name.time ) + cmd="$time_cmd ./db_bench --benchmarks=readwhile${operation} \ --use_existing_db=1 \ --sync=$syncval \ $params_w \ --threads=$num_threads \ --merge_operator=\"put\" \ --seed=$( date +%s ) \ + --report_file=${log_file_name}.r.csv \ 2>&1 | tee -a $log_file_name" if [[ "$job_id" != "" ]]; then echo "Job ID: ${job_id}" > $log_file_name @@ -556,7 +904,9 @@ function run_readwhile { else echo $cmd | tee $log_file_name fi + start_stats $log_file_name.stats eval $cmd + stop_stats $log_file_name.stats summarize_result $log_file_name readwhile${operation}.t${num_threads} readwhile${operation} } @@ -565,8 +915,9 @@ function run_rangewhile { full_name=$2 reverse_arg=$3 log_file_name="${output_dir}/benchmark_${full_name}.t${num_threads}.log" + time_cmd=$( get_cmd $log_file_name.time ) echo "Range scan $num_keys random keys while ${operation} for reverse_iter=${reverse_arg}" - cmd="./db_bench --benchmarks=seekrandomwhile${operation} \ + cmd="$time_cmd ./db_bench 
--benchmarks=seekrandomwhile${operation} \ --use_existing_db=1 \ --sync=$syncval \ $params_w \ @@ -575,9 +926,12 @@ function run_rangewhile { --seek_nexts=$num_nexts_per_seek \ --reverse_iterator=$reverse_arg \ --seed=$( date +%s ) \ + --report_file=${log_file_name}.r.csv \ 2>&1 | tee -a $log_file_name" echo $cmd | tee $log_file_name + start_stats $log_file_name.stats eval $cmd + stop_stats $log_file_name.stats summarize_result $log_file_name ${full_name}.t${num_threads} seekrandomwhile${operation} } @@ -585,14 +939,16 @@ function run_range { full_name=$1 reverse_arg=$2 log_file_name="${output_dir}/benchmark_${full_name}.t${num_threads}.log" + time_cmd=$( get_cmd $log_file_name.time ) echo "Range scan $num_keys random keys for reverse_iter=${reverse_arg}" - cmd="./db_bench --benchmarks=seekrandom \ + cmd="$time_cmd ./db_bench --benchmarks=seekrandom \ --use_existing_db=1 \ $params_w \ --threads=$num_threads \ --seek_nexts=$num_nexts_per_seek \ --reverse_iterator=$reverse_arg \ --seed=$( date +%s ) \ + --report_file=${log_file_name}.r.csv \ 2>&1 | tee -a $log_file_name" if [[ "$job_id" != "" ]]; then echo "Job ID: ${job_id}" > $log_file_name @@ -600,18 +956,22 @@ function run_range { else echo $cmd | tee $log_file_name fi + start_stats $log_file_name.stats eval $cmd + stop_stats $log_file_name.stats summarize_result $log_file_name ${full_name}.t${num_threads} seekrandom } function run_randomtransaction { echo "..." 
log_file_name=$output_dir/benchmark_randomtransaction.log - cmd="./db_bench $params_r --benchmarks=randomtransaction \ + time_cmd=$( get_cmd $log_file_name.time ) + cmd="$time_cmd ./db_bench $params_r --benchmarks=randomtransaction \ --num=$num_keys \ --transaction_db \ --threads=5 \ --transaction_sets=5 \ + --report_file=${log_file_name}.r.csv \ 2>&1 | tee $log_file_name" if [[ "$job_id" != "" ]]; then echo "Job ID: ${job_id}" > $log_file_name @@ -619,7 +979,9 @@ function run_randomtransaction { else echo $cmd | tee $log_file_name fi + start_stats $log_file_name.stats eval $cmd + stop_stats $log_file_name.stats } function now() { @@ -641,22 +1003,25 @@ for job in ${jobs[@]}; do start=$(now) if [ $job = bulkload ]; then run_bulkload + elif [ $job = flush_mt_l0 ]; then + run_lsm flush_mt_l0 + elif [ $job = waitforcompaction ]; then + run_lsm waitforcompaction elif [ $job = fillseq_disable_wal ]; then run_fillseq 1 elif [ $job = fillseq_enable_wal ]; then run_fillseq 0 elif [ $job = overwrite ]; then - syncval="0" - params_w="$params_w \ - --writes=125000000 \ - --subcompactions=4 \ - --soft_pending_compaction_bytes_limit=$((1 * T)) \ - --hard_pending_compaction_bytes_limit=$((4 * T)) " - run_change overwrite + run_change overwrite overwrite overwrite + elif [ $job = overwritesome ]; then + # This uses a different name for overwrite results so it can be run twice in one benchmark run. 
+ run_change overwritesome overwrite overwrite + elif [ $job = overwriteandwait ]; then + run_change overwriteandwait overwrite overwrite,waitforcompaction elif [ $job = updaterandom ]; then - run_change updaterandom + run_change updaterandom updaterandom updaterandom elif [ $job = mergerandom ]; then - run_change mergerandom + run_change mergerandom mergerandom mergerandom elif [ $job = filluniquerandom ]; then run_filluniquerandom elif [ $job = readrandom ]; then @@ -696,7 +1061,7 @@ for job in ${jobs[@]}; do echo "Completed $job (ID: $job_id) in $((end-start)) seconds" | tee -a $schedule fi - echo -e "ops/sec\tmb/sec\tSize-GB\tL0_GB\tSum_GB\tW-Amp\tW-MB/s\tusec/op\tp50\tp75\tp99\tp99.9\tp99.99\tUptime\tStall-time\tStall%\tTest\tDate\tVersion\tJob-ID" + echo -e "ops_sec\tmb_sec\tlsm_sz\tblob_sz\tc_wgb\tw_amp\tc_mbps\tc_wsecs\tc_csecs\tb_rgb\tb_wgb\tusec_op\tp50\tp99\tp99.9\tp99.99\tpmax\tuptime\tstall%\tNstall\tu_cpu\ts_cpu\trss\ttest\tdate\tversion\tjob_id" tail -1 $report done