rocksdb/tools/benchmark_leveldb.sh

#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# REQUIRE: db_bench binary exists in the current directory
#
# This should be used with the LevelDB fork listed here to use additional test options.
# For more details on the changes see the blog post listed below.
#   https://github.com/mdcallag/leveldb-1
#   http://smalldatum.blogspot.com/2015/04/comparing-leveldb-and-rocksdb-take-2.html

# Exactly one argument is required: a comma-separated list of jobs.
# A usage error is reported on stderr, newline-terminated, and with a non-zero
# exit status so that wrappers and CI jobs can detect the failure.
if [ $# -ne 1 ]; then
  echo "Usage: $0 <job>[,<job>...]" >&2
  echo "  jobs: fillseq, overwrite, readrandom, readwhilewriting, debug" >&2
  exit 1
fi

# Binary size multipliers: each step is 1024 times the previous one.
K=$((1 << 10))
M=$((K << 10))
G=$((M << 10))

# DB_DIR is mandatory; it is handed to db_bench as --db.
# The expansion is quoted so that a value containing whitespace is still seen
# as a single operand by `[`. The error goes to stderr with a non-zero status.
if [ -z "${DB_DIR:-}" ]; then
  echo "DB_DIR is not defined" >&2
  exit 1
fi

# Directory that receives the per-test logs, report.txt and schedule.txt.
# Falls back to /tmp/ when OUTPUT_DIR is unset or empty.
output_dir=${OUTPUT_DIR:-/tmp/}
# Quoted so that a path containing whitespace works; stop right away if the
# directory cannot be created instead of failing later on every log write.
if [ ! -d "$output_dir" ]; then
  mkdir -p -- "$output_dir" || {
    echo "cannot create output directory: $output_dir" >&2
    exit 1
  }
fi

# Value passed as --sync by the overwrite and readwhile* jobs.
# It is 1 unless $DB_BENCH_NO_SYNC is set to a non-empty value.
# The expansion is quoted so that values containing whitespace are handled.
syncval="1"
if [ -n "${DB_BENCH_NO_SYNC:-}" ]; then
  echo "Turning sync off for all multithreaded tests"
  syncval="0"
fi

# Tunables. Each one can be overridden through the environment variable of the
# same name in upper case; otherwise the default shown here is used.

# Thread count for every job except fillseq.
num_threads="${NUM_THREADS:-16}"
# Write rate; only passed to db_bench (as --writes_per_second) by run_readwhile.
writes_per_second="${WRITES_PER_SECOND:-$((K * 10))}"
# Passed to db_bench as --cache_size.
cache_size="${CACHE_SIZE:-$((G))}"

# Passed to db_bench as --num and --value_size respectively.
num_keys="${NUM_KEYS:-$((G))}"
value_size="${VALUE_SIZE:-400}"
# NOTE(review): block_size and key_size are not passed to db_bench by any
# command visible in this script.
block_size="${BLOCK_SIZE:-4096}"
key_size=20

# Flags shared by every db_bench invocation in this script.
#
# Every $variable and $(( )) expression below is expanded once, when this
# assignment runs. Reassigning num_keys, value_size, cache_size or DB_DIR
# afterwards does not change the text stored here.
#
# Inside the double quotes each backslash-newline is a line continuation, so the
# stored value is a leading newline followed by the flags separated by runs of
# spaces. The run_* functions rely on shell word splitting to turn that text
# into separate arguments, which also means a DB_DIR containing whitespace is
# split into several words.
const_params="
  --db=$DB_DIR \
  \
  --num=$num_keys \
  --value_size=$value_size \
  --cache_size=$cache_size \
  --compression_ratio=0.5 \
  \
  --write_buffer_size=$((2 * M)) \
  \
  --histogram=1 \
  \
  --bloom_bits=10 \
  --open_files=$((20 * K))"

# Flag string actually used by the run_* functions; currently const_params
# followed by a single space.
params_w="$const_params "

#######################################
# Append a one-line, tab-separated summary of a db_bench run to report.txt.
# Globals:
#   output_dir (read) - directory that holds report.txt
# Arguments:
#   $1 - path of the db_bench log to parse
#   $2 - label written in the last column of the report
#   $3 - benchmark name that starts the result line in the log
#   $4 - number of threads used for the run (integer)
# Outputs:
#   Appends "ops/sec  mb/sec  usec/op  avg  p50  label" to
#   $output_dir/report.txt; prints a warning on stderr when the log has no
#   usable result line.
# Returns:
#   0 on success, 1 when no usable usec/op value was found (nothing is
#   appended in that case).
#######################################
function summarize_result {
  local test_out=$1
  local test_name=$2
  local bench_name=$3
  local nthr=$4
  local usecs_op mb_sec avg p50 tenths ops_sec

  # Only the first matching line is used, so a log that happens to contain
  # several result lines cannot produce concatenated numbers.
  # Field 3 of the result line is usec/op and field 5 is MB/s.
  usecs_op=$(awk -v name="$bench_name" \
    'index($0, name) == 1 { printf "%.1f", $3; exit }' "$test_out")
  mb_sec=$(awk -v name="$bench_name" \
    'index($0, name) == 1 { printf "%.1f", $5; exit }' "$test_out")
  # Field 4 of the "Count:" and "Min:" lines is reported as avg and p50.
  avg=$(awk '/^Count:/ { printf "%.1f", $4; exit }' "$test_out")
  p50=$(awk '/^Min:/ { printf "%.1f", $4; exit }' "$test_out")

  # usecs_op always carries exactly one decimal digit, so dropping the
  # separator yields the value in tenths of a microsecond. Working in integers
  # gives the same truncated quotient as (1000000 * threads) / usecs_op without
  # needing an external calculator, and lets us reject a missing or zero value
  # before dividing.
  tenths=${usecs_op//[!0-9]/}
  if [[ -z "$tenths" ]] || (( 10#$tenths == 0 )); then
    echo "summarize_result: no usable '$bench_name' result in $test_out" >&2
    return 1
  fi
  ops_sec=$(( 10000000 * nthr / 10#$tenths ))

  printf '%s\t%s\t%s\t%s\t%s\t%s\n' \
    "$ops_sec" "$mb_sec" "$usecs_op" "$avg" "$p50" "$test_name" \
    >> "$output_dir/report.txt"
}

#######################################
# Load the database with the fillseq benchmark and summarize the result.
# The load starts from an empty database (--use_existing_db=0), runs on a
# single thread and uses --sync=0; no other durability or memtable options are
# passed beyond what params_w contains.
# Globals:
#   num_keys, value_size, params_w, output_dir (read)
# Outputs:
#   Writes $output_dir/benchmark_fillseq.v<value_size>.log (truncated first)
#   and echoes the same text to stdout.
#######################################
function run_fillseq {
  local log="$output_dir/benchmark_fillseq.v${value_size}.log"
  local -a cmd

  echo "Loading $num_keys keys sequentially"
  # The command is kept as an argument array and executed directly, so no text
  # is re-parsed by the shell and the log path may contain whitespace.
  # params_w is a whitespace-separated flag string and is expanded unquoted on
  # purpose so that it splits into one argument per flag.
  # shellcheck disable=SC2206
  cmd=(
    ./db_bench --benchmarks=fillseq
    --use_existing_db=0
    --sync=0
    $params_w
    --threads=1
    "--seed=$(date +%s)"
  )
  # First log line: the equivalent shell command (this truncates an old log).
  echo "${cmd[*]} 2>&1 | tee -a $log" | tee "$log"
  "${cmd[@]}" 2>&1 | tee -a "$log"
  summarize_result "$log" "fillseq.v${value_size}" fillseq 1
}

#######################################
# Run a write benchmark against the existing database and summarize it.
# Globals:
#   num_keys, num_threads, syncval, params_w, output_dir (read)
# Arguments:
#   $1 - db_bench benchmark name (the main loop passes "overwrite")
# Outputs:
#   Writes $output_dir/benchmark_<op>.t<threads>.s<sync>.log (truncated first)
#   and echoes the same text to stdout.
#######################################
function run_change {
  local operation=$1
  local out_name="benchmark_${operation}.t${num_threads}.s${syncval}.log"
  local log="$output_dir/${out_name}"
  local -a cmd

  echo "Do $num_keys random $operation"
  # Argument array instead of an eval'd string: nothing is re-parsed by the
  # shell and the log path may contain whitespace. params_w is a
  # whitespace-separated flag string, expanded unquoted on purpose.
  # shellcheck disable=SC2206
  cmd=(
    ./db_bench "--benchmarks=$operation"
    --use_existing_db=1
    "--sync=$syncval"
    $params_w
    "--threads=$num_threads"
    "--seed=$(date +%s)"
  )
  # First log line: the equivalent shell command (this truncates an old log).
  echo "${cmd[*]} 2>&1 | tee -a $log" | tee "$log"
  "${cmd[@]}" 2>&1 | tee -a "$log"
  summarize_result "$log" "${operation}.t${num_threads}.s${syncval}" \
    "$operation" "$num_threads"
}

#######################################
# Run the readrandom benchmark against the existing database and summarize it.
# Globals:
#   num_keys, num_threads, params_w, output_dir (read)
# Outputs:
#   Writes $output_dir/benchmark_readrandom.t<threads>.log (truncated first)
#   and echoes the same text to stdout.
#######################################
function run_readrandom {
  local out_name="benchmark_readrandom.t${num_threads}.log"
  local log="$output_dir/${out_name}"
  local -a cmd

  echo "Reading $num_keys random keys"
  # Argument array instead of an eval'd string: nothing is re-parsed by the
  # shell and the log path may contain whitespace. params_w is a
  # whitespace-separated flag string, expanded unquoted on purpose.
  # shellcheck disable=SC2206
  cmd=(
    ./db_bench --benchmarks=readrandom
    --use_existing_db=1
    $params_w
    "--threads=$num_threads"
    "--seed=$(date +%s)"
  )
  # First log line: the equivalent shell command (this truncates an old log).
  echo "${cmd[*]} 2>&1 | tee -a $log" | tee "$log"
  "${cmd[@]}" 2>&1 | tee -a "$log"
  summarize_result "$log" "readrandom.t${num_threads}" readrandom "$num_threads"
}

#######################################
# Run a readwhile<op> benchmark against the existing database and summarize it.
# Globals:
#   num_keys, num_threads, syncval, writes_per_second, params_w,
#   output_dir (read)
# Arguments:
#   $1 - suffix appended to "readwhile" to form the db_bench benchmark name
#        (the main loop passes "writing")
# Outputs:
#   Writes $output_dir/benchmark_readwhile<op>.t<threads>.log (truncated
#   first) and echoes the same text to stdout.
#######################################
function run_readwhile {
  local operation=$1
  local out_name="benchmark_readwhile${operation}.t${num_threads}.log"
  local log="$output_dir/${out_name}"
  local -a cmd

  echo "Reading $num_keys random keys while $operation"
  # Argument array instead of an eval'd string: nothing is re-parsed by the
  # shell and the log path may contain whitespace. params_w is a
  # whitespace-separated flag string, expanded unquoted on purpose.
  # shellcheck disable=SC2206
  cmd=(
    ./db_bench "--benchmarks=readwhile${operation}"
    --use_existing_db=1
    "--sync=$syncval"
    $params_w
    "--threads=$num_threads"
    "--writes_per_second=$writes_per_second"
    "--seed=$(date +%s)"
  )
  # First log line: the equivalent shell command (this truncates an old log).
  echo "${cmd[*]} 2>&1 | tee -a $log" | tee "$log"
  "${cmd[@]}" 2>&1 | tee -a "$log"
  summarize_result "$log" "readwhile${operation}.t${num_threads}" \
    "readwhile${operation}" "$num_threads"
}

# Print the current time as whole seconds since the Unix epoch.
function now() {
  local epoch
  epoch=$(date +"%s")
  echo "$epoch"
}

# Files kept under $output_dir: the summary table appended to by
# summarize_result, and a timeline of when each job started and finished.
report="$output_dir/report.txt"
schedule="$output_dir/schedule.txt"

echo "===== Benchmark ====="

# Run!!!
# $1 is a comma-separated list of jobs; they run in the order given.
IFS=',' read -r -a jobs <<< "$1"
for job in "${jobs[@]}"; do

  if [ "$job" = debug ]; then
    # "debug" only shrinks the key count for the jobs listed after it. The
    # --num flag was already baked into params_w, so it has to be patched
    # there as well; otherwise db_bench would keep using the old count.
    old_num_flag="--num=${num_keys} "
    num_keys=1000 # debug
    new_num_flag="--num=${num_keys} "
    params_w=${params_w/"$old_num_flag"/$new_num_flag}
    echo "Setting num_keys to $num_keys"
    # Nothing was measured, so there is no report row to show.
    continue
  fi

  echo "Start $job at $(date)" | tee -a "$schedule"

  start=$(now)
  case "$job" in
    fillseq)
      run_fillseq
      ;;
    overwrite)
      run_change overwrite
      ;;
    readrandom)
      run_readrandom
      ;;
    readwhilewriting)
      run_readwhile writing
      ;;
    *)
      # A misspelled job is an error: report it on stderr and fail.
      echo "unknown job $job" >&2
      exit 1
      ;;
  esac
  end=$(now)

  echo "Complete $job in $((end - start)) seconds" | tee -a "$schedule"

  # Show the row that the job just appended to the report.
  printf 'ops/sec\tmb/sec\tusec/op\tavg\tp50\tTest\n'
  tail -n 1 "$report"

done
Fix /bin/bash shebangs Summary: "/bin/bash" is a Linuxism. "/usr/bin/env bash" is portable. Closes https://github.com/facebook/rocksdb/pull/2646 Differential Revision: D5556259 Pulled By: ajkr fbshipit-source-id: cbffd38ecdbfffb2438969ec007ab345ed893ccb 2017-08-04 00:43:05 +02:00			`#!/usr/bin/env bash`
Add copyright headers per FB open-source checkup tool. (#5199) Summary: internal task: T35568575 Pull Request resolved: https://github.com/facebook/rocksdb/pull/5199 Differential Revision: D14962794 Pulled By: gfosco fbshipit-source-id: 93838ede6d0235eaecff90d200faed9a8515bbbe 2019-04-18 19:51:19 +02:00			`# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.`
Add scripts to run leveldb benchmark Summary: This runs a benchmark for LevelDB similar to what we have in tools/run_flash_bench.sh. It requires changes to db_bench that I published in a LevelDB fork on github. Some results are at: http://smalldatum.blogspot.com/2015/04/comparing-leveldb-and-rocksdb-take-2.html Sample output: ops/sec mb/sec usec/op avg p50 Test 525 16.4 1904.5 1904.5 111.0 fillseq.v32768 75187 15.5 13.3 13.3 4.4 fillseq.v200 28328 5.8 35.3 35.3 4.7 overwrite.t1.s0 175438 0.0 5.7 5.7 4.4 readrandom.t1 28490 5.9 35.1 35.1 4.7 overwrite.t1.s0 121951 0.0 8.2 8.2 5.7 readwhilewriting.t1 Task ID: # Blame Rev: Test Plan: Revert Plan: Database Impact: Memcache Impact: Other Notes: EImportant: - begin PUBLIC platform impact section - Bugzilla: # - end platform impact - Reviewers: igor Reviewed By: igor Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D37749 2015-04-28 04:32:56 +02:00			`# REQUIRE: db_bench binary exists in the current directory`
			`#`
			`# This should be used with the LevelDB fork listed here to use additional test options.`
			`# For more details on the changes see the blog post listed below.`
			`# https://github.com/mdcallag/leveldb-1`
			`# http://smalldatum.blogspot.com/2015/04/comparing-leveldb-and-rocksdb-take-2.html`

			`if [ $# -ne 1 ]; then`
			`echo -n "./benchmark.sh [fillseq/overwrite/readrandom/readwhilewriting]"`
			`exit 0`
			`fi`

			`# size constants`
			`K=1024`
			`M=$((1024 * K))`
			`G=$((1024 * M))`

			`if [ -z $DB_DIR ]; then`
			`echo "DB_DIR is not defined"`
			`exit 0`
			`fi`

			`output_dir=${OUTPUT_DIR:-/tmp/}`
			`if [ ! -d $output_dir ]; then`
			`mkdir -p $output_dir`
			`fi`

			`# all multithreaded tests run with sync=1 unless`
			`# $DB_BENCH_NO_SYNC is defined`
			`syncval="1"`
			`if [ ! -z $DB_BENCH_NO_SYNC ]; then`
			`echo "Turning sync off for all multithreaded tests"`
			`syncval="0";`
			`fi`

			`num_threads=${NUM_THREADS:-16}`
			`# Only for whilewriting, whilemerging`
			`writes_per_second=${WRITES_PER_SECOND:-$((10 * K))}`
			`cache_size=${CACHE_SIZE:-$((1 * G))}`

			`num_keys=${NUM_KEYS:-$((1 * G))}`
			`key_size=20`
			`value_size=${VALUE_SIZE:-400}`
			`block_size=${BLOCK_SIZE:-4096}`

			`const_params="`
			`--db=$DB_DIR \`
			`\`
			`--num=$num_keys \`
			`--value_size=$value_size \`
			`--cache_size=$cache_size \`
			`--compression_ratio=0.5 \`
			`\`
			`--write_buffer_size=$((2 * M)) \`
			`\`
			`--histogram=1 \`
			`\`
			`--bloom_bits=10 \`
			`--open_files=$((20 * K))"`

			`params_w="$const_params "`

			`function summarize_result {`
			`test_out=$1`
			`test_name=$2`
			`bench_name=$3`
			`nthr=$4`

			`usecs_op=$( grep ^${bench_name} $test_out \| awk '{ printf "%.1f", $3 }' )`
			`mb_sec=$( grep ^${bench_name} $test_out \| awk '{ printf "%.1f", $5 }' )`
			`ops=$( grep "^Count:" $test_out \| awk '{ print $2 }' )`
			`ops_sec=$( echo "scale=0; (1000000.0 * $nthr) / $usecs_op" \| bc )`
			`avg=$( grep "^Count:" $test_out \| awk '{ printf "%.1f", $4 }' )`
			`p50=$( grep "^Min:" $test_out \| awk '{ printf "%.1f", $4 }' )`
			`echo -e "$ops_sec\t$mb_sec\t$usecs_op\t$avg\t$p50\t$test_name" \`
			`>> $output_dir/report.txt`
			`}`

			`function run_fillseq {`
			`# This runs with a vector memtable and the WAL disabled to load faster. It is still crash safe and the`
			`# client can discover where to restart a load after a crash. I think this is a good way to load.`
			`echo "Loading $num_keys keys sequentially"`
			`cmd="./db_bench --benchmarks=fillseq \`
			`--use_existing_db=0 \`
			`--sync=0 \`
			`$params_w \`
			`--threads=1 \`
			`--seed=$( date +%s ) \`
			`2>&1 \| tee -a $output_dir/benchmark_fillseq.v${value_size}.log"`
			`echo $cmd \| tee $output_dir/benchmark_fillseq.v${value_size}.log`
			`eval $cmd`
			`summarize_result $output_dir/benchmark_fillseq.v${value_size}.log fillseq.v${value_size} fillseq 1`
			`}`

			`function run_change {`
			`operation=$1`
			`echo "Do $num_keys random $operation"`
			`out_name="benchmark_${operation}.t${num_threads}.s${syncval}.log"`
			`cmd="./db_bench --benchmarks=$operation \`
			`--use_existing_db=1 \`
			`--sync=$syncval \`
			`$params_w \`
			`--threads=$num_threads \`
			`--seed=$( date +%s ) \`
			`2>&1 \| tee -a $output_dir/${out_name}"`
			`echo $cmd \| tee $output_dir/${out_name}`
			`eval $cmd`
			`summarize_result $output_dir/${out_name} ${operation}.t${num_threads}.s${syncval} $operation $num_threads`
			`}`

			`function run_readrandom {`
			`echo "Reading $num_keys random keys"`
			`out_name="benchmark_readrandom.t${num_threads}.log"`
			`cmd="./db_bench --benchmarks=readrandom \`
			`--use_existing_db=1 \`
			`$params_w \`
			`--threads=$num_threads \`
			`--seed=$( date +%s ) \`
			`2>&1 \| tee -a $output_dir/${out_name}"`
			`echo $cmd \| tee $output_dir/${out_name}`
			`eval $cmd`
			`summarize_result $output_dir/${out_name} readrandom.t${num_threads} readrandom $num_threads`
			`}`

			`function run_readwhile {`
			`operation=$1`
			`echo "Reading $num_keys random keys while $operation"`
			`out_name="benchmark_readwhile${operation}.t${num_threads}.log"`
			`cmd="./db_bench --benchmarks=readwhile${operation} \`
			`--use_existing_db=1 \`
			`--sync=$syncval \`
			`$params_w \`
			`--threads=$num_threads \`
			`--writes_per_second=$writes_per_second \`
			`--seed=$( date +%s ) \`
			`2>&1 \| tee -a $output_dir/${out_name}"`
			`echo $cmd \| tee $output_dir/${out_name}`
			`eval $cmd`
			`summarize_result $output_dir/${out_name} readwhile${operation}.t${num_threads} readwhile${operation} $num_threads`
			`}`

			`function now() {`
			echo `date +"%s"`
			`}`

			`report="$output_dir/report.txt"`
			`schedule="$output_dir/schedule.txt"`

			`echo "===== Benchmark ====="`

			`# Run!!!`
			`IFS=',' read -a jobs <<< $1`
Suppress lint in old files Summary: Grandfather in super old lint issues to make a clean slate for moving forward that allows us to have stronger enforcement on new issues. Reviewed By: yiwu-arbug Differential Revision: D6821806 fbshipit-source-id: 22797d31ec58e9eb0255d3b66fedfcfcb0dc127c 2018-01-29 21:43:56 +01:00			`# shellcheck disable=SC2068`
Add scripts to run leveldb benchmark Summary: This runs a benchmark for LevelDB similar to what we have in tools/run_flash_bench.sh. It requires changes to db_bench that I published in a LevelDB fork on github. Some results are at: http://smalldatum.blogspot.com/2015/04/comparing-leveldb-and-rocksdb-take-2.html Sample output: ops/sec mb/sec usec/op avg p50 Test 525 16.4 1904.5 1904.5 111.0 fillseq.v32768 75187 15.5 13.3 13.3 4.4 fillseq.v200 28328 5.8 35.3 35.3 4.7 overwrite.t1.s0 175438 0.0 5.7 5.7 4.4 readrandom.t1 28490 5.9 35.1 35.1 4.7 overwrite.t1.s0 121951 0.0 8.2 8.2 5.7 readwhilewriting.t1 Task ID: # Blame Rev: Test Plan: Revert Plan: Database Impact: Memcache Impact: Other Notes: EImportant: - begin PUBLIC platform impact section - Bugzilla: # - end platform impact - Reviewers: igor Reviewed By: igor Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D37749 2015-04-28 04:32:56 +02:00			`for job in ${jobs[@]}; do`

			`if [ $job != debug ]; then`
			echo "Start $job at `date`" \| tee -a $schedule
			`fi`

			`start=$(now)`
			`if [ $job = fillseq ]; then`
			`run_fillseq`
			`elif [ $job = overwrite ]; then`
			`run_change overwrite`
			`elif [ $job = readrandom ]; then`
			`run_readrandom`
			`elif [ $job = readwhilewriting ]; then`
			`run_readwhile writing`
			`elif [ $job = debug ]; then`
			`num_keys=1000; # debug`
			`echo "Setting num_keys to $num_keys"`
			`else`
			`echo "unknown job $job"`
			`exit`
			`fi`
			`end=$(now)`

			`if [ $job != debug ]; then`
			`echo "Complete $job in $((end-start)) seconds" \| tee -a $schedule`
			`fi`

			`echo -e "ops/sec\tmb/sec\tusec/op\tavg\tp50\tTest"`
			`tail -1 $output_dir/report.txt`

			`done`