Allow regression_test.sh to specify OPTIONS_FILE. Add header comments.

Summary:
This patch makes the following improvements to regression_test.sh:
* Allow regression_test.sh to specify OPTIONS_FILE.
* Add header comments that include examples of how to run the script
  and describe all configurable parameters.
* bug fix.

Test Plan: Run the example commands in the header comments of regression_test.sh

Reviewers: sdong, yiwu, gunnarku

Reviewed By: gunnarku

Subscribers: andrewkr, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D59175
This commit is contained in:
Yueh-Hsuan Chiang 2016-06-06 22:57:46 -07:00
parent 0fee896841
commit fda098461b

View File

@ -1,5 +1,81 @@
#!/bin/bash
# REQUIRE: db_bench binary exists in the current directory
# The RocksDB regression test script.
# REQUIREMENT: must be able to run make db_bench in the current directory
#
# This script will do the following things in order:
#
# 1. check out the specified rocksdb commit.
# 2. build db_bench using the specified commit
# 3. setup test directory $TEST_PATH. If not specified, then the test directory
# will be "/tmp/rocksdb/regression_test"
# 4. run set of benchmarks on the specified host
# (can be either locally or remotely)
# 5. generate report in the $RESULT_PATH. If RESULT_PATH is not specified,
# RESULT_PATH will be set to $TEST_PATH/current_time
#
# = Examples =
# * Run the regression test using rocksdb commit abcdef that outputs results
# and temp files in "/my/output/dir"
#
# TEST_PATH=/my/output/dir COMMIT_ID=abcdef ./tools/regression_test.sh
#
# * Run the regression test on a remote host under "/my/output/dir" directory
# and store the results locally in "/my/benchmark/results" using commit
# abcdef and with the rocksdb options specified in /my/path/to/OPTIONS-012345
# with 1000000000 keys in each benchmark in the regression test where each
# key and value are 100 and 900 bytes respectively:
#
# REMOTE_USER_AT_HOST=yhchiang@my.remote.host \
# TEST_PATH=/my/output/dir \
# RESULT_PATH=/my/benchmark/results \
# COMMIT_ID=abcdef \
# OPTIONS_FILE=/my/path/to/OPTIONS-012345 \
# NUM_KEYS=1000000000 \
# KEY_SIZE=100 \
# VALUE_SIZE=900 \
# ./tools/regression_test.sh
#
# = Regression test environmental parameters =
# TEST_PATH: the root directory of the regression test.
# Default: "/tmp/rocksdb/regression_test"
# RESULT_PATH: the directory where the regression results will be generated.
# Default: "$TEST_PATH/current_time"
# REMOTE_USER_AT_HOST: If set, then the test will run on the specified host
# under the TEST_PATH directory and output test results locally in RESULT_PATH.
# The REMOTE_USER_AT_HOST should follow the format user-id@host.name
# DB_PATH: the path where the rocksdb database will be created during the
# regression test. Default: $TEST_PATH/db
# WAL_PATH: the path where the rocksdb WAL will be written.
# Default: $TEST_PATH/wal
# OPTIONS_FILE: If specified, then the regression test will use the specified
# file to initialize the RocksDB options in its benchmarks. Note that
# this feature only works for commits after 88acd93 or rocksdb version
# this feature only work for commits after 88acd93 or rocksdb version
# later than 4.9.
#
# = db_bench parameters =
# NUM_THREADS: The number of concurrent foreground threads that will issue
# database operations in the benchmark. Default: 16.
# NUM_KEYS: The number of keys issued by each thread in the benchmark.
# Default: 1G.
# KEY_SIZE: The size of each key in bytes in db_bench. Default: 100.
# VALUE_SIZE: The size of each value in bytes in db_bench. Default: 900.
# CACHE_SIZE: The size of RocksDB block cache used in db_bench. Default: 1G
# STATISTICS: If 1, then statistics is on in db_bench. Default: 0.
# COMPRESSION_RATIO: The compression ratio of the key generated in db_bench.
# Default: 0.5.
# HISTOGRAM: If 1, then the performance histogram feature is on.
# STATS_PER_INTERVAL: If 1, then the statistics will be reported for every
# STATS_INTERVAL_SECONDS seconds. Default 1.
# STATS_INTERVAL_SECONDS: If STATS_PER_INTERVAL is set to 1, then statistics
# will be reported for every STATS_INTERVAL_SECONDS. Default 60.
# MAX_BACKGROUND_FLUSHES: The maximum number of concurrent flushes in
# db_bench. Default: 4.
# MAX_BACKGROUND_COMPACTIONS: The maximum number of concurrent compactions
# in db_bench. Default: 16.
# SEEK_NEXTS: Controls how many Next() will be called after seek.
# Default: 10.
# SEED: random seed that controls the randomness of the benchmark.
# Default: $( date +%s )
function main {
commit=${1:-"origin/master"}
@ -32,18 +108,20 @@ function init_arguments {
G=$((1024 * M))
current_time=$(date +"%F-%H:%M:%S")
RESULT_PATH=${2:-"$1/results/$current_time"}
RESULT_PATH=${RESULT_PATH:-"$1/results/$current_time"}
COMMIT_ID=`git log | head -n1 | cut -c 8-`
SUMMARY_FILE="$RESULT_PATH/SUMMARY.csv"
DB_PATH=${3:-"$1/db/"}
WAL_PATH=${4:-"$1/wal/"}
if [ -z "$REMOTE_HOST_USER" ]; then
if [ -z "$REMOTE_USER_AT_HOST" ]; then
DB_BENCH_DIR=${5:-"."}
else
DB_BENCH_DIR=${5:-"$1/db_bench"}
fi
SCP=${SCP:-"scp"}
SSH=${SSH:-"ssh"}
NUM_THREADS=${NUM_THREADS:-16}
NUM_KEYS=${NUM_KEYS:-$((1 * G))}
KEY_SIZE=${KEY_SIZE:-100}
@ -73,6 +151,7 @@ function run_db_bench {
echo "======================================================================="
echo ""
db_bench_error=0
options_file_arg=$(setup_options_file)
db_bench_cmd="$DB_BENCH_DIR/db_bench \
--benchmarks=$1 --db=$DB_PATH --wal_dir=$WAL_PATH \
--use_existing_db=$USE_EXISTING_DB \
@ -82,6 +161,7 @@ function run_db_bench {
--value_size=$VALUE_SIZE \
--cache_size=$CACHE_SIZE \
--statistics=$STATISTICS \
$options_file_arg \
--compression_ratio=$COMPRESSION_RATIO \
--histogram=$HISTOGRAM \
--seek_nexts=$SEEK_NEXTS \
@ -92,10 +172,11 @@ function run_db_bench {
--seed=$SEED 2>&1"
kill_db_bench_cmd="pkill db_bench"
ps_cmd="ps aux"
if ! [ -z "$REMOTE_HOST_USER" ]; then
kill_db_bench_cmd="$SSH $REMOTE_HOST_USER $kill_db_bench_cmd"
db_bench_cmd="$SSH $REMOTE_HOST_USER $db_bench_cmd"
ps_cmd="$SSH $REMOTE_HOST_USER $ps_cmd"
if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
echo "Running benchmark remotely on $REMOTE_USER_AT_HOST"
kill_db_bench_cmd="$SSH $REMOTE_USER_AT_HOST $kill_db_bench_cmd"
db_bench_cmd="$SSH $REMOTE_USER_AT_HOST $db_bench_cmd"
ps_cmd="$SSH $REMOTE_USER_AT_HOST $ps_cmd"
fi
## kill existing db_bench processes
@ -113,7 +194,7 @@ function run_db_bench {
exit_on_error $? "$ps_cmd"
# perform the actual command to check whether db_bench is running
grep_output="$(eval $ps_cmd | grep db_bench)"
grep_output="$(eval $ps_cmd | grep db_bench | grep -v grep)"
if [ "$grep_output" != "" ]; then
echo "Stopped regression_test.sh as there're still db_bench processes running:"
echo $grep_output
@ -193,21 +274,32 @@ function build_db_bench {
}
function run_remote {
if ! [ -z "$REMOTE_HOST_USER" ]; then
cmd="$SSH $REMOTE_HOST_USER $1"
if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
cmd="$SSH $REMOTE_USER_AT_HOST $1"
else
cmd="$1"
fi
result=0
eval "($cmd) || result=1"
exit_on_error $result "$cmd"
eval "$cmd"
exit_on_error $? "$cmd"
}
function run_local {
result=0
eval "($1 || result=1)"
exit_on_error $result
eval "$1"
exit_on_error $?
}
# Print the db_bench command-line argument that selects the options file.
#
# Globals read:
#   OPTIONS_FILE         - local path of the options file; empty/unset means
#                          no options-file argument is produced.
#   REMOTE_USER_AT_HOST  - when non-empty, the options file is first copied
#                          to that host with $SCP.
#   DB_BENCH_DIR, SCP    - remote destination directory and copy command.
# Output (stdout):
#   "--options_file=<path>" followed by an empty line when OPTIONS_FILE is
#   set; only an empty line otherwise.
#
# The caller captures stdout with $(...), so stdout must carry nothing but
# the argument itself.
function setup_options_file {
  # Keep the scratch variable out of the global namespace.
  local options_file
  if [ -n "$OPTIONS_FILE" ]; then
    if [ -n "$REMOTE_USER_AT_HOST" ]; then
      options_file="$DB_BENCH_DIR/OPTIONS_FILE"
      # Anything the copy step prints goes to stderr so it cannot leak into
      # the captured argument string.
      run_local "$SCP $OPTIONS_FILE $REMOTE_USER_AT_HOST:$options_file" >&2
    else
      options_file="$OPTIONS_FILE"
    fi
    echo "--options_file=$options_file"
  fi
  echo ""
}
function setup_test_directory {
@ -215,14 +307,14 @@ function setup_test_directory {
run_remote "rm -rf $DB_PATH"
run_remote "rm -rf $WAL_PATH"
if ! [ -z "$REMOTE_HOST_USER" ]; then
if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
run_remote "rm -rf $DB_BENCH_DIR"
fi
run_remote "mkdir -p $DB_PATH"
run_remote "mkdir -p $WAL_PATH"
if ! [ -z "$REMOTE_HOST_USER" ]; then
if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
run_remote "mkdir -p $DB_BENCH_DIR"
run_local "$SCP ./db_bench $REMOTE_HOST_USER:$DB_BENCH_DIR/db_bench"
run_local "$SCP ./db_bench $REMOTE_USER_AT_HOST:$DB_BENCH_DIR/db_bench"
fi
run_local "rm -rf $RESULT_PATH"