Allow regression test to run db_bench at a remost host

Summary: This patch does the following improvement on the regression_test.sh * allows db_bench being executed at a remost host while storing the benchmark results locally. * kills all db_bench related processes before running db_bench * better error handling. Test Plan: 1. Run regression_test.sh both locally and remotely 2. Run multiple regression_test.sh at the same time and make sure i. Only one runs successfully. ii. The one that runs successfully will kill all other db_bench processes before it runs any benchmark. Reviewers: sdong, yiwu, gunnarku Reviewed By: gunnarku Subscribers: andrewkr, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D58611
2016-06-02 02:53:39 -07:00 · 2016-06-02 02:53:39 -07:00 · 8cf0f86d39
commit 8cf0f86d39
parent 27ad170712
1 changed files with 90 additions and 27 deletions
--- a/tools/regression_test.sh
+++ b/tools/regression_test.sh
@ -3,12 +3,13 @@

 function main {
  commit=${1:-"origin/master"}
-  test_root_dir=${2:-"/tmp/rocksdb/regression_test"}
+  test_root_dir=${TEST_PATH:-"/tmp/rocksdb/regression_test"}

-  init_arguments
+  init_arguments $test_root_dir

  checkout_rocksdb $commit
  build_db_bench
+
  setup_test_directory

  # an additional dot indicates we share same env variables
@ -31,12 +32,18 @@ function init_arguments {
  G=$((1024 * M))

  current_time=$(date +"%F-%H:%M:%S")
-  RESULT_PATH=${4:-"/tmp/rocksdb/regression/results/$current_time"}
+  RESULT_PATH=${2:-"$1/results/$current_time"}
  COMMIT_ID=`git log | head -n1 | cut -c 8-`
  SUMMARY_FILE="$RESULT_PATH/SUMMARY.csv"

-  DB_PATH=${2:-"/tmp/rocksdb/regression/db/"}
-  WAL_PATH=${3:-"/tmp/rocksdb/regression/wal/"}
+  DB_PATH=${3:-"$1/db/"}
+  WAL_PATH=${4:-"$1/wal/"}
+  if [ -z "$REMOTE_HOST_USER" ]; then
+    DB_BENCH_DIR=${5:-"."}
+  else
+    DB_BENCH_DIR=${5:-"$1/db_bench"}
+  fi
+
  NUM_THREADS=${NUM_THREADS:-16}
  NUM_KEYS=${NUM_KEYS:-$((1 * G))}
  KEY_SIZE=${KEY_SIZE:-100}
@ -56,6 +63,9 @@ function init_arguments {
 # $1 --- benchmark name
 # $2 --- use_existing_db (optional)
 function run_db_bench {
+  # this will terminate all currently-running db_bench
+  find_db_bench_cmd="ps aux | grep db_bench | grep -v grep | grep -v aux | awk '{print \$2}'"
+
  USE_EXISTING_DB=${2:-1}
  echo ""
  echo "======================================================================="
@ -63,7 +73,8 @@ function run_db_bench {
  echo "======================================================================="
  echo ""
  db_bench_error=0
-  cmd="(./db_bench --benchmarks=$1 --db=$DB_PATH --wal_dir=$WAL_PATH \
+  db_bench_cmd="$DB_BENCH_DIR/db_bench \
+      --benchmarks=$1 --db=$DB_PATH --wal_dir=$WAL_PATH \
      --use_existing_db=$USE_EXISTING_DB \
      --threads=$NUM_THREADS \
      --num=$NUM_KEYS \
@ -78,8 +89,40 @@ function run_db_bench {
      --stats_interval_seconds=$STATS_INTERVAL_SECONDS \
      --max_background_flushes=$MAX_BACKGROUND_FLUSHES \
      --max_background_compactions=$MAX_BACKGROUND_COMPACTIONS \
-      --seed=$SEED \
-      2>&1 || db_bench_error=1) | tee -a $RESULT_PATH/$1"
+      --seed=$SEED 2>&1"
+  kill_db_bench_cmd="pkill db_bench"
+  ps_cmd="ps aux"
+  if ! [ -z "$REMOTE_HOST_USER" ]; then
+    kill_db_bench_cmd="$SSH $REMOTE_HOST_USER $kill_db_bench_cmd"
+    db_bench_cmd="$SSH $REMOTE_HOST_USER $db_bench_cmd"
+    ps_cmd="$SSH $REMOTE_HOST_USER $ps_cmd"
+  fi
+
+  ## kill existing db_bench processes
+  eval "$kill_db_bench_cmd"
+  if [ $? -eq 0 ]; then
+    echo "Killed all currently running db_bench"
+  fi
+
+  ## make sure no db_bench is running
+  # The following statement is necessary make sure "eval $ps_cmd" will success.
+  # Otherwise, if we simply check whether "$(eval $ps_cmd | grep db_bench)" is
+  # successful or not, then it will always be false since grep will return
+  # non-zero status when there's no matching output.
+  ps_output="$(eval $ps_cmd)"
+  exit_on_error $? "$ps_cmd"
+
+  # perform the actual command to check whether db_bench is running
+  grep_output="$(eval $ps_cmd | grep db_bench)"
+  if [ "$grep_output" != "" ]; then
+    echo "Stopped regression_test.sh as there're still db_bench processes running:"
+    echo $grep_output
+    exit 1
+  fi
+
+  ## run the db_bench
+  cmd="($db_bench_cmd || db_bench_error=1) | tee -a $RESULT_PATH/$1"
+  exit_on_error $?
  echo $cmd
  eval $cmd
  exit_on_error $db_bench_error
@ -91,7 +134,9 @@ function run_db_bench {
 # $2 --- the filename of the output log of db_bench
 function update_report {
  main_result=`cat $2 | grep $1`
+  exit_on_error $?
  perc_statement=`cat $2 | grep Percentile`
+  exit_on_error $?

  # Obtain micros / op
  main_pattern="$1"'[[:blank:]]+:[[:blank:]]+([0-9\.]+)[[:blank:]]+micros/op'
@ -117,8 +162,11 @@ function update_report {
 function exit_on_error {
  if [ $1 -ne 0 ]; then
    echo ""
-    echo "ERROR: Benchmark did not complete successfully. " \
-         "Partial results are output to $RESULT_PATH"
+    echo "ERROR: Benchmark did not complete successfully."
+    if ! [ -z "$2" ]; then
+      echo "Failure command: $2"
+    fi
+    echo "Partial results are output to $RESULT_PATH"
    echo "ERROR" >> $SUMMARY_FILE
    exit $1
  fi
@ -144,26 +192,41 @@ function build_db_bench {
  exit_on_error $?
 }

+function run_remote {
+  if ! [ -z "$REMOTE_HOST_USER" ]; then
+    cmd="$SSH $REMOTE_HOST_USER $1"
+  else
+    cmd="$1"
+  fi
+  
+  result=0
+  eval "($cmd) || result=1"
+  exit_on_error $result "$cmd"
+}
+
+function run_local {
+  result=0
+  eval "($1 || result=1)"
+  exit_on_error $result
+}
+
 function setup_test_directory {
  echo "Deleting old regression test directories and creating new ones"

-  rm -rf "$DB_PATH"
-  exit_on_error $?
+  run_remote "rm -rf $DB_PATH"
+  run_remote "rm -rf $WAL_PATH"
+  if ! [ -z "$REMOTE_HOST_USER" ]; then
+    run_remote "rm -rf $DB_BENCH_DIR"
+  fi
+  run_remote "mkdir -p $DB_PATH"
+  run_remote "mkdir -p $WAL_PATH"
+  if ! [ -z "$REMOTE_HOST_USER" ]; then
+    run_remote "mkdir -p $DB_BENCH_DIR"
+    run_local "$SCP ./db_bench $REMOTE_HOST_USER:$DB_BENCH_DIR/db_bench"
+  fi
  
-  rm -rf "$WAL_PATH"
-  exit_on_error $?
-
-  rm -rf "$RESULT_PATH"
-  exit_on_error $?
-
-  mkdir -p "$DB_PATH"
-  exit_on_error $?
-
-  mkdir -p "$WAL_PATH"
-  exit_on_error $?
-
-  mkdir -p "$RESULT_PATH"
-  exit_on_error $?
+  run_local "rm -rf $RESULT_PATH"
+  run_local "mkdir -p $RESULT_PATH"

  printf "%40s, %30s, %10s, %10s, %10s, %10s, %10s, %10s\n" \
      "commit id" "benchmark" "ms-per-op" "p50" "p75" "p99" "p99.9" "p99.99" \