Python script to periodically run and kill the db_stress test

Summary: The script periodically runs and kills the stress test. Default values are used in the script for now. Should I make this a part of the Makefile or the automated rocksdb build? The values can easily be changed in the script right now, but should I add some support for variable values or input to the script? I believe the script achieves its objective: crashing db_stress unsafely and reopening the database to check that it is still sane.

Test Plan: python tools/db_crashtest.py

Reviewers: dhruba, vamsi, MarkCallaghan

Reviewed By: vamsi

CC: leveldb

Differential Revision: https://reviews.facebook.net/D9369
This commit is contained in:
Mayank Agarwal 2013-03-12 23:20:14 -07:00
parent 645ff8f231
commit e937d47180
2 changed files with 96 additions and 4 deletions

93
tools/db_crashtest.py Normal file
View File

@ -0,0 +1,93 @@
import os
import sys
import time
import shlex
import getopt
import logging
import subprocess
# This python script runs and kills db_stress multiple times with
# test-batches-snapshot ON,
# total operations much less than the total keys, and
# a high read percentage.
# This checks consistency in case of unsafe crashes in Rocksdb
def _usage():
    """Return the usage text shown for -h and for invalid arguments."""
    return ("db_crashtest.py -d <duration_test> -t <#threads> "
            "-i <interval for one run> -o <ops_per_thread> "
            "-b <write_buffer_size>\n")


def _parse_args(argv):
    """Parse command-line options into a dict keyed by option flag.

    Prints the usage text and exits with status 2 on an unknown option or a
    non-integer value; prints the usage text and exits normally on -h.
    """
    try:
        opts, _ = getopt.getopt(argv, "hd:t:i:o:b:")
    except getopt.GetoptError:
        print(_usage())
        sys.exit(2)

    # default values, will be overridden by cmdline args
    settings = {
        "-i": 120,              # time for one db_stress instance to run
        "-d": 6000,             # total time for this script to test db_stress
        "-t": 32,               # number of db_stress threads
        "-o": 500000,           # operations per thread
        "-b": 4 * 1024 * 1024,  # write buffer size
    }
    for opt, arg in opts:
        if opt == "-h":
            print(_usage())
            sys.exit()
        # getopt only returns options named in the option string, so every
        # remaining opt is one of the integer-valued keys above.
        try:
            settings[opt] = int(arg)
        except ValueError:
            print(_usage())
            sys.exit(2)
    return settings


def _db_stress_command(threads, ops_per_thread, write_buf_size):
    """Build the db_stress argument vector, one list element per argument."""
    return [
        os.path.expanduser("~/rocksdb/db_stress"),
        "--test_batches_snapshots=1",
        "--ops_per_thread=" + str(ops_per_thread),
        "--threads=" + str(threads),
        "--write_buffer_size=" + str(write_buf_size),
        "--reopen=10",
        "--readpercent=50",
        "--db=/tmp/rocksdb/crashtest",
        "--max_key=100",
    ]


def main(argv):
    """Repeatedly run db_stress and kill it mid-run to test crash safety.

    Each round starts db_stress on /tmp/rocksdb/crashtest, lets it run for
    ``interval`` seconds, kills it, and then inspects its stderr.  Any
    non-blank line on stderr is treated as a failure: the lines are printed
    and the script exits with status 2.  Rounds repeat until ``duration``
    seconds have elapsed.

    Args:
        argv: command-line arguments without the program name.  Supported
            options: -d duration, -t threads, -i interval, -o ops per
            thread, -b write buffer size, -h help.

    Raises:
        SystemExit: status 2 for bad arguments, a db_stress that cannot be
            started, or stderr output from db_stress; normal exit for -h.
    """
    settings = _parse_args(argv)
    interval = settings["-i"]
    duration = settings["-d"]

    # Build only after the arguments are known to be valid, so that -h or a
    # typo does not trigger a compile.  A failed build is reported but is not
    # fatal: an already-built binary may still be usable.
    if os.system("make -C ~/rocksdb db_stress") != 0:
        logging.warning("WARNING: building db_stress failed\n")

    command = _db_stress_command(settings["-t"], settings["-o"],
                                 settings["-b"])

    exit_time = time.time() + duration
    while time.time() < exit_time:
        print("Running db_stress \n")
        os.system("mkdir -p /tmp/rocksdb/crashtest")
        killtime = time.time() + interval

        # Run db_stress directly (no shell) so that kill() hits db_stress
        # itself rather than an intermediate shell process.
        try:
            child = subprocess.Popen(command, stderr=subprocess.PIPE,
                                     universal_newlines=True)
        except OSError as err:
            print("***cannot start db_stress: " + str(err) + "^")
            sys.exit(2)

        time.sleep(interval)
        while time.time() <= killtime:
            time.sleep(1)

        if child.poll() is not None:
            logging.warning("WARNING: db_stress completed before kill\n")
        else:
            child.kill()
            print("KILLED \n")
            time.sleep(1)  # time to stabilize after a kill

        # communicate() reads stderr through EOF and reaps the child, so
        # blank lines do not hide later errors and no zombie is left behind.
        _, stderr_output = child.communicate()
        error_lines = [text.strip() for text in stderr_output.splitlines()
                       if text.strip()]
        for text in error_lines:
            print("***" + text + "^")
        if error_lines:
            sys.exit(2)

        time.sleep(1)  # time to stabilize before the next run
# Script entry point: pass the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    sys.exit(main(cli_args))

View File

@ -232,8 +232,7 @@ class Stats {
double micros = now - last_op_finish_;
hist_.Add(micros);
if (micros > 20000) {
fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
fflush(stderr);
fprintf(stdout, "long op: %.1f micros%30s\r", micros, "");
}
last_op_finish_ = now;
}
@ -247,8 +246,7 @@ class Stats {
else if (next_report_ < 100000) next_report_ += 10000;
else if (next_report_ < 500000) next_report_ += 50000;
else next_report_ += 100000;
fprintf(stderr, "... finished %ld ops%30s\r", done_, "");
fflush(stderr);
fprintf(stdout, "... finished %ld ops%30s\r", done_, "");
}
}
@ -868,6 +866,7 @@ class StressTest {
fprintf(stdout, "Number of threads : %d\n", FLAGS_threads);
fprintf(stdout, "Ops per thread : %d\n", FLAGS_ops_per_thread);
fprintf(stdout, "Read percentage : %d\n", FLAGS_readpercent);
fprintf(stdout, "Write-buffer-size : %d\n", FLAGS_write_buffer_size);
fprintf(stdout, "Delete percentage : %d\n", FLAGS_delpercent);
fprintf(stdout, "Max key : %ld\n", FLAGS_max_key);
fprintf(stdout, "Ratio #ops/#keys : %ld\n",