2013-04-05 22:44:59 +02:00
|
|
|
#! /usr/bin/env python
|
2013-03-13 07:20:14 +01:00
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import time
|
|
|
|
import shlex
|
|
|
|
import getopt
|
|
|
|
import logging
|
2013-04-05 22:44:59 +02:00
|
|
|
import tempfile
|
2013-03-13 07:20:14 +01:00
|
|
|
import subprocess
|
|
|
|
|
|
|
|
# This python script repeatedly runs db_stress and kills it after a fixed
# interval, with
#   test_batches_snapshots ON,
#   a small key space (max_key=1000), and
#   half of the operations being reads (readpercent=50).
# This checks consistency in case of unsafe crashes in RocksDB.
|
|
|
|
|
|
|
|
# Usage text shared by every code path that reports a command-line problem.
_USAGE = ("db_crashtest.py -d <duration_test>"
          " -t <#threads> -i <interval for one run>"
          " -o <ops_per_thread> -b <write_buffer_size>\n")


def _parse_args(argv):
    """Parse command-line options into a dict of integer settings.

    Prints the usage text and exits with status 2 on an unknown option, a
    missing option value, or a value that is not an integer. Prints the usage
    text and exits with status 0 for -h.
    """
    # default values, will be overridden by cmdline args
    settings = {
        'interval': 120,  # seconds one db_stress instance runs before the kill
        'duration': 6000,  # total seconds this script keeps testing db_stress
        'threads': 32,
        # since we will be killing anyway, use large value for ops_per_thread
        'ops_per_thread': 10000000,
        'write_buf_size': 4 * 1024 * 1024,
    }
    flag_to_setting = {
        '-d': 'duration',
        '-t': 'threads',
        '-i': 'interval',
        '-o': 'ops_per_thread',
        '-b': 'write_buf_size',
    }

    try:
        opts, _ = getopt.getopt(argv, "hd:t:i:o:b:")
    except getopt.GetoptError:
        print(_USAGE)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(_USAGE)
            sys.exit()
        try:
            settings[flag_to_setting[opt]] = int(arg)
        except (KeyError, ValueError):
            # Unknown flag or a value that is not an integer.
            print(_USAGE)
            sys.exit(2)
    return settings


def _run_db_stress_once(cmd, interval):
    """Run cmd for about `interval` seconds, kill it, and return its stderr.

    Returns the list of non-blank stderr lines (whitespace-stripped) that the
    child produced. Exits with status 2 if the child cannot be started.
    """
    killtime = time.time() + interval
    try:
        # No shell: child.pid is db_stress itself, so kill() hits the right
        # process. universal_newlines=True makes stderr text on Python 2 and 3.
        child = subprocess.Popen(cmd, stderr=subprocess.PIPE,
                                 universal_newlines=True)
    except OSError as err:
        print("ERROR: could not start %s: %s\n" % (cmd[0], err))
        sys.exit(2)
    print("Running db_stress with pid=%d\n" % child.pid)

    time.sleep(interval)
    # sleep() may return slightly early; poll until the deadline has passed.
    while time.time() <= killtime:
        time.sleep(1)

    if child.poll() is not None:
        print("WARNING: db_stress ended before kill\n")
    else:
        child.kill()
        print("KILLED %d\n" % child.pid)
        time.sleep(1)  # time to stabilize after a kill

    # communicate() drains stderr to EOF and reaps the child.
    _, stderr_text = child.communicate()
    stripped = (line.strip() for line in stderr_text.splitlines())
    return [line for line in stripped if line]


def main(argv):
    """Repeatedly run ./db_stress and kill it to check crash consistency.

    argv is the list of command-line arguments without the program name:
      -d  total time, in seconds, to keep starting db_stress runs
      -t  value passed to db_stress as --threads
      -i  seconds each db_stress run is left alive before it is killed
      -o  value passed to db_stress as --ops_per_thread
      -b  value passed to db_stress as --write_buffer_size
      -h  print usage and exit

    Every run reuses the same freshly created temporary directory as --db
    with --destroy_db_initially=0, so each run reopens the database left
    behind by the previous kill. Any non-blank line that db_stress writes to
    stderr is printed and makes the script exit with status 2.
    """
    settings = _parse_args(argv)
    interval = settings['interval']
    duration = settings['duration']
    threads = settings['threads']
    ops_per_thread = settings['ops_per_thread']
    write_buf_size = settings['write_buf_size']

    exit_time = time.time() + duration

    dirpath = tempfile.mkdtemp()

    print("Running crash-test with \ninterval_between_crash="
          + str(interval) + "\ntotal-duration=" + str(duration)
          + "\nthreads=" + str(threads) + "\nops_per_thread="
          + str(ops_per_thread) + "\nwrite_buffer_size="
          + str(write_buf_size) + "\n")

    # NOTE(review): the leading "0" in the ops_per_thread and threads values
    # is kept exactly as before; confirm db_stress parses these as decimal.
    cmd = [
        './db_stress',
        '--test_batches_snapshots=1',
        '--ops_per_thread=0' + str(ops_per_thread),
        '--threads=0' + str(threads),
        '--write_buffer_size=' + str(write_buf_size),
        '--destroy_db_initially=0',
        '--reopen=0',
        '--readpercent=50',
        '--db=' + dirpath,
        '--max_key=1000',
    ]

    while time.time() < exit_time:
        error_lines = _run_db_stress_once(cmd, interval)
        for line in error_lines:
            print('***' + line + '^')
        if error_lines:
            sys.exit(2)
|
|
|
|
|
|
|
|
# Script entry point: hand the command-line arguments (without the program
# name) to main() and use whatever it returns as the process exit status.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    sys.exit(main(cli_args))
|