2013-04-04 23:49:43 -07:00
|
|
|
#! /usr/bin/env python
|
|
|
|
import os
|
2013-08-20 17:37:49 -07:00
|
|
|
import re
|
2013-04-04 23:49:43 -07:00
|
|
|
import sys
|
|
|
|
import time
|
2013-06-08 12:29:43 -07:00
|
|
|
import random
|
2013-04-04 23:49:43 -07:00
|
|
|
import getopt
|
|
|
|
import logging
|
|
|
|
import tempfile
|
|
|
|
import subprocess
|
2014-03-20 11:11:08 -07:00
|
|
|
import shutil
|
2013-04-04 23:49:43 -07:00
|
|
|
|
2013-08-20 17:37:49 -07:00
|
|
|
# This python script runs db_stress multiple times. Some runs use
# kill_random_test, which causes rocksdb to crash at various points in the code.
|
2013-08-20 17:37:49 -07:00
|
|
|
|
2013-04-04 23:49:43 -07:00
|
|
|
def _usage():
    """Return the command-line usage text shown for -h and on bad input."""
    return ("db_crashtest2.py -d <duration_test> -t <#threads> "
            "-k <kills with prob 1/k> -o <ops_per_thread> "
            "-b <write_buffer_size>\n")


def _parse_args(argv):
    """Parse command-line options into a dict of integer settings.

    Returns a dict with the keys ``duration``, ``threads``,
    ``kill_random_test``, ``ops_per_thread`` and ``write_buf_size``.
    Exits with status 2 (after printing the usage text) on an unknown
    option or a non-integer value, and with the default status on ``-h``.
    """
    try:
        opts, _ = getopt.getopt(argv, "hd:t:k:o:b:")
    except getopt.GetoptError as err:
        # Print the actual parse error, not the exception class itself.
        print(str(err))
        print(_usage())
        sys.exit(2)

    # default values, will be overridden by cmdline args
    settings = {
        "kill_random_test": 97,  # kill with probability 1/97 by default
        "duration": 10000,  # total time for this script to test db_stress
        "threads": 32,
        "ops_per_thread": 200000,
        "write_buf_size": 4 * 1024 * 1024,
    }
    setting_for_flag = {
        "-d": "duration",
        "-t": "threads",
        "-k": "kill_random_test",
        "-o": "ops_per_thread",
        "-b": "write_buf_size",
    }

    for opt, arg in opts:
        if opt == "-h":
            print(_usage())
            sys.exit()
        elif opt in setting_for_flag:
            try:
                settings[setting_for_flag[opt]] = int(arg)
            except ValueError:
                # Report a bad number as a usage error rather than a traceback.
                print("invalid integer value for " + opt + ": " + arg + "\n")
                print(_usage())
                sys.exit(2)
        else:
            # getopt rejects unknown flags itself; kept as a safety net.
            print("unrecognized option " + str(opt) + "\n")
            print(_usage())
            sys.exit(2)

    return settings


def _mode_options(check_mode, ops_per_thread, kill_random_test):
    """Return ``(killoption, additional_opts)`` for one check mode.

    ``killoption`` is non-empty only for mode 0, the run that is expected
    to be killed part-way through.
    """
    killoption = ""
    if check_mode == 0:
        # run with kill_random_test
        killoption = " --kill_random_test=" + str(kill_random_test)
        # use large ops per thread since we will kill it anyway
        additional_opts = ("--ops_per_thread=" + str(100 * ops_per_thread)
                           + killoption)
    elif check_mode == 1:
        # normal run with universal compaction mode
        additional_opts = ("--ops_per_thread=" + str(ops_per_thread)
                           + " --compaction_style=1")
    elif check_mode == 2:
        # normal run with FIFO compaction mode
        # ops_per_thread is divided by 5 because FIFO compaction
        # style is quite a bit slower on reads with lot of files.
        # Floor division keeps the flag an integer under true division too.
        additional_opts = ("--ops_per_thread=" + str(ops_per_thread // 5)
                           + " --compaction_style=2")
    else:
        # normal run
        additional_opts = "--ops_per_thread=" + str(ops_per_thread)
    return killoption, additional_opts


def _build_command(threads, write_buf_size, dbname, additional_opts):
    """Return the db_stress command line with whitespace runs collapsed.

    Three flags (test_batches_snapshots, mmap_read, filter_deletes) are
    randomly set to 0 or 1 on every call, in that order.
    """
    return re.sub(r'\s+', ' ', """
        ./db_stress
        --test_batches_snapshots=%s
        --threads=%s
        --write_buffer_size=%s
        --destroy_db_initially=0
        --reopen=20
        --readpercent=45
        --prefixpercent=5
        --writepercent=35
        --delpercent=5
        --iterpercent=10
        --db=%s
        --max_key=100000000
        --mmap_read=%s
        --block_size=16384
        --cache_size=1048576
        --open_files=500000
        --verify_checksum=1
        --sync=0
        --progress_reports=0
        --disable_wal=0
        --disable_data_sync=1
        --target_file_size_base=2097152
        --target_file_size_multiplier=2
        --max_write_buffer_number=3
        --max_background_compactions=20
        --max_bytes_for_level_base=10485760
        --filter_deletes=%s
        --memtablerep=prefix_hash
        --prefix_size=7
        %s
        """ % (random.randint(0, 1),
               threads,
               write_buf_size,
               dbname,
               random.randint(0, 1),
               random.randint(0, 1),
               additional_opts))


def _run_command(cmd):
    """Run ``cmd`` through the shell and return ``(returncode, output)``.

    stderr is merged into stdout. The output is always returned as text.
    """
    # With shell=True the command is passed as a single string.
    popen = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             shell=True)
    output, _ = popen.communicate()
    if not isinstance(output, str):
        # Python 3 hands back bytes; decode so the text scans below work.
        output = output.decode("utf-8", "replace")
    return popen.returncode, output


def main(argv):
    """Run db_stress repeatedly until the requested duration has elapsed.

    ``argv`` is the option list without the program name. Each iteration
    cycles through four check modes (kill run, universal compaction, FIFO
    compaction, plain run) against a fresh temporary database directory.

    Exits with status 1 when the exit code does not match the mode (zero
    for normal runs, negative for kill runs) and with status 2 when the
    output contains 'error' or 'fail'. The database directory is removed
    only after a successful iteration. Returns None once the duration is
    used up.
    """
    settings = _parse_args(argv)
    duration = settings["duration"]
    threads = settings["threads"]
    ops_per_thread = settings["ops_per_thread"]
    write_buf_size = settings["write_buf_size"]
    kill_random_test = settings["kill_random_test"]

    exit_time = time.time() + duration

    print("Running whitebox-crash-test with \ntotal-duration=" + str(duration)
          + "\nthreads=" + str(threads) + "\nops_per_thread="
          + str(ops_per_thread) + "\nwrite_buffer_size="
          + str(write_buf_size) + "\n")

    total_check_mode = 4
    check_mode = 0

    while time.time() < exit_time:
        killoption, additional_opts = _mode_options(
            check_mode, ops_per_thread, kill_random_test)

        dbname = tempfile.mkdtemp(prefix='rocksdb_crashtest_')
        cmd = _build_command(threads, write_buf_size, dbname, additional_opts)

        print("Running:" + cmd + "\n")

        retncode, stdoutdata = _run_command(cmd)
        msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format(
               check_mode, killoption, retncode))
        print(msg)
        print(stdoutdata)

        if killoption == '':
            # we expect zero retncode if no kill option
            expected = (retncode == 0)
        else:
            # we expect negative retncode if kill option was given
            expected = (retncode < 0)

        if not expected:
            print("TEST FAILED. See kill option and exit code above!!!\n")
            sys.exit(1)

        stdoutdata = stdoutdata.lower()
        # 'got errors 0 times' itself contains 'error'; discount those hits.
        errorcount = (stdoutdata.count('error') -
                      stdoutdata.count('got errors 0 times'))
        print("#times error occurred in output is " + str(errorcount) + "\n")

        if errorcount > 0:
            print("TEST FAILED. Output has 'error'!!!\n")
            sys.exit(2)
        if stdoutdata.find('fail') >= 0:
            print("TEST FAILED. Output has 'fail'!!!\n")
            sys.exit(2)
        # we need to clean up after ourselves -- only do this on test success
        shutil.rmtree(dbname, True)

        check_mode = (check_mode + 1) % total_check_mode

        time.sleep(1)  # time to stabilize after a kill
|
|
|
|
|
|
|
|
# Script entry point: hand the arguments (minus the program name) to main()
# and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
|