rocksdb/tools/db_crashtest2.py
Xing Jin af732c7ba8 Add universal compaction to db_stress nightly build
Summary:
Most code change in this diff is code cleanup/rewrite. The logic changes include:

(1) add universal compaction to db_crashtest2.py
(2) randomly set --test_batches_snapshots to be 0 or 1 in db_crashtest2.py. Old codes always use 1.
(3) use different tmp directory as db directory in different runs. I saw some intermittent errors in my local tests. Use of different tmp directory seems to be able to solve the issue.

Test Plan: Have run "make crashtest" for multiple times. Also run "make all check"

Reviewers: emayanke, dhruba, haobo

Reviewed By: emayanke

Differential Revision: https://reviews.facebook.net/D12369
2013-08-20 17:37:49 -07:00

163 lines
5.5 KiB
Python

#! /usr/bin/env python
import os
import re
import sys
import time
import random
import getopt
import logging
import tempfile
import subprocess
# This python script runs db_stress multiple times. Some runs with
# kill_random_test that causes leveldb to crash at various points in code.
def main(argv):
try:
opts, args = getopt.getopt(argv, "hd:t:k:o:b:")
except getopt.GetoptError:
print str(getopt.GetoptError)
print "db_crashtest2.py -d <duration_test> -t <#threads> " \
"-k <kills with prob 1/k> -o <ops_per_thread> "\
"-b <write_buffer_size>\n"
sys.exit(2)
# default values, will be overridden by cmdline args
kill_random_test = 97 # kill with probability 1/97 by default
duration = 10000 # total time for this script to test db_stress
threads = 32
ops_per_thread = 200000
write_buf_size = 4 * 1024 * 1024
for opt, arg in opts:
if opt == '-h':
print "db_crashtest2.py -d <duration_test> -t <#threads> " \
"-k <kills with prob 1/k> -o <ops_per_thread> " \
"-b <write_buffer_size>\n"
sys.exit()
elif opt == "-d":
duration = int(arg)
elif opt == "-t":
threads = int(arg)
elif opt == "-k":
kill_random_test = int(arg)
elif opt == "-o":
ops_per_thread = int(arg)
elif opt == "-b":
write_buf_size = int(arg)
else:
print "unrecognized option " + str(opt) + "\n"
print "db_crashtest2.py -d <duration_test> -t <#threads> " \
"-k <kills with prob 1/k> -o <ops_per_thread> " \
"-b <write_buffer_size>\n"
sys.exit(2)
exit_time = time.time() + duration
print "Running whitebox-crash-test with \ntotal-duration=" + str(duration) \
+ "\nthreads=" + str(threads) + "\nops_per_thread=" \
+ str(ops_per_thread) + "\nwrite_buffer_size=" \
+ str(write_buf_size) + "\n"
total_check_mode = 3
check_mode = 0
while time.time() < exit_time:
killoption = ""
if check_mode == 0:
# run with kill_random_test
killoption = " --kill_random_test=" + str(kill_random_test)
# use large ops per thread since we will kill it anyway
additional_opts = "--ops_per_thread=" + \
str(100 * ops_per_thread) + killoption
elif check_mode == 1:
# normal run with universal compaction mode
additional_opts = "--ops_per_thread=" + str(ops_per_thread) + \
" --compaction_style=1"
else:
# nomral run
additional_opts = "--ops_per_thread=" + str(ops_per_thread)
cmd = re.sub('\s+', ' ', """
./db_stress
--test_batches_snapshots=%s
--threads=%s
--write_buffer_size=%s
--destroy_db_initially=0
--reopen=0
--readpercent=50
--prefixpercent=5
--writepercent=40
--delpercent=5
--db=%s
--max_key=100000000
--disable_seek_compaction=%s
--mmap_read=%s
--block_size=16384
--cache_size=1048576
--open_files=500000
--verify_checksum=1
--sync=%s
--disable_wal=0
--disable_data_sync=%s
--target_file_size_base=2097152
--target_file_size_multiplier=2
--max_write_buffer_number=3
--max_background_compactions=20
--max_bytes_for_level_base=10485760
--filter_deletes=%s
%s
""" % (random.randint(0, 1),
threads,
write_buf_size,
tempfile.mkdtemp(),
random.randint(0, 1),
random.randint(0, 1),
random.randint(0, 1),
random.randint(0, 1),
random.randint(0, 1),
additional_opts))
print "Running:" + cmd + "\n"
popen = subprocess.Popen([cmd], stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
shell=True)
stdoutdata, stderrdata = popen.communicate()
retncode = popen.returncode
msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format(
check_mode, killoption, retncode))
print msg
print stdoutdata
expected = False
if (killoption == '') and (retncode == 0):
# we expect zero retncode if no kill option
expected = True
elif killoption != '' and retncode < 0:
# we expect negative retncode if kill option was given
expected = True
if not expected:
print "TEST FAILED. See kill option and exit code above!!!\n"
sys.exit(1)
stdoutdata = stdoutdata.lower()
errorcount = (stdoutdata.count('error') -
stdoutdata.count('got errors 0 times'))
print "#times error occured in output is " + str(errorcount) + "\n"
if (errorcount > 0):
print "TEST FAILED. Output has 'error'!!!\n"
sys.exit(2)
if (stdoutdata.find('fail') >= 0):
print "TEST FAILED. Output has 'fail'!!!\n"
sys.exit(2)
check_mode = (check_mode + 1) % total_check_mode
time.sleep(1) # time to stabilize after a kill
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))