#7916298: merge tools/db_crashtest2.py into tools/db_crashtest.py
Summary: merge tools/db_crashtest2.py into tools/db_crashtest.py. Run `python tools/db_crashtest.py -h` to show the help message; ALL parameters can be overridden by arguments. Example usages: `python tools/db_crashtest.py blackbox` (run blackbox with default parameters); `python tools/db_crashtest.py blackbox --simple`; `python tools/db_crashtest.py whitebox` (run whitebox with default parameters); `python tools/db_crashtest.py whitebox --simple`. All default parameters are identical to the previous version. Test Plan: run `make crash_test` and make sure it runs with the expected parameters passed to db_stress. Reviewers: igor, rven, anthony, IslamAbdelRahman, yhchiang, sdong Reviewed By: sdong Subscribers: dhruba Differential Revision: https://reviews.facebook.net/D48567
This commit is contained in:
parent
ec1f8354a9
commit
4575de5b9e
8
Makefile
8
Makefile
@ -568,12 +568,12 @@ ldb_tests: ldb
|
|||||||
crash_test: whitebox_crash_test blackbox_crash_test
|
crash_test: whitebox_crash_test blackbox_crash_test
|
||||||
|
|
||||||
blackbox_crash_test: db_stress
|
blackbox_crash_test: db_stress
|
||||||
python -u tools/db_crashtest.py -s
|
python -u tools/db_crashtest.py --simple blackbox
|
||||||
python -u tools/db_crashtest.py
|
python -u tools/db_crashtest.py blackbox
|
||||||
|
|
||||||
whitebox_crash_test: db_stress
|
whitebox_crash_test: db_stress
|
||||||
python -u tools/db_crashtest2.py -s
|
python -u tools/db_crashtest.py --simple whitebox
|
||||||
python -u tools/db_crashtest2.py
|
python -u tools/db_crashtest.py whitebox
|
||||||
|
|
||||||
asan_check:
|
asan_check:
|
||||||
$(MAKE) clean
|
$(MAKE) clean
|
||||||
|
@ -4,161 +4,183 @@ import re
|
|||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
import getopt
|
|
||||||
import logging
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
import subprocess
|
import subprocess
|
||||||
import shutil
|
import shutil
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
# params overwrite priority:
|
||||||
|
# for default:
|
||||||
|
# default_params < blackbox|whitebox_default_params < args
|
||||||
|
# for simple:
|
||||||
|
# simple_default_params < blackbox|whitebox_simple_default_params < args
|
||||||
|
|
||||||
|
default_params = {
|
||||||
|
"block_size": 16384,
|
||||||
|
"cache_size": 1048576,
|
||||||
|
"delpercent": 5,
|
||||||
|
"destroy_db_initially": 0,
|
||||||
|
"disable_data_sync": 0,
|
||||||
|
"disable_wal": 0,
|
||||||
|
"filter_deletes": lambda: random.randint(0, 1),
|
||||||
|
"iterpercent": 10,
|
||||||
|
"max_background_compactions": 20,
|
||||||
|
"max_bytes_for_level_base": 10485760,
|
||||||
|
"max_key": 100000000,
|
||||||
|
"max_write_buffer_number": 3,
|
||||||
|
"memtablerep": "prefix_hash",
|
||||||
|
"mmap_read": lambda: random.randint(0, 1),
|
||||||
|
"open_files": 500000,
|
||||||
|
"prefix_size": 7,
|
||||||
|
"prefixpercent": 5,
|
||||||
|
"progress_reports": 0,
|
||||||
|
"readpercent": 45,
|
||||||
|
"reopen": 20,
|
||||||
|
"sync": 0,
|
||||||
|
"target_file_size_base": 2097152,
|
||||||
|
"target_file_size_multiplier": 2,
|
||||||
|
"threads": 32,
|
||||||
|
"verify_checksum": 1,
|
||||||
|
"write_buffer_size": 4 * 1024 * 1024,
|
||||||
|
"writepercent": 35,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_dbname(test_name):
    """Return the database directory to use for the given crash test.

    If the TEST_TMPDIR environment variable is unset or empty, a fresh
    temporary directory whose name starts with
    'rocksdb_crashtest_<test_name>' is created and returned.

    Otherwise the path '<TEST_TMPDIR>/rocksdb_crashtest_<test_name>' is
    returned after removing any directory tree already present there
    (removal errors are ignored).

    Note that every call has filesystem side effects, so callers that need
    a stable database across several runs must call this only once.
    """
    test_tmpdir = os.environ.get("TEST_TMPDIR")
    if not test_tmpdir:
        # Covers both "unset" (None) and "set but empty".
        return tempfile.mkdtemp(prefix='rocksdb_crashtest_' + test_name)

    dbname = os.path.join(test_tmpdir, "rocksdb_crashtest_" + test_name)
    # Start from a clean slate; ignore_errors=True tolerates a missing dir.
    shutil.rmtree(dbname, True)
    return dbname
|
||||||
|
|
||||||
|
blackbox_default_params = {
|
||||||
|
'db': lambda: get_dbname('blackbox'),
|
||||||
|
# total time for this script to test db_stress
|
||||||
|
"duration": 6000,
|
||||||
|
# time for one db_stress instance to run
|
||||||
|
"interval": 120,
|
||||||
|
# since we will be killing anyway, use large value for ops_per_thread
|
||||||
|
"ops_per_thread": 100000000,
|
||||||
|
"set_options_one_in": 10000,
|
||||||
|
"test_batches_snapshots": 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
whitebox_default_params = {
|
||||||
|
'db': lambda: get_dbname('whitebox'),
|
||||||
|
"duration": 10000,
|
||||||
|
"log2_keys_per_lock": 10,
|
||||||
|
"nooverwritepercent": 1,
|
||||||
|
"ops_per_thread": 200000,
|
||||||
|
"test_batches_snapshots": lambda: random.randint(0, 1),
|
||||||
|
"write_buffer_size": 4 * 1024 * 1024,
|
||||||
|
}
|
||||||
|
|
||||||
|
simple_default_params = {
|
||||||
|
"block_size": 16384,
|
||||||
|
"cache_size": 1048576,
|
||||||
|
"column_families": 1,
|
||||||
|
"delpercent": 5,
|
||||||
|
"destroy_db_initially": 0,
|
||||||
|
"disable_data_sync": 0,
|
||||||
|
"disable_wal": 0,
|
||||||
|
"filter_deletes": lambda: random.randint(0, 1),
|
||||||
|
"iterpercent": 10,
|
||||||
|
"max_background_compactions": 1,
|
||||||
|
"max_bytes_for_level_base": 67108864,
|
||||||
|
"max_key": 100000000,
|
||||||
|
"max_write_buffer_number": 3,
|
||||||
|
"memtablerep": "skip_list",
|
||||||
|
"mmap_read": lambda: random.randint(0, 1),
|
||||||
|
"prefix_size": 0,
|
||||||
|
"prefixpercent": 0,
|
||||||
|
"progress_reports": 0,
|
||||||
|
"readpercent": 50,
|
||||||
|
"reopen": 20,
|
||||||
|
"sync": 0,
|
||||||
|
"target_file_size_base": 16777216,
|
||||||
|
"target_file_size_multiplier": 1,
|
||||||
|
"test_batches_snapshots": 0,
|
||||||
|
"threads": 32,
|
||||||
|
"verify_checksum": 1,
|
||||||
|
"write_buffer_size": 32 * 1024 * 1024,
|
||||||
|
"writepercent": 35,
|
||||||
|
}
|
||||||
|
|
||||||
|
blackbox_simple_default_params = {
|
||||||
|
'db': lambda: get_dbname('blackbox'),
|
||||||
|
"duration": 6000,
|
||||||
|
"interval": 120,
|
||||||
|
"open_files": -1,
|
||||||
|
"ops_per_thread": 100000000,
|
||||||
|
"set_options_one_in": 0,
|
||||||
|
"test_batches_snapshots": 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
whitebox_simple_default_params = {
|
||||||
|
'db': lambda: get_dbname('whitebox'),
|
||||||
|
"duration": 10000,
|
||||||
|
"log2_keys_per_lock": 10,
|
||||||
|
"nooverwritepercent": 1,
|
||||||
|
"open_files": 500000,
|
||||||
|
"ops_per_thread": 200000,
|
||||||
|
"write_buffer_size": 32 * 1024 * 1024,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def gen_cmd_params(args):
    """Build the parameter dict for one crash-test session.

    Layers are applied in increasing priority:
      default:  default_params < blackbox|whitebox_default_params < args
      simple:   simple_default_params
                < blackbox|whitebox_simple_default_params < args

    `args` is the argparse namespace; only attributes that are not None
    override the defaults. The returned dict still contains the
    script-only keys (test_type, simple, duration, interval).

    The 'db' entry is resolved to a concrete path here. Its default is a
    factory that calls get_dbname(), which creates or wipes the database
    directory. Since gen_cmd() invokes callable values on every call,
    leaving the factory in place would reset the database before each
    db_stress run and defeat the purpose of a crash-recovery test.
    """
    params = {}

    if args.simple:
        params.update(simple_default_params)
        if args.test_type == 'blackbox':
            params.update(blackbox_simple_default_params)
        elif args.test_type == 'whitebox':
            params.update(whitebox_simple_default_params)
    else:
        params.update(default_params)
        if args.test_type == 'blackbox':
            params.update(blackbox_default_params)
        elif args.test_type == 'whitebox':
            params.update(whitebox_default_params)

    for k, v in vars(args).items():
        if v is not None:
            params[k] = v

    # Resolve the db path once, after overrides, so that a user-supplied
    # --db never triggers the default factory's filesystem side effects.
    db = params.get('db')
    if callable(db):
        params['db'] = db()
    return params
|
||||||
|
|
||||||
|
|
||||||
|
def gen_cmd(params):
    """Render `params` as a './db_stress --key=value ...' command line.

    Keys that only steer this script (test_type, simple, duration,
    interval) and entries whose value is None are left out. A callable
    value is invoked and its result is used, so randomized defaults are
    re-drawn on every call.

    With no flags to emit the result is './db_stress ' (trailing space
    kept for compatibility with the original behavior).
    """
    # Built once per call instead of once per item.
    script_only_keys = frozenset(
        ['test_type', 'simple', 'duration', 'interval'])

    flags = []
    for k, v in params.items():
        if k in script_only_keys or v is None:
            continue
        flags.append('--{0}={1}'.format(k, v() if callable(v) else v))
    return './db_stress ' + ' '.join(flags)
|
||||||
|
|
||||||
|
|
||||||
# This script runs and kills db_stress multiple times. It checks consistency
|
# This script runs and kills db_stress multiple times. It checks consistency
|
||||||
# in case of unsafe crashes in RocksDB.
|
# in case of unsafe crashes in RocksDB.
|
||||||
|
def blackbox_crash_main(args):
|
||||||
|
cmd_params = gen_cmd_params(args)
|
||||||
|
|
||||||
def main(argv):
|
exit_time = time.time() + cmd_params['duration']
|
||||||
try:
|
|
||||||
opts, args = getopt.getopt(argv, "hsd:t:i:o:b:")
|
|
||||||
except getopt.GetoptError:
|
|
||||||
print("db_crashtest.py -d <duration_test> -t <#threads> "
|
|
||||||
"-i <interval for one run> -o <ops_per_thread> "
|
|
||||||
"-b <write_buffer_size> [-s (simple mode)]\n")
|
|
||||||
sys.exit(2)
|
|
||||||
|
|
||||||
# default values, will be overridden by cmdline args
|
print("Running blackbox-crash-test with \n"
|
||||||
interval = 120 # time for one db_stress instance to run
|
+ "interval_between_crash=" + str(cmd_params['interval']) + "\n"
|
||||||
duration = 6000 # total time for this script to test db_stress
|
+ "total-duration=" + str(cmd_params['duration']) + "\n"
|
||||||
threads = 32
|
+ "threads=" + str(cmd_params['threads']) + "\n"
|
||||||
# since we will be killing anyway, use large value for ops_per_thread
|
+ "ops_per_thread=" + str(cmd_params['ops_per_thread']) + "\n"
|
||||||
ops_per_thread = 100000000
|
+ "write_buffer_size=" + str(cmd_params['write_buffer_size']) + "\n")
|
||||||
write_buf_size = 4 * 1024 * 1024
|
|
||||||
simple_mode = False
|
|
||||||
write_buf_size_set = False
|
|
||||||
for opt, arg in opts:
|
|
||||||
if opt == '-h':
|
|
||||||
print("db_crashtest.py -d <duration_test>"
|
|
||||||
" -t <#threads> -i <interval for one run>"
|
|
||||||
" -o <ops_per_thread> -b <write_buffer_size>"
|
|
||||||
" [-s (simple mode)]\n")
|
|
||||||
sys.exit()
|
|
||||||
elif opt == '-s':
|
|
||||||
simple_mode = True
|
|
||||||
if not write_buf_size_set:
|
|
||||||
write_buf_size = 32 * 1024 * 1024
|
|
||||||
elif opt == "-d":
|
|
||||||
duration = int(arg)
|
|
||||||
elif opt == "-t":
|
|
||||||
threads = int(arg)
|
|
||||||
elif opt == "-i":
|
|
||||||
interval = int(arg)
|
|
||||||
elif opt == "-o":
|
|
||||||
ops_per_thread = int(arg)
|
|
||||||
elif opt == "-b":
|
|
||||||
write_buf_size = int(arg)
|
|
||||||
write_buf_size_set = True
|
|
||||||
else:
|
|
||||||
print("db_crashtest.py -d <duration_test>"
|
|
||||||
" -t <#threads> -i <interval for one run>"
|
|
||||||
" -o <ops_per_thread> -b <write_buffer_size>\n")
|
|
||||||
sys.exit(2)
|
|
||||||
|
|
||||||
exit_time = time.time() + duration
|
|
||||||
|
|
||||||
print("Running blackbox-crash-test with \ninterval_between_crash="
|
|
||||||
+ str(interval) + "\ntotal-duration=" + str(duration)
|
|
||||||
+ "\nthreads=" + str(threads) + "\nops_per_thread="
|
|
||||||
+ str(ops_per_thread) + "\nwrite_buffer_size="
|
|
||||||
+ str(write_buf_size) + "\n")
|
|
||||||
|
|
||||||
test_tmpdir = os.environ.get("TEST_TMPDIR")
|
|
||||||
if test_tmpdir is None or test_tmpdir == "":
|
|
||||||
dbname = tempfile.mkdtemp(prefix='rocksdb_crashtest_')
|
|
||||||
else:
|
|
||||||
dbname = test_tmpdir + "/rocksdb_crashtest"
|
|
||||||
shutil.rmtree(dbname, True)
|
|
||||||
|
|
||||||
while time.time() < exit_time:
|
while time.time() < exit_time:
|
||||||
run_had_errors = False
|
run_had_errors = False
|
||||||
killtime = time.time() + interval
|
killtime = time.time() + cmd_params['interval']
|
||||||
|
|
||||||
if simple_mode:
|
cmd = gen_cmd(cmd_params)
|
||||||
cmd = re.sub('\s+', ' ', """
|
|
||||||
./db_stress
|
|
||||||
--column_families=1
|
|
||||||
--test_batches_snapshots=0
|
|
||||||
--ops_per_thread=%s
|
|
||||||
--threads=%s
|
|
||||||
--write_buffer_size=%s
|
|
||||||
--destroy_db_initially=0
|
|
||||||
--reopen=20
|
|
||||||
--readpercent=50
|
|
||||||
--prefixpercent=0
|
|
||||||
--writepercent=35
|
|
||||||
--delpercent=5
|
|
||||||
--iterpercent=10
|
|
||||||
--db=%s
|
|
||||||
--max_key=100000000
|
|
||||||
--mmap_read=%s
|
|
||||||
--block_size=16384
|
|
||||||
--cache_size=1048576
|
|
||||||
--open_files=-1
|
|
||||||
--verify_checksum=1
|
|
||||||
--sync=0
|
|
||||||
--progress_reports=0
|
|
||||||
--disable_wal=0
|
|
||||||
--disable_data_sync=0
|
|
||||||
--target_file_size_base=16777216
|
|
||||||
--target_file_size_multiplier=1
|
|
||||||
--max_write_buffer_number=3
|
|
||||||
--max_background_compactions=1
|
|
||||||
--max_bytes_for_level_base=67108864
|
|
||||||
--filter_deletes=%s
|
|
||||||
--memtablerep=skip_list
|
|
||||||
--prefix_size=0
|
|
||||||
--set_options_one_in=0
|
|
||||||
""" % (ops_per_thread,
|
|
||||||
threads,
|
|
||||||
write_buf_size,
|
|
||||||
dbname,
|
|
||||||
random.randint(0, 1),
|
|
||||||
random.randint(0, 1)))
|
|
||||||
else:
|
|
||||||
cmd = re.sub('\s+', ' ', """
|
|
||||||
./db_stress
|
|
||||||
--test_batches_snapshots=1
|
|
||||||
--ops_per_thread=%s
|
|
||||||
--threads=%s
|
|
||||||
--write_buffer_size=%s
|
|
||||||
--destroy_db_initially=0
|
|
||||||
--reopen=20
|
|
||||||
--readpercent=45
|
|
||||||
--prefixpercent=5
|
|
||||||
--writepercent=35
|
|
||||||
--delpercent=5
|
|
||||||
--iterpercent=10
|
|
||||||
--db=%s
|
|
||||||
--max_key=100000000
|
|
||||||
--mmap_read=%s
|
|
||||||
--block_size=16384
|
|
||||||
--cache_size=1048576
|
|
||||||
--open_files=500000
|
|
||||||
--verify_checksum=1
|
|
||||||
--sync=0
|
|
||||||
--progress_reports=0
|
|
||||||
--disable_wal=0
|
|
||||||
--disable_data_sync=0
|
|
||||||
--target_file_size_base=2097152
|
|
||||||
--target_file_size_multiplier=2
|
|
||||||
--max_write_buffer_number=3
|
|
||||||
--max_background_compactions=20
|
|
||||||
--max_bytes_for_level_base=10485760
|
|
||||||
--filter_deletes=%s
|
|
||||||
--memtablerep=prefix_hash
|
|
||||||
--prefix_size=7
|
|
||||||
--set_options_one_in=10000
|
|
||||||
""" % (ops_per_thread,
|
|
||||||
threads,
|
|
||||||
write_buf_size,
|
|
||||||
dbname,
|
|
||||||
random.randint(0, 1),
|
|
||||||
random.randint(0, 1)))
|
|
||||||
|
|
||||||
child = subprocess.Popen([cmd],
|
child = subprocess.Popen([cmd],
|
||||||
stderr=subprocess.PIPE, shell=True)
|
stderr=subprocess.PIPE, shell=True)
|
||||||
@ -199,5 +221,140 @@ def main(argv):
|
|||||||
# we need to clean up after ourselves -- only do this on test success
|
# we need to clean up after ourselves -- only do this on test success
|
||||||
shutil.rmtree(dbname, True)
|
shutil.rmtree(dbname, True)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
sys.exit(main(sys.argv[1:]))
|
# This python script runs db_stress multiple times. Some runs with
|
||||||
|
# kill_random_test that causes rocksdb to crash at various points in code.
|
||||||
|
def whitebox_crash_main(args):
    """Run db_stress repeatedly, with and without kill_random_test.

    For the first half of the configured duration every run uses
    kill_random_test, alternating between two kill modes. Afterwards the
    loop cycles through four check modes: kill run, universal compaction,
    FIFO compaction, and a plain run. After each run in the second half
    the database directory is removed.

    A run is considered successful when:
      * without a kill option, db_stress exits with code 0;
      * with a kill option, db_stress exits with a negative code;
      * its output contains no 'error' (other than 'got errors 0 times')
        and no 'fail'.

    Exits the process with status 1 on an unexpected exit code and with
    status 2 when the output reports errors or failures.
    """
    cmd_params = gen_cmd_params(args)

    # Pin the database path once. The default 'db' entry may be a factory,
    # and gen_cmd() calls callable values on every invocation; the cleanup
    # below also needs the concrete path.
    dbname = cmd_params['db']
    if callable(dbname):
        dbname = dbname()
    cmd_params['db'] = dbname

    cur_time = time.time()
    exit_time = cur_time + cmd_params['duration']
    half_time = cur_time + cmd_params['duration'] / 2

    print("Running whitebox-crash-test with \n"
          + "total-duration=" + str(cmd_params['duration']) + "\n"
          + "threads=" + str(cmd_params['threads']) + "\n"
          + "ops_per_thread=" + str(cmd_params['ops_per_thread']) + "\n"
          + "write_buffer_size=" + str(cmd_params['write_buffer_size']) + "\n")

    total_check_mode = 4
    check_mode = 0
    kill_random_test = 97
    kill_mode = 0

    while time.time() < exit_time:
        if check_mode == 0:
            additional_opts = {
                # use large ops per thread since we will kill it anyway
                "ops_per_thread": 100 * cmd_params['ops_per_thread'],
            }
            # run with kill_random_test
            if kill_mode == 0:
                additional_opts.update({
                    "kill_random_test": kill_random_test,
                })
            elif kill_mode == 1:
                # Remove the kill point for normal reads and reduce the
                # kill odds by 3. Floor division keeps the flag integral.
                additional_opts.update({
                    "kill_random_test": (kill_random_test // 3 + 1),
                    "kill_prefix_blacklist": "WritableFileWriter::Append,"
                    + "WritableFileWriter::WriteBuffered",
                })

            # Run kill mode 0 and 1 by turn.
            kill_mode = (kill_mode + 1) % 2
        elif check_mode == 1:
            # normal run with universal compaction mode
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params['ops_per_thread'],
                "compaction_style": 1,
            }
        elif check_mode == 2:
            # normal run with FIFO compaction mode
            # ops_per_thread is divided by 5 because FIFO compaction
            # style is quite a bit slower on reads with lot of files
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params['ops_per_thread'] // 5,
                "compaction_style": 2,
            }
        else:
            # normal run
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params['ops_per_thread'],
            }

        # Per-run options take precedence over the session parameters.
        run_params = dict(cmd_params)
        run_params.update(additional_opts)
        cmd = gen_cmd(run_params)

        print("Running:" + cmd + "\n")

        popen = subprocess.Popen([cmd], stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 shell=True)
        # stderr is merged into stdout above, so stderrdata is always None.
        stdoutdata, stderrdata = popen.communicate()
        retncode = popen.returncode
        kill_option = additional_opts['kill_random_test']
        msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format(
               check_mode, kill_option, retncode))
        print(msg)
        print(stdoutdata)

        expected = False
        if kill_option is None and (retncode == 0):
            # we expect zero retncode if no kill option
            expected = True
        elif kill_option is not None and retncode < 0:
            # we expect negative retncode if kill option was given
            expected = True

        if not expected:
            print("TEST FAILED. See kill option and exit code above!!!\n")
            sys.exit(1)

        stdoutdata = stdoutdata.lower()
        # 'got errors 0 times' contains 'error' but is not a failure.
        errorcount = (stdoutdata.count('error') -
                      stdoutdata.count('got errors 0 times'))
        print("#times error occurred in output is " + str(errorcount) + "\n")

        if (errorcount > 0):
            print("TEST FAILED. Output has 'error'!!!\n")
            sys.exit(2)
        if (stdoutdata.find('fail') >= 0):
            print("TEST FAILED. Output has 'fail'!!!\n")
            sys.exit(2)

        # First half of the duration, keep doing kill test. For the next half,
        # try different modes.
        if time.time() > half_time:
            # we need to clean up after ourselves -- only do this on test
            # success
            shutil.rmtree(dbname, True)
            check_mode = (check_mode + 1) % total_check_mode

        time.sleep(1)  # time to stabilize after a kill
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse the command line and run the selected crash test.

    The positional `test_type` selects 'blackbox' or 'whitebox'; `--simple`
    selects the simple parameter set. Every key that appears in any of the
    default parameter tables is also exposed as an optional `--<key>`
    argument whose type matches the default value, so all parameters can be
    overridden from the command line.
    """
    parser = argparse.ArgumentParser(
        description="This script runs and kills db_stress multiple times")
    parser.add_argument("test_type", choices=["blackbox", "whitebox"])
    parser.add_argument("--simple", action="store_true")

    # Union of all known parameter names; later tables win on duplicates,
    # which only affects the value used for type inference below.
    all_params = {}
    for table in (default_params,
                  blackbox_default_params,
                  whitebox_default_params,
                  simple_default_params,
                  blackbox_simple_default_params,
                  whitebox_simple_default_params):
        all_params.update(table)

    for k, v in all_params.items():
        if k == 'db':
            # The 'db' default is a factory that creates or wipes a
            # directory; do not call it just to learn that it is a string.
            arg_type = str
        else:
            arg_type = type(v() if callable(v) else v)
        parser.add_argument("--" + k, type=arg_type)
    args = parser.parse_args()

    if args.test_type == 'blackbox':
        blackbox_crash_main(args)
    elif args.test_type == 'whitebox':
        whitebox_crash_main(args)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
|
@ -1,248 +0,0 @@
|
|||||||
#! /usr/bin/env python
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import random
|
|
||||||
import getopt
|
|
||||||
import logging
|
|
||||||
import tempfile
|
|
||||||
import subprocess
|
|
||||||
import shutil
|
|
||||||
|
|
||||||
# This python script runs db_stress multiple times. Some runs with
|
|
||||||
# kill_random_test that causes rocksdb to crash at various points in code.
|
|
||||||
|
|
||||||
def main(argv):
|
|
||||||
try:
|
|
||||||
opts, args = getopt.getopt(argv, "hsd:t:k:o:b:")
|
|
||||||
except getopt.GetoptError:
|
|
||||||
print str(getopt.GetoptError)
|
|
||||||
print "db_crashtest2.py -d <duration_test> -t <#threads> " \
|
|
||||||
"-k <kills with prob 1/k> -o <ops_per_thread> "\
|
|
||||||
"-b <write_buffer_size> [-s (simple mode)]\n"
|
|
||||||
sys.exit(2)
|
|
||||||
|
|
||||||
# default values, will be overridden by cmdline args
|
|
||||||
kill_random_test = 97 # kill with probability 1/97 by default
|
|
||||||
duration = 10000 # total time for this script to test db_stress
|
|
||||||
threads = 32
|
|
||||||
ops_per_thread = 200000
|
|
||||||
write_buf_size = 4 * 1024 * 1024
|
|
||||||
simple_mode = False
|
|
||||||
write_buf_size_set = False
|
|
||||||
|
|
||||||
for opt, arg in opts:
|
|
||||||
if opt == '-h':
|
|
||||||
print "db_crashtest2.py -d <duration_test> -t <#threads> " \
|
|
||||||
"-k <kills with prob 1/k> -o <ops_per_thread> " \
|
|
||||||
"-b <write_buffer_size> [-s (simple mode)]\n"
|
|
||||||
sys.exit()
|
|
||||||
elif opt == '-s':
|
|
||||||
simple_mode = True
|
|
||||||
if not write_buf_size_set:
|
|
||||||
write_buf_size = 32 * 1024 * 1024
|
|
||||||
elif opt == "-d":
|
|
||||||
duration = int(arg)
|
|
||||||
elif opt == "-t":
|
|
||||||
threads = int(arg)
|
|
||||||
elif opt == "-k":
|
|
||||||
kill_random_test = int(arg)
|
|
||||||
elif opt == "-o":
|
|
||||||
ops_per_thread = int(arg)
|
|
||||||
elif opt == "-b":
|
|
||||||
write_buf_size = int(arg)
|
|
||||||
write_buf_size_set = True
|
|
||||||
else:
|
|
||||||
print "unrecognized option " + str(opt) + "\n"
|
|
||||||
print "db_crashtest2.py -d <duration_test> -t <#threads> " \
|
|
||||||
"-k <kills with prob 1/k> -o <ops_per_thread> " \
|
|
||||||
"-b <write_buffer_size>\n"
|
|
||||||
sys.exit(2)
|
|
||||||
|
|
||||||
cur_time = time.time()
|
|
||||||
exit_time = cur_time + duration
|
|
||||||
half_time = cur_time + duration / 2
|
|
||||||
|
|
||||||
print "Running whitebox-crash-test with \ntotal-duration=" + str(duration) \
|
|
||||||
+ "\nthreads=" + str(threads) + "\nops_per_thread=" \
|
|
||||||
+ str(ops_per_thread) + "\nwrite_buffer_size=" \
|
|
||||||
+ str(write_buf_size) + "\n"
|
|
||||||
|
|
||||||
total_check_mode = 4
|
|
||||||
check_mode = 0
|
|
||||||
kill_mode = 0
|
|
||||||
|
|
||||||
test_tmpdir = os.environ.get("TEST_TMPDIR")
|
|
||||||
if test_tmpdir is None or test_tmpdir == "":
|
|
||||||
dbname = tempfile.mkdtemp(prefix='rocksdb_crashtest2_')
|
|
||||||
else:
|
|
||||||
dbname = test_tmpdir + "/rocksdb_crashtest2"
|
|
||||||
shutil.rmtree(dbname, True)
|
|
||||||
|
|
||||||
while time.time() < exit_time:
|
|
||||||
killoption = ""
|
|
||||||
if check_mode == 0:
|
|
||||||
# run with kill_random_test
|
|
||||||
if kill_mode == 0:
|
|
||||||
killoption = " --kill_random_test=" + str(kill_random_test)
|
|
||||||
elif kill_mode == 1:
|
|
||||||
# Remove kill point for normal reads and reduce kill odds
|
|
||||||
# by 3, so that it still runs about one minutes in average
|
|
||||||
# before hitting a crash point.
|
|
||||||
killoption = " --kill_random_test=" + \
|
|
||||||
str(kill_random_test / 3 + 1)
|
|
||||||
killoption += \
|
|
||||||
" --kill_prefix_blacklist=WritableFileWriter::Append," \
|
|
||||||
"WritableFileWriter::WriteBuffered"
|
|
||||||
# Run kill mode 0 and 1 by turn.
|
|
||||||
kill_mode = (kill_mode + 1) % 2
|
|
||||||
# use large ops per thread since we will kill it anyway
|
|
||||||
additional_opts = "--ops_per_thread=" + \
|
|
||||||
str(100 * ops_per_thread) + killoption
|
|
||||||
elif check_mode == 1:
|
|
||||||
# normal run with universal compaction mode
|
|
||||||
additional_opts = "--ops_per_thread=" + str(ops_per_thread) + \
|
|
||||||
" --compaction_style=1"
|
|
||||||
elif check_mode == 2:
|
|
||||||
# normal run with FIFO compaction mode
|
|
||||||
# ops_per_thread is divided by 5 because FIFO compaction
|
|
||||||
# style is quite a bit slower on reads with lot of files
|
|
||||||
additional_opts = "--ops_per_thread=" + str(ops_per_thread / 5) + \
|
|
||||||
" --compaction_style=2"
|
|
||||||
else:
|
|
||||||
# normal run
|
|
||||||
additional_opts = "--ops_per_thread=" + str(ops_per_thread)
|
|
||||||
|
|
||||||
if simple_mode:
|
|
||||||
cmd = re.sub('\s+', ' ', """
|
|
||||||
./db_stress
|
|
||||||
--column_families=1
|
|
||||||
--threads=%s
|
|
||||||
--write_buffer_size=%s
|
|
||||||
--destroy_db_initially=0
|
|
||||||
--reopen=20
|
|
||||||
--prefixpercent=0
|
|
||||||
--readpercent=50
|
|
||||||
--writepercent=35
|
|
||||||
--delpercent=5
|
|
||||||
--iterpercent=10
|
|
||||||
--db=%s
|
|
||||||
--max_key=100000000
|
|
||||||
--mmap_read=%s
|
|
||||||
--block_size=16384
|
|
||||||
--cache_size=1048576
|
|
||||||
--open_files=500000
|
|
||||||
--verify_checksum=1
|
|
||||||
--sync=0
|
|
||||||
--progress_reports=0
|
|
||||||
--disable_wal=0
|
|
||||||
--disable_data_sync=0
|
|
||||||
--target_file_size_base=16777216
|
|
||||||
--target_file_size_multiplier=1
|
|
||||||
--max_write_buffer_number=3
|
|
||||||
--max_background_compactions=1
|
|
||||||
--max_bytes_for_level_base=67108864
|
|
||||||
--filter_deletes=%s
|
|
||||||
--memtablerep=skip_list
|
|
||||||
--prefix_size=0
|
|
||||||
--nooverwritepercent=1
|
|
||||||
--log2_keys_per_lock=10
|
|
||||||
%s
|
|
||||||
""" % (threads,
|
|
||||||
write_buf_size,
|
|
||||||
dbname,
|
|
||||||
random.randint(0, 1),
|
|
||||||
random.randint(0, 1),
|
|
||||||
additional_opts))
|
|
||||||
else:
|
|
||||||
cmd = re.sub('\s+', ' ', """
|
|
||||||
./db_stress
|
|
||||||
--test_batches_snapshots=%s
|
|
||||||
--threads=%s
|
|
||||||
--write_buffer_size=%s
|
|
||||||
--destroy_db_initially=0
|
|
||||||
--reopen=20
|
|
||||||
--readpercent=45
|
|
||||||
--prefixpercent=5
|
|
||||||
--writepercent=35
|
|
||||||
--delpercent=5
|
|
||||||
--iterpercent=10
|
|
||||||
--db=%s
|
|
||||||
--max_key=100000000
|
|
||||||
--mmap_read=%s
|
|
||||||
--block_size=16384
|
|
||||||
--cache_size=1048576
|
|
||||||
--open_files=500000
|
|
||||||
--verify_checksum=1
|
|
||||||
--sync=0
|
|
||||||
--progress_reports=0
|
|
||||||
--disable_wal=0
|
|
||||||
--disable_data_sync=0
|
|
||||||
--target_file_size_base=2097152
|
|
||||||
--target_file_size_multiplier=2
|
|
||||||
--max_write_buffer_number=3
|
|
||||||
--max_background_compactions=20
|
|
||||||
--max_bytes_for_level_base=10485760
|
|
||||||
--filter_deletes=%s
|
|
||||||
--memtablerep=prefix_hash
|
|
||||||
--prefix_size=7
|
|
||||||
--nooverwritepercent=1
|
|
||||||
--log2_keys_per_lock=10
|
|
||||||
%s
|
|
||||||
""" % (random.randint(0, 1),
|
|
||||||
threads,
|
|
||||||
write_buf_size,
|
|
||||||
dbname,
|
|
||||||
random.randint(0, 1),
|
|
||||||
random.randint(0, 1),
|
|
||||||
additional_opts))
|
|
||||||
|
|
||||||
print "Running:" + cmd + "\n"
|
|
||||||
|
|
||||||
popen = subprocess.Popen([cmd], stdout=subprocess.PIPE,
|
|
||||||
stderr=subprocess.STDOUT,
|
|
||||||
shell=True)
|
|
||||||
stdoutdata, stderrdata = popen.communicate()
|
|
||||||
retncode = popen.returncode
|
|
||||||
msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format(
|
|
||||||
check_mode, killoption, retncode))
|
|
||||||
print msg
|
|
||||||
print stdoutdata
|
|
||||||
|
|
||||||
expected = False
|
|
||||||
if (killoption == '') and (retncode == 0):
|
|
||||||
# we expect zero retncode if no kill option
|
|
||||||
expected = True
|
|
||||||
elif killoption != '' and retncode < 0:
|
|
||||||
# we expect negative retncode if kill option was given
|
|
||||||
expected = True
|
|
||||||
|
|
||||||
if not expected:
|
|
||||||
print "TEST FAILED. See kill option and exit code above!!!\n"
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
stdoutdata = stdoutdata.lower()
|
|
||||||
errorcount = (stdoutdata.count('error') -
|
|
||||||
stdoutdata.count('got errors 0 times'))
|
|
||||||
print "#times error occurred in output is " + str(errorcount) + "\n"
|
|
||||||
|
|
||||||
if (errorcount > 0):
|
|
||||||
print "TEST FAILED. Output has 'error'!!!\n"
|
|
||||||
sys.exit(2)
|
|
||||||
if (stdoutdata.find('fail') >= 0):
|
|
||||||
print "TEST FAILED. Output has 'fail'!!!\n"
|
|
||||||
sys.exit(2)
|
|
||||||
|
|
||||||
# First half of the duration, keep doing kill test. For the next half,
|
|
||||||
# try different modes.
|
|
||||||
if time.time() > half_time:
|
|
||||||
# we need to clean up after ourselves -- only do this on test
|
|
||||||
# success
|
|
||||||
shutil.rmtree(dbname, True)
|
|
||||||
check_mode = (check_mode + 1) % total_check_mode
|
|
||||||
|
|
||||||
time.sleep(1) # time to stabilize after a kill
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
sys.exit(main(sys.argv[1:]))
|
|
Loading…
Reference in New Issue
Block a user