2013-04-04 23:49:43 -07:00
|
|
|
#! /usr/bin/env python
|
|
|
|
import os
|
2013-08-20 17:37:49 -07:00
|
|
|
import re
|
2013-04-04 23:49:43 -07:00
|
|
|
import sys
|
|
|
|
import time
|
2013-06-08 12:29:43 -07:00
|
|
|
import random
|
2013-04-04 23:49:43 -07:00
|
|
|
import getopt
|
|
|
|
import logging
|
|
|
|
import tempfile
|
|
|
|
import subprocess
|
2014-03-20 11:11:08 -07:00
|
|
|
import shutil
|
2013-04-04 23:49:43 -07:00
|
|
|
|
2013-08-20 17:37:49 -07:00
|
|
|
# This Python script runs db_stress multiple times. Some runs use
|
2013-10-04 22:32:05 -07:00
|
|
|
# kill_random_test that causes rocksdb to crash at various points in code.
|
2013-08-20 17:37:49 -07:00
|
|
|
|
2013-04-04 23:49:43 -07:00
|
|
|
def main(argv):
    """Run ./db_stress repeatedly as a whitebox crash test.

    Command-line options (parsed from ``argv`` with getopt):
        -h      print usage and exit
        -s      simple mode: single column family, skip_list memtable, one
                background compaction; also raises the default write buffer
                size to 32MB unless -b was already given
        -d N    total duration of the test, in seconds (default 10000)
        -t N    number of db_stress threads (default 32)
        -k N    kill db_stress with probability 1/N (default 97)
        -o N    operations per thread (default 200000)
        -b N    write buffer size in bytes (default 4MB)

    During the first half of the duration every run uses kill_random_test;
    afterwards the runs rotate through the four check modes and the database
    directory is removed after each successful run.

    Exits with status 2 on bad options or when the db_stress output contains
    'error' or 'fail', and with status 1 when the db_stress return code does
    not match what the kill option implies.  Returns None when the duration
    elapses without a failure.
    """
    usage = ("db_crashtest2.py -d <duration_test> -t <#threads> "
             "-k <kills with prob 1/k> -o <ops_per_thread> "
             "-b <write_buffer_size> [-s (simple mode)]\n")

    try:
        opts, _ = getopt.getopt(argv, "hsd:t:k:o:b:")
    except getopt.GetoptError as err:
        # Report the actual parse error, not the exception class.
        print(str(err))
        print(usage)
        sys.exit(2)

    # default values, will be overridden by cmdline args
    kill_random_test = 97  # kill with probability 1/97 by default
    duration = 10000  # total time for this script to test db_stress
    threads = 32
    ops_per_thread = 200000
    write_buf_size = 4 * 1024 * 1024
    simple_mode = False
    write_buf_size_set = False

    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt == '-s':
            simple_mode = True
            # Simple mode prefers a larger buffer, but an explicit -b that
            # was already seen wins.
            if not write_buf_size_set:
                write_buf_size = 32 * 1024 * 1024
        elif opt == "-d":
            duration = int(arg)
        elif opt == "-t":
            threads = int(arg)
        elif opt == "-k":
            kill_random_test = int(arg)
        elif opt == "-o":
            ops_per_thread = int(arg)
        elif opt == "-b":
            write_buf_size = int(arg)
            write_buf_size_set = True
        else:
            print("unrecognized option " + str(opt) + "\n")
            print(usage)
            sys.exit(2)

    cur_time = time.time()
    exit_time = cur_time + duration
    # Floor division keeps the Python 2 integer semantics under Python 3.
    half_time = cur_time + duration // 2

    print("Running whitebox-crash-test with \ntotal-duration=" + str(duration)
          + "\nthreads=" + str(threads) + "\nops_per_thread="
          + str(ops_per_thread) + "\nwrite_buffer_size="
          + str(write_buf_size) + "\n")

    total_check_mode = 4
    check_mode = 0
    kill_mode = 0

    test_tmpdir = os.environ.get("TEST_TMPDIR")
    if test_tmpdir is None or test_tmpdir == "":
        dbname = tempfile.mkdtemp(prefix='rocksdb_crashtest2_')
    else:
        dbname = test_tmpdir + "/rocksdb_crashtest2"
        shutil.rmtree(dbname, True)

    while time.time() < exit_time:
        killoption = ""
        if check_mode == 0:
            # run with kill_random_test
            if kill_mode == 0:
                killoption = " --kill_random_test=" + str(kill_random_test)
            elif kill_mode == 1:
                # Remove kill point for normal reads and reduce kill odds
                # by 3, so that it still runs about one minute on average
                # before hitting a crash point.
                killoption = " --kill_random_test=" + \
                    str(kill_random_test // 3 + 1)
                killoption += \
                    " --kill_prefix_blacklist=WritableFileWriter::Append," \
                    "WritableFileWriter::WriteBuffered"
            # Run kill mode 0 and 1 by turn.
            kill_mode = (kill_mode + 1) % 2
            # use large ops per thread since we will kill it anyway
            additional_opts = "--ops_per_thread=" + \
                str(100 * ops_per_thread) + killoption
        elif check_mode == 1:
            # normal run with universal compaction mode
            additional_opts = "--ops_per_thread=" + str(ops_per_thread) + \
                " --compaction_style=1"
        elif check_mode == 2:
            # normal run with FIFO compaction mode
            # ops_per_thread is divided by 5 because FIFO compaction
            # style is quite a bit slower on reads with lot of files
            additional_opts = "--ops_per_thread=" + \
                str(ops_per_thread // 5) + " --compaction_style=2"
        else:
            # normal run
            additional_opts = "--ops_per_thread=" + str(ops_per_thread)

        # The templates are written one flag per line for readability; the
        # re.sub collapses all whitespace runs into single spaces.
        if simple_mode:
            cmd = re.sub(r'\s+', ' ', """
                ./db_stress
                --column_families=1
                --threads=%s
                --write_buffer_size=%s
                --destroy_db_initially=0
                --reopen=20
                --prefixpercent=0
                --readpercent=50
                --writepercent=35
                --delpercent=5
                --iterpercent=10
                --db=%s
                --max_key=100000000
                --mmap_read=%s
                --block_size=16384
                --cache_size=1048576
                --open_files=500000
                --verify_checksum=1
                --sync=0
                --progress_reports=0
                --disable_wal=0
                --disable_data_sync=0
                --target_file_size_base=16777216
                --target_file_size_multiplier=1
                --max_write_buffer_number=3
                --max_background_compactions=1
                --max_bytes_for_level_base=67108864
                --filter_deletes=%s
                --memtablerep=skip_list
                --prefix_size=0
                --nooverwritepercent=1
                --log2_keys_per_lock=10
                %s
                """ % (threads,
                       write_buf_size,
                       dbname,
                       random.randint(0, 1),
                       random.randint(0, 1),
                       additional_opts))
        else:
            cmd = re.sub(r'\s+', ' ', """
                ./db_stress
                --test_batches_snapshots=%s
                --threads=%s
                --write_buffer_size=%s
                --destroy_db_initially=0
                --reopen=20
                --readpercent=45
                --prefixpercent=5
                --writepercent=35
                --delpercent=5
                --iterpercent=10
                --db=%s
                --max_key=100000000
                --mmap_read=%s
                --block_size=16384
                --cache_size=1048576
                --open_files=500000
                --verify_checksum=1
                --sync=0
                --progress_reports=0
                --disable_wal=0
                --disable_data_sync=0
                --target_file_size_base=2097152
                --target_file_size_multiplier=2
                --max_write_buffer_number=3
                --max_background_compactions=20
                --max_bytes_for_level_base=10485760
                --filter_deletes=%s
                --memtablerep=prefix_hash
                --prefix_size=7
                --nooverwritepercent=1
                --log2_keys_per_lock=10
                %s
                """ % (random.randint(0, 1),
                       threads,
                       write_buf_size,
                       dbname,
                       random.randint(0, 1),
                       random.randint(0, 1),
                       additional_opts))

        print("Running:" + cmd + "\n")

        # stderr is merged into stdout so a single buffer holds all output.
        popen = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 shell=True)
        stdoutdata, _ = popen.communicate()
        retncode = popen.returncode
        if not isinstance(stdoutdata, str):
            # Python 3 hands back bytes; decode leniently so the substring
            # checks below work and odd bytes cannot abort the test.
            stdoutdata = stdoutdata.decode("utf-8", "replace")
        msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format(
               check_mode, killoption, retncode))
        print(msg)
        print(stdoutdata)

        expected = False
        if (killoption == '') and (retncode == 0):
            # we expect zero retncode if no kill option
            expected = True
        elif killoption != '' and retncode < 0:
            # we expect negative retncode if kill option was given
            expected = True

        if not expected:
            print("TEST FAILED. See kill option and exit code above!!!\n")
            sys.exit(1)

        stdoutdata = stdoutdata.lower()
        # 'got errors 0 times' contains the word 'error' but is a success
        # report, so those occurrences are subtracted.
        errorcount = (stdoutdata.count('error') -
                      stdoutdata.count('got errors 0 times'))
        print("#times error occurred in output is " + str(errorcount) + "\n")

        if errorcount > 0:
            print("TEST FAILED. Output has 'error'!!!\n")
            sys.exit(2)
        if stdoutdata.find('fail') >= 0:
            print("TEST FAILED. Output has 'fail'!!!\n")
            sys.exit(2)

        # First half of the duration, keep doing kill test. For the next half,
        # try different modes.
        if time.time() > half_time:
            # we need to clean up after ourselves -- only do this on test
            # success
            shutil.rmtree(dbname, True)
            check_mode = (check_mode + 1) % total_check_mode

        time.sleep(1)  # time to stabilize after a kill
|
|
|
|
|
|
|
|
# Script entry point: pass the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
|