Add universal compaction to db_stress nightly build
Summary: Most of the code changes in this diff are cleanup/rewrite. The logic changes are: (1) add universal compaction to db_crashtest2.py; (2) randomly set --test_batches_snapshots to 0 or 1 in db_crashtest2.py (the old code always used 1); (3) use a different tmp directory as the db directory in each run. I saw some intermittent errors in my local tests, and using a different tmp directory for each run seems to resolve the issue. Test Plan: Ran "make crashtest" multiple times. Also ran "make all check". Reviewers: emayanke, dhruba, haobo Reviewed By: emayanke Differential Revision: https://reviews.facebook.net/D12369
This commit is contained in:
parent
b87dcae1a3
commit
af732c7ba8
@ -1,5 +1,6 @@
|
|||||||
#! /usr/bin/env python
|
#! /usr/bin/env python
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
@ -8,18 +9,16 @@ import logging
|
|||||||
import tempfile
|
import tempfile
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
# This python script runs and kills db_stress multiple times with
|
# This script runs and kills db_stress multiple times. It checks consistency
|
||||||
# test-batches-snapshot ON,
|
# in case of unsafe crashes in Rocksdb.
|
||||||
# total operations much less than the total keys, and
|
|
||||||
# a high read percentage.
|
|
||||||
# This checks consistency in case of unsafe crashes in Rocksdb
|
|
||||||
|
|
||||||
def main(argv):
|
def main(argv):
|
||||||
try:
|
try:
|
||||||
opts, args = getopt.getopt(argv, "hd:t:i:o:b:")
|
opts, args = getopt.getopt(argv, "hd:t:i:o:b:")
|
||||||
except getopt.GetoptError:
|
except getopt.GetoptError:
|
||||||
print("db_crashtest.py -d <duration_test> -t <#threads> "
|
print("db_crashtest.py -d <duration_test> -t <#threads> "
|
||||||
"-i <interval for one run> -o <ops_per_thread>\n")
|
"-i <interval for one run> -o <ops_per_thread> "
|
||||||
|
"-b <write_buffer_size>\n")
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
# default values, will be overridden by cmdline args
|
# default values, will be overridden by cmdline args
|
||||||
@ -36,15 +35,15 @@ def main(argv):
|
|||||||
" -t <#threads> -i <interval for one run>"
|
" -t <#threads> -i <interval for one run>"
|
||||||
" -o <ops_per_thread> -b <write_buffer_size>\n")
|
" -o <ops_per_thread> -b <write_buffer_size>\n")
|
||||||
sys.exit()
|
sys.exit()
|
||||||
elif opt == ("-d"):
|
elif opt == "-d":
|
||||||
duration = int(arg)
|
duration = int(arg)
|
||||||
elif opt == ("-t"):
|
elif opt == "-t":
|
||||||
threads = int(arg)
|
threads = int(arg)
|
||||||
elif opt == ("-i"):
|
elif opt == "-i":
|
||||||
interval = int(arg)
|
interval = int(arg)
|
||||||
elif opt == ("-o"):
|
elif opt == "-o":
|
||||||
ops_per_thread = int(arg)
|
ops_per_thread = int(arg)
|
||||||
elif opt == ("-b"):
|
elif opt == "-b":
|
||||||
write_buf_size = int(arg)
|
write_buf_size = int(arg)
|
||||||
else:
|
else:
|
||||||
print("db_crashtest.py -d <duration_test>"
|
print("db_crashtest.py -d <duration_test>"
|
||||||
@ -54,8 +53,6 @@ def main(argv):
|
|||||||
|
|
||||||
exit_time = time.time() + duration
|
exit_time = time.time() + duration
|
||||||
|
|
||||||
dirpath = tempfile.mkdtemp()
|
|
||||||
|
|
||||||
print("Running blackbox-crash-test with \ninterval_between_crash="
|
print("Running blackbox-crash-test with \ninterval_between_crash="
|
||||||
+ str(interval) + "\ntotal-duration=" + str(duration)
|
+ str(interval) + "\ntotal-duration=" + str(duration)
|
||||||
+ "\nthreads=" + str(threads) + "\nops_per_thread="
|
+ "\nthreads=" + str(threads) + "\nops_per_thread="
|
||||||
@ -64,62 +61,75 @@ def main(argv):
|
|||||||
|
|
||||||
while time.time() < exit_time:
|
while time.time() < exit_time:
|
||||||
run_had_errors = False
|
run_had_errors = False
|
||||||
additional_opts = ' --disable_seek_compaction=' + \
|
|
||||||
str(random.randint(0, 1)) + \
|
|
||||||
' --mmap_read=' + str(random.randint(0, 1)) + \
|
|
||||||
' --block_size=16384 ' + \
|
|
||||||
' --cache_size=1048576 ' + \
|
|
||||||
' --open_files=500000 ' + \
|
|
||||||
' --verify_checksum=1 ' + \
|
|
||||||
' --sync=' + str(random.randint(0, 1)) + \
|
|
||||||
' --disable_wal=0 ' + \
|
|
||||||
' --disable_data_sync=' + \
|
|
||||||
str(random.randint(0, 1)) + \
|
|
||||||
' --target_file_size_base=2097152 ' + \
|
|
||||||
' --target_file_size_multiplier=2 ' + \
|
|
||||||
' --max_write_buffer_number=3 ' + \
|
|
||||||
' --max_background_compactions=20 ' + \
|
|
||||||
' --max_bytes_for_level_base=10485760 ' + \
|
|
||||||
' --filter_deletes=' + str(random.randint(0, 1))
|
|
||||||
killtime = time.time() + interval
|
killtime = time.time() + interval
|
||||||
child = subprocess.Popen(['./db_stress \
|
|
||||||
--test_batches_snapshots=1 \
|
|
||||||
--ops_per_thread=0' + str(ops_per_thread) + ' \
|
|
||||||
--threads=0' + str(threads) + ' \
|
|
||||||
--write_buffer_size=' + str(write_buf_size) + '\
|
|
||||||
--destroy_db_initially=0 \
|
|
||||||
--reopen=0 \
|
|
||||||
--readpercent=50 \
|
|
||||||
--prefixpercent=5 \
|
|
||||||
--writepercent=40 \
|
|
||||||
--delpercent=5 \
|
|
||||||
--db=' + dirpath + '\
|
|
||||||
--max_key=100000000 ' + additional_opts],
|
|
||||||
stderr=subprocess.PIPE, shell=True)
|
|
||||||
print("Running db_stress with pid=%d and additional options=\n"
|
|
||||||
% child.pid + additional_opts + "\n")
|
|
||||||
time.sleep(interval)
|
|
||||||
while True:
|
|
||||||
if time.time() > killtime:
|
|
||||||
if child.poll() is not None:
|
|
||||||
print("WARNING: db_stress ended before kill\n")
|
|
||||||
else:
|
|
||||||
child.kill()
|
|
||||||
print("KILLED %d\n" % child.pid)
|
|
||||||
time.sleep(1) # time to stabilize after a kill
|
|
||||||
|
|
||||||
while True:
|
cmd = re.sub('\s+', ' ', """
|
||||||
line = child.stderr.readline().strip()
|
./db_stress
|
||||||
if line != '':
|
--test_batches_snapshots=1
|
||||||
run_had_errors = True
|
--ops_per_thread=%s
|
||||||
print('***' + line + '^')
|
--threads=%s
|
||||||
else:
|
--write_buffer_size=%s
|
||||||
break
|
--destroy_db_initially=0
|
||||||
if run_had_errors:
|
--reopen=0
|
||||||
sys.exit(2)
|
--readpercent=50
|
||||||
|
--prefixpercent=5
|
||||||
|
--writepercent=40
|
||||||
|
--delpercent=5
|
||||||
|
--db=%s
|
||||||
|
--max_key=100000000
|
||||||
|
--disable_seek_compaction=%s
|
||||||
|
--mmap_read=%s
|
||||||
|
--block_size=16384
|
||||||
|
--cache_size=1048576
|
||||||
|
--open_files=500000
|
||||||
|
--verify_checksum=1
|
||||||
|
--sync=%s
|
||||||
|
--disable_wal=0
|
||||||
|
--disable_data_sync=%s
|
||||||
|
--target_file_size_base=2097152
|
||||||
|
--target_file_size_multiplier=2
|
||||||
|
--max_write_buffer_number=3
|
||||||
|
--max_background_compactions=20
|
||||||
|
--max_bytes_for_level_base=10485760
|
||||||
|
--filter_deletes=%s
|
||||||
|
""" % (ops_per_thread,
|
||||||
|
threads,
|
||||||
|
write_buf_size,
|
||||||
|
tempfile.mkdtemp(),
|
||||||
|
random.randint(0, 1),
|
||||||
|
random.randint(0, 1),
|
||||||
|
random.randint(0, 1),
|
||||||
|
random.randint(0, 1),
|
||||||
|
random.randint(0, 1)))
|
||||||
|
|
||||||
|
child = subprocess.Popen([cmd],
|
||||||
|
stderr=subprocess.PIPE, shell=True)
|
||||||
|
print("Running db_stress with pid=%d: %s\n\n"
|
||||||
|
% (child.pid, cmd))
|
||||||
|
|
||||||
|
while time.time() < killtime:
|
||||||
|
time.sleep(10)
|
||||||
|
|
||||||
|
if child.poll() is not None:
|
||||||
|
print("WARNING: db_stress ended before kill: exitcode=%d\n"
|
||||||
|
% child.returncode)
|
||||||
|
else:
|
||||||
|
child.kill()
|
||||||
|
print("KILLED %d\n" % child.pid)
|
||||||
|
time.sleep(1) # time to stabilize after a kill
|
||||||
|
|
||||||
|
while True:
|
||||||
|
line = child.stderr.readline().strip()
|
||||||
|
if line != '':
|
||||||
|
run_had_errors = True
|
||||||
|
print('***' + line + '^')
|
||||||
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
time.sleep(1) # time to stabilize before the next run
|
if run_had_errors:
|
||||||
|
sys.exit(2)
|
||||||
|
|
||||||
|
time.sleep(1) # time to stabilize before the next run
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
sys.exit(main(sys.argv[1:]))
|
sys.exit(main(sys.argv[1:]))
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#! /usr/bin/env python
|
#! /usr/bin/env python
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
import random
|
import random
|
||||||
@ -8,24 +9,22 @@ import logging
|
|||||||
import tempfile
|
import tempfile
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
# This python script runs db_stress multiple times with kill_random_test
|
# This python script runs db_stress multiple times. Some runs with
|
||||||
# that causes leveldb to crash at various points in code.
|
# kill_random_test that causes leveldb to crash at various points in code.
|
||||||
# It also has test-batches-snapshot ON so that basic atomic/consistency
|
|
||||||
# checks can be performed.
|
|
||||||
#
|
|
||||||
def main(argv):
|
def main(argv):
|
||||||
try:
|
try:
|
||||||
opts, args = getopt.getopt(argv, "hd:t:k:o:b:")
|
opts, args = getopt.getopt(argv, "hd:t:k:o:b:")
|
||||||
except getopt.GetoptError:
|
except getopt.GetoptError:
|
||||||
print str(getopt.GetoptError)
|
print str(getopt.GetoptError)
|
||||||
print "db_crashtest2.py -d <duration_test> -t <#threads> " \
|
print "db_crashtest2.py -d <duration_test> -t <#threads> " \
|
||||||
"-k <kills with prob 1/k> -o <ops_per_thread> "\
|
"-k <kills with prob 1/k> -o <ops_per_thread> "\
|
||||||
"-b <write_buffer_size>\n"
|
"-b <write_buffer_size>\n"
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
# default values, will be overridden by cmdline args
|
# default values, will be overridden by cmdline args
|
||||||
kill_random_test = 97 # kill with probability 1/97 by default
|
kill_random_test = 97 # kill with probability 1/97 by default
|
||||||
duration = 6000 # total time for this script to test db_stress
|
duration = 10000 # total time for this script to test db_stress
|
||||||
threads = 32
|
threads = 32
|
||||||
ops_per_thread = 200000
|
ops_per_thread = 200000
|
||||||
write_buf_size = 4 * 1024 * 1024
|
write_buf_size = 4 * 1024 * 1024
|
||||||
@ -33,93 +32,101 @@ def main(argv):
|
|||||||
for opt, arg in opts:
|
for opt, arg in opts:
|
||||||
if opt == '-h':
|
if opt == '-h':
|
||||||
print "db_crashtest2.py -d <duration_test> -t <#threads> " \
|
print "db_crashtest2.py -d <duration_test> -t <#threads> " \
|
||||||
"-k <kills with prob 1/k> -o <ops_per_thread> "\
|
"-k <kills with prob 1/k> -o <ops_per_thread> " \
|
||||||
"-b <write_buffer_size>\n"
|
"-b <write_buffer_size>\n"
|
||||||
sys.exit()
|
sys.exit()
|
||||||
elif opt == ("-d"):
|
elif opt == "-d":
|
||||||
duration = int(arg)
|
duration = int(arg)
|
||||||
elif opt == ("-t"):
|
elif opt == "-t":
|
||||||
threads = int(arg)
|
threads = int(arg)
|
||||||
elif opt == ("-k"):
|
elif opt == "-k":
|
||||||
kill_random_test = int(arg)
|
kill_random_test = int(arg)
|
||||||
elif opt == ("-i"):
|
elif opt == "-o":
|
||||||
interval = int(arg)
|
|
||||||
elif opt == ("-o"):
|
|
||||||
ops_per_thread = int(arg)
|
ops_per_thread = int(arg)
|
||||||
elif opt == ("-b"):
|
elif opt == "-b":
|
||||||
write_buf_size = int(arg)
|
write_buf_size = int(arg)
|
||||||
else:
|
else:
|
||||||
print "unrecognized option " + str(opt) + "\n"
|
print "unrecognized option " + str(opt) + "\n"
|
||||||
print "db_crashtest2.py -d <duration_test> -t <#threads> " \
|
print "db_crashtest2.py -d <duration_test> -t <#threads> " \
|
||||||
"-k <kills with prob 1/k> -o <ops_per_thread> " \
|
"-k <kills with prob 1/k> -o <ops_per_thread> " \
|
||||||
"-b <write_buffer_size>\n"
|
"-b <write_buffer_size>\n"
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
exit_time = time.time() + duration
|
exit_time = time.time() + duration
|
||||||
|
|
||||||
dirpath = tempfile.mkdtemp()
|
print "Running whitebox-crash-test with \ntotal-duration=" + str(duration) \
|
||||||
|
+ "\nthreads=" + str(threads) + "\nops_per_thread=" \
|
||||||
|
+ str(ops_per_thread) + "\nwrite_buffer_size=" \
|
||||||
|
+ str(write_buf_size) + "\n"
|
||||||
|
|
||||||
print("Running whitebox-crash-test with \ntotal-duration=" + str(duration)
|
total_check_mode = 3
|
||||||
+ "\nthreads=" + str(threads) + "\nops_per_thread="
|
check_mode = 0
|
||||||
+ str(ops_per_thread) + "\nwrite_buffer_size="
|
|
||||||
+ str(write_buf_size) + "\n")
|
|
||||||
|
|
||||||
# kill in every alternate run. toggle tracks which run we are doing.
|
|
||||||
toggle = True
|
|
||||||
|
|
||||||
while time.time() < exit_time:
|
while time.time() < exit_time:
|
||||||
run_had_errors = False
|
killoption = ""
|
||||||
additional_opts = ' --disable_seek_compaction=' + \
|
if check_mode == 0:
|
||||||
str(random.randint(0, 1)) + \
|
# run with kill_random_test
|
||||||
' --mmap_read=' + str(random.randint(0, 1)) + \
|
killoption = " --kill_random_test=" + str(kill_random_test)
|
||||||
' --block_size=16384 ' + \
|
# use large ops per thread since we will kill it anyway
|
||||||
' --cache_size=1048576 ' + \
|
additional_opts = "--ops_per_thread=" + \
|
||||||
' --open_files=500000 ' + \
|
str(100 * ops_per_thread) + killoption
|
||||||
' --verify_checksum=1 ' + \
|
elif check_mode == 1:
|
||||||
' --sync=' + str(random.randint(0, 1)) + \
|
# normal run with universal compaction mode
|
||||||
' --disable_wal=0 ' + \
|
additional_opts = "--ops_per_thread=" + str(ops_per_thread) + \
|
||||||
' --disable_data_sync=' + \
|
" --compaction_style=1"
|
||||||
str(random.randint(0, 1)) + \
|
|
||||||
' --target_file_size_base=2097152 ' + \
|
|
||||||
' --target_file_size_multiplier=2 ' + \
|
|
||||||
' --max_write_buffer_number=3 ' + \
|
|
||||||
' --max_background_compactions=20 ' + \
|
|
||||||
' --max_bytes_for_level_base=10485760 ' + \
|
|
||||||
' --filter_deletes=' + str(random.randint(0, 1))
|
|
||||||
print ("Running db_stress with additional options=\n"
|
|
||||||
+ additional_opts + "\n")
|
|
||||||
|
|
||||||
if toggle:
|
|
||||||
# since we are going to kill anyway, use more ops per thread
|
|
||||||
new_ops_per_thread = 100 * ops_per_thread
|
|
||||||
killoption = '--kill_random_test=' + str(kill_random_test)
|
|
||||||
else:
|
else:
|
||||||
new_ops_per_thread = ops_per_thread
|
# normal run
|
||||||
killoption = ''
|
additional_opts = "--ops_per_thread=" + str(ops_per_thread)
|
||||||
|
|
||||||
toggle = not toggle
|
cmd = re.sub('\s+', ' ', """
|
||||||
|
./db_stress
|
||||||
|
--test_batches_snapshots=%s
|
||||||
|
--threads=%s
|
||||||
|
--write_buffer_size=%s
|
||||||
|
--destroy_db_initially=0
|
||||||
|
--reopen=0
|
||||||
|
--readpercent=50
|
||||||
|
--prefixpercent=5
|
||||||
|
--writepercent=40
|
||||||
|
--delpercent=5
|
||||||
|
--db=%s
|
||||||
|
--max_key=100000000
|
||||||
|
--disable_seek_compaction=%s
|
||||||
|
--mmap_read=%s
|
||||||
|
--block_size=16384
|
||||||
|
--cache_size=1048576
|
||||||
|
--open_files=500000
|
||||||
|
--verify_checksum=1
|
||||||
|
--sync=%s
|
||||||
|
--disable_wal=0
|
||||||
|
--disable_data_sync=%s
|
||||||
|
--target_file_size_base=2097152
|
||||||
|
--target_file_size_multiplier=2
|
||||||
|
--max_write_buffer_number=3
|
||||||
|
--max_background_compactions=20
|
||||||
|
--max_bytes_for_level_base=10485760
|
||||||
|
--filter_deletes=%s
|
||||||
|
%s
|
||||||
|
""" % (random.randint(0, 1),
|
||||||
|
threads,
|
||||||
|
write_buf_size,
|
||||||
|
tempfile.mkdtemp(),
|
||||||
|
random.randint(0, 1),
|
||||||
|
random.randint(0, 1),
|
||||||
|
random.randint(0, 1),
|
||||||
|
random.randint(0, 1),
|
||||||
|
random.randint(0, 1),
|
||||||
|
additional_opts))
|
||||||
|
|
||||||
cmd = ['./db_stress \
|
print "Running:" + cmd + "\n"
|
||||||
--test_batches_snapshots=1 \
|
|
||||||
--ops_per_thread=0' + str(new_ops_per_thread) + ' \
|
|
||||||
--threads=0' + str(threads) + ' \
|
|
||||||
--write_buffer_size=' + str(write_buf_size) + ' \
|
|
||||||
--destroy_db_initially=0 ' + killoption + ' \
|
|
||||||
--reopen=0 \
|
|
||||||
--readpercent=50 \
|
|
||||||
--prefixpercent=5 \
|
|
||||||
--writepercent=40 \
|
|
||||||
--delpercent=5 \
|
|
||||||
--db=' + dirpath + ' \
|
|
||||||
--max_key=100000000 ' + additional_opts]
|
|
||||||
|
|
||||||
popen = subprocess.Popen(cmd, stdout=subprocess.PIPE,
|
popen = subprocess.Popen([cmd], stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.STDOUT,
|
stderr=subprocess.STDOUT,
|
||||||
shell=True)
|
shell=True)
|
||||||
stdoutdata, stderrdata = popen.communicate()
|
stdoutdata, stderrdata = popen.communicate()
|
||||||
retncode = popen.returncode
|
retncode = popen.returncode
|
||||||
msg = ("kill option = {0}, exitcode = {1}".format(
|
msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format(
|
||||||
killoption, retncode))
|
check_mode, killoption, retncode))
|
||||||
print msg
|
print msg
|
||||||
print stdoutdata
|
print stdoutdata
|
||||||
|
|
||||||
@ -146,6 +153,9 @@ def main(argv):
|
|||||||
if (stdoutdata.find('fail') >= 0):
|
if (stdoutdata.find('fail') >= 0):
|
||||||
print "TEST FAILED. Output has 'fail'!!!\n"
|
print "TEST FAILED. Output has 'fail'!!!\n"
|
||||||
sys.exit(2)
|
sys.exit(2)
|
||||||
|
|
||||||
|
check_mode = (check_mode + 1) % total_check_mode
|
||||||
|
|
||||||
time.sleep(1) # time to stabilize after a kill
|
time.sleep(1) # time to stabilize after a kill
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Loading…
x
Reference in New Issue
Block a user