diff --git a/tools/db_crashtest2.py b/tools/db_crashtest2.py new file mode 100644 index 000000000..4d1ff3a9b --- /dev/null +++ b/tools/db_crashtest2.py @@ -0,0 +1,108 @@ +#! /usr/bin/env python +import os +import sys +import time +import shlex +import getopt +import logging +import tempfile +import subprocess + +# This python script runs db_stress multiple times with kill_random_test +# that causes leveldb to crash at various points in code. +# It also has test-batches-snapshot ON so that basic atomic/consistency +# checks can be performed. +# +def main(argv): + os.system("make -C ~/rocksdb db_stress") + try: + opts, args = getopt.getopt(argv, "hd:t:k:o:b:") + except getopt.GetoptError: + print str(getopt.GetoptError) + print "db_crashtest2.py -d -t <#threads> " \ + "-k -o "\ + "-b \n" + sys.exit(2) + + # default values, will be overridden by cmdline args + kill_random_test = 97 # kill with probability 1/97 by default + duration = 6000 # total time for this script to test db_stress + threads = 32 + ops_per_thread = 200000 + write_buf_size = 4 * 1024 * 1024 + + for opt, arg in opts: + if opt == '-h': + print "db_crashtest2.py -d -t <#threads> " \ + "-k -o "\ + "-b \n" + sys.exit() + elif opt == ("-d"): + duration = int(arg) + elif opt == ("-t"): + threads = int(arg) + elif opt == ("-k"): + kill_random_test = int(arg) + elif opt == ("-i"): + interval = int(arg) + elif opt == ("-o"): + ops_per_thread = int(arg) + elif opt == ("-b"): + write_buf_size = int(arg) + else: + print "unrecognized option " + str(opt) + "\n" + print "db_crashtest2.py -d -t <#threads> " \ + "-k -o " \ + "-b \n" + sys.exit(2) + + exit_time = time.time() + duration + + dirpath = tempfile.mkdtemp() + + # kill in every alternate run. toggle tracks which run we are doing. + toggle = True + + while time.time() < exit_time: + run_had_errors = False + print "Running db_stress \n" + + if toggle: + # since we are going to kill anyway, use more ops per thread + new_ops_per_thread = 100 * ops_per_thread + killoption = '--kill_random_test=' + str(kill_random_test) + else: + new_ops_per_thread = ops_per_thread + killoption = '' + + toggle = not toggle + + cmd = ['~/rocksdb/db_stress \ + --test_batches_snapshots=1 \ + --ops_per_thread=0' + str(new_ops_per_thread) + ' \ + --threads=0' + str(threads) + ' \ + --write_buffer_size=' + str(write_buf_size) + ' \ + --destroy_db_initially=0 ' + killoption + ' \ + --reopen=0 \ + --readpercent=50 \ + --db=' + dirpath + ' \ + --max_key=10000'] + try: + subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) + if killoption != '': + logging.warn("WARNING: db_stress did not kill itself\n") + continue + + except subprocess.CalledProcessError as e: + msg = "db_stress retncode {0} output {1}".format(e.returncode, + e.output) + logging.info(msg) + print msg + msglower = msg.lower() + if ('error' in msglower) or ('fail' in msglower): + print "TEST FAILED!!!\n" + sys.exit(2) + time.sleep(1) # time to stabilize after a kill + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 54ab9c218..3a759d5c9 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -122,6 +122,10 @@ static bool FLAGS_disable_data_sync = false; // If true, issue fsync instead of fdatasync static bool FLAGS_use_fsync = false; +// If non-zero, kill at various points in source code with probability 1/this +static int FLAGS_kill_random_test = 0; +extern int leveldb_kill_odds; + // If true, do not write WAL for write. static bool FLAGS_disable_wal = false; @@ -698,7 +702,7 @@ class StressTest { char expected_prefix = (keys[i])[0]; char actual_prefix = (values[i])[0]; if (actual_prefix != expected_prefix) { - fprintf(stderr, "expected prefix = %c actual = %c\n", + fprintf(stderr, "error expected prefix = %c actual = %c\n", expected_prefix, actual_prefix); } (values[i])[0] = ' '; // blank out the differing character @@ -710,7 +714,7 @@ class StressTest { // Now that we retrieved all values, check that they all match for (int i = 1; i < 10; i++) { if (values[i] != values[0]) { - fprintf(stderr, "inconsistent values for key %s: %s, %s\n", + fprintf(stderr, "error : inconsistent values for key %s: %s, %s\n", key.ToString().c_str(), values[0].c_str(), values[i].c_str()); // we continue after error rather than exiting so that we can @@ -931,6 +935,7 @@ class StressTest { options.env = FLAGS_env; options.disableDataSync = FLAGS_disable_data_sync; options.use_fsync = FLAGS_use_fsync; + leveldb_kill_odds = FLAGS_kill_random_test; options.target_file_size_base = FLAGS_target_file_size_base; options.target_file_size_multiplier = FLAGS_target_file_size_multiplier; options.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base; @@ -1093,6 +1098,9 @@ int main(int argc, char** argv) { } else if (sscanf(argv[i], "--use_fsync=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_use_fsync = n; + } else if (sscanf(argv[i], "--kill_random_test=%d%c", &n, &junk) == 1 && + (n >= 0)) { + FLAGS_kill_random_test = n; } else if (sscanf(argv[i], "--disable_wal=%d%c", &n, &junk) == 1 && (n == 0 || n == 1)) { FLAGS_disable_wal = n; diff --git a/util/env_posix.cc b/util/env_posix.cc index 30e05d325..78afcddd6 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -32,6 +32,8 @@ #include "util/coding.h" #include "util/logging.h" #include "util/posix_logger.h" +#include "util/random.h" +#include #if !defined(TMPFS_MAGIC) #define TMPFS_MAGIC 0x01021994 @@ -48,8 +50,13 @@ bool useFsReadAhead = 1; // allow filesystem to do readaheads bool useMmapRead = 0; // do not use mmaps for reading files bool useMmapWrite = 1; // use mmaps for appending to files +// This is only set from db_stress.cc and for testing only. +// If non-zero, kill at various points in source code with probability 1/this +int leveldb_kill_odds = 0; + namespace leveldb { + namespace { // list of pathnames that are locked @@ -60,6 +67,39 @@ static Status IOError(const std::string& context, int err_number) { return Status::IOError(context, strerror(err_number)); } +#ifdef NDEBUG +// empty in release build +#define TEST_KILL_RANDOM(leveldb_kill_odds) +#else + +// Kill the process with probablity 1/odds for testing. +static void TestKillRandom(int odds, const std::string& srcfile, + int srcline) { + time_t curtime = time(nullptr); + Random r((uint32_t)curtime); + + assert(odds > 0); + bool crash = r.OneIn(odds); + if (crash) { + fprintf(stdout, "Crashing at %s:%d\n", srcfile.c_str(), srcline); + fflush(stdout); + kill(getpid(), SIGTERM); + } +} + +// To avoid crashing always at some frequently executed codepaths (during +// kill random test), use this factor to reduce odds +#define REDUCE_ODDS 2 +#define REDUCE_ODDS2 4 + +#define TEST_KILL_RANDOM(leveldb_kill_odds) { \ + if (leveldb_kill_odds > 0) { \ + TestKillRandom(leveldb_kill_odds, __FILE__, __LINE__); \ + } \ +} + +#endif + class PosixSequentialFile: public SequentialFile { private: std::string filename_; @@ -232,6 +272,7 @@ class PosixMmapFile : public WritableFile { bool UnmapCurrentRegion() { bool result = true; + TEST_KILL_RANDOM(leveldb_kill_odds); if (base_ != nullptr) { if (last_sync_ < limit_) { // Defer syncing this data until next Sync() call, if any @@ -257,18 +298,22 @@ class PosixMmapFile : public WritableFile { Status MapNewRegion() { assert(base_ == nullptr); + TEST_KILL_RANDOM(leveldb_kill_odds); int alloc_status = posix_fallocate(fd_, file_offset_, map_size_); if (alloc_status != 0) { return Status::IOError("Error allocating space to file : " + filename_ + "Error : " + strerror(alloc_status)); } - + TEST_KILL_RANDOM(leveldb_kill_odds); void* ptr = mmap(nullptr, map_size_, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, file_offset_); if (ptr == MAP_FAILED) { return Status::IOError("MMap failed on " + filename_); } + + TEST_KILL_RANDOM(leveldb_kill_odds); + base_ = reinterpret_cast(ptr); limit_ = base_ + map_size_; dst_ = base_; @@ -303,6 +348,7 @@ class PosixMmapFile : public WritableFile { virtual Status Append(const Slice& data) { const char* src = data.data(); size_t left = data.size(); + TEST_KILL_RANDOM(leveldb_kill_odds * REDUCE_ODDS); PrepareWrite(GetFileSize(), left); while (left > 0) { assert(base_ <= dst_); @@ -314,6 +360,7 @@ class PosixMmapFile : public WritableFile { if (!s.ok()) { return s; } + TEST_KILL_RANDOM(leveldb_kill_odds); } } @@ -323,12 +370,16 @@ class PosixMmapFile : public WritableFile { src += n; left -= n; } + TEST_KILL_RANDOM(leveldb_kill_odds); return Status::OK(); } virtual Status Close() { Status s; size_t unused = limit_ - dst_; + + TEST_KILL_RANDOM(leveldb_kill_odds); + if (!UnmapCurrentRegion()) { s = IOError(filename_, errno); } else if (unused > 0) { @@ -338,6 +389,8 @@ class PosixMmapFile : public WritableFile { } } + TEST_KILL_RANDOM(leveldb_kill_odds); + if (close(fd_) < 0) { if (s.ok()) { s = IOError(filename_, errno); @@ -351,6 +404,7 @@ class PosixMmapFile : public WritableFile { } virtual Status Flush() { + TEST_KILL_RANDOM(leveldb_kill_odds); return Status::OK(); } @@ -359,10 +413,12 @@ class PosixMmapFile : public WritableFile { if (pending_sync_) { // Some unmapped data was not synced + TEST_KILL_RANDOM(leveldb_kill_odds); pending_sync_ = false; if (fdatasync(fd_) < 0) { s = IOError(filename_, errno); } + TEST_KILL_RANDOM(leveldb_kill_odds * REDUCE_ODDS); } if (dst_ > last_sync_) { @@ -371,9 +427,11 @@ class PosixMmapFile : public WritableFile { size_t p1 = TruncateToPageBoundary(last_sync_ - base_); size_t p2 = TruncateToPageBoundary(dst_ - base_ - 1); last_sync_ = dst_; + TEST_KILL_RANDOM(leveldb_kill_odds); if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) { s = IOError(filename_, errno); } + TEST_KILL_RANDOM(leveldb_kill_odds); } return s; @@ -385,10 +443,12 @@ class PosixMmapFile : public WritableFile { virtual Status Fsync() { if (pending_sync_) { // Some unmapped data was not synced + TEST_KILL_RANDOM(leveldb_kill_odds); pending_sync_ = false; if (fsync(fd_) < 0) { return IOError(filename_, errno); } + TEST_KILL_RANDOM(leveldb_kill_odds); } // This invocation to Sync will not issue the call to // fdatasync because pending_sync_ has already been cleared. @@ -407,6 +467,7 @@ class PosixMmapFile : public WritableFile { #ifdef OS_LINUX virtual Status Allocate(off_t offset, off_t len) { + TEST_KILL_RANDOM(leveldb_kill_odds); if (!fallocate(fd_, FALLOC_FL_KEEP_SIZE, offset, len)) { return Status::OK(); } else { @@ -455,6 +516,8 @@ class PosixWritableFile : public WritableFile { pending_sync_ = true; pending_fsync_ = true; + TEST_KILL_RANDOM(leveldb_kill_odds * REDUCE_ODDS2); + PrepareWrite(GetFileSize(), left); // if there is no space in the cache, then flush if (cursize_ + left > capacity_) { @@ -481,6 +544,8 @@ class PosixWritableFile : public WritableFile { if (done < 0) { return IOError(filename_, errno); } + TEST_KILL_RANDOM(leveldb_kill_odds); + left -= done; src += done; } @@ -494,6 +559,9 @@ class PosixWritableFile : public WritableFile { s = Flush(); // flush cache to OS if (!s.ok()) { } + + TEST_KILL_RANDOM(leveldb_kill_odds); + if (close(fd_) < 0) { if (s.ok()) { s = IOError(filename_, errno); @@ -505,6 +573,7 @@ class PosixWritableFile : public WritableFile { // write out the cached data to the OS cache virtual Status Flush() { + TEST_KILL_RANDOM(leveldb_kill_odds * REDUCE_ODDS2); size_t left = cursize_; char* src = buf_.get(); while (left != 0) { @@ -512,6 +581,7 @@ class PosixWritableFile : public WritableFile { if (done < 0) { return IOError(filename_, errno); } + TEST_KILL_RANDOM(leveldb_kill_odds * REDUCE_ODDS2); left -= done; src += done; } @@ -520,17 +590,21 @@ class PosixWritableFile : public WritableFile { } virtual Status Sync() { + TEST_KILL_RANDOM(leveldb_kill_odds); if (pending_sync_ && fdatasync(fd_) < 0) { return IOError(filename_, errno); } + TEST_KILL_RANDOM(leveldb_kill_odds); pending_sync_ = false; return Status::OK(); } virtual Status Fsync() { + TEST_KILL_RANDOM(leveldb_kill_odds); if (pending_fsync_ && fsync(fd_) < 0) { return IOError(filename_, errno); } + TEST_KILL_RANDOM(leveldb_kill_odds); pending_fsync_ = false; pending_sync_ = false; return Status::OK(); @@ -542,6 +616,7 @@ class PosixWritableFile : public WritableFile { #ifdef OS_LINUX virtual Status Allocate(off_t offset, off_t len) { + TEST_KILL_RANDOM(leveldb_kill_odds); if (!fallocate(fd_, FALLOC_FL_KEEP_SIZE, offset, len)) { return Status::OK(); } else { diff --git a/util/storage_options.h b/util/storage_options.h index 1d2c850e4..bd01094a5 100644 --- a/util/storage_options.h +++ b/util/storage_options.h @@ -21,8 +21,7 @@ class StorageOptions : public EnvOptions { readahead_compactions_(opt.allow_readahead_compactions), use_mmap_reads_(opt.allow_mmap_reads), use_mmap_writes_(opt.allow_mmap_writes), - set_fd_cloexec_(opt.is_fd_close_on_exec) - { + set_fd_cloexec_(opt.is_fd_close_on_exec) { } // copy constructor with readaheads set to readahead_compactions_