Stress test to inject read failures in DB reopen (#8476)
Summary: Inject read failures during DB reopen, just as we already do for metadata writes and data writes. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8476 Test Plan: Ran some manual tests and made sure failures are triggered. Reviewed By: anand1976 Differential Revision: D29507283 fbshipit-source-id: d04da0163973447041038bd87701686a417c4e0c
This commit is contained in:
parent
1ae026c400
commit
f33611d5e9
@ -825,7 +825,10 @@ DEFINE_string(secondary_cache_uri, "",
|
||||
"Full URI for creating a customized secondary cache object");
|
||||
#endif // ROCKSDB_LITE
|
||||
DEFINE_int32(open_write_fault_one_in, 0,
|
||||
"On non-zero, enables fault injection on file write "
|
||||
"On non-zero, enables fault injection on file writes "
|
||||
"during DB reopen.");
|
||||
DEFINE_int32(open_read_fault_one_in, 0,
|
||||
"On non-zero, enables fault injection on file reads "
|
||||
"during DB reopen.");
|
||||
DEFINE_int32(injest_error_severity, 1,
|
||||
"The severity of the injested IO Error. 1 is soft error (e.g. "
|
||||
|
@ -32,6 +32,7 @@ DECLARE_int32(read_fault_one_in);
|
||||
DECLARE_int32(write_fault_one_in);
|
||||
DECLARE_int32(open_metadata_write_fault_one_in);
|
||||
DECLARE_int32(open_write_fault_one_in);
|
||||
DECLARE_int32(open_read_fault_one_in);
|
||||
|
||||
DECLARE_int32(injest_error_severity);
|
||||
|
||||
|
@ -2476,13 +2476,15 @@ void StressTest::Open() {
|
||||
// TODO cover transaction DB is not covered in this fault test too.
|
||||
bool ingest_meta_error = false;
|
||||
bool ingest_write_error = false;
|
||||
bool ingest_read_error = false;
|
||||
if ((FLAGS_open_metadata_write_fault_one_in ||
|
||||
FLAGS_open_write_fault_one_in) &&
|
||||
FLAGS_open_write_fault_one_in || FLAGS_open_read_fault_one_in) &&
|
||||
fault_fs_guard
|
||||
->FileExists(FLAGS_db + "/CURRENT", IOOptions(), nullptr)
|
||||
.ok()) {
|
||||
ingest_meta_error = FLAGS_open_metadata_write_fault_one_in;
|
||||
ingest_write_error = FLAGS_open_write_fault_one_in;
|
||||
ingest_read_error = FLAGS_open_read_fault_one_in;
|
||||
if (ingest_meta_error) {
|
||||
fault_fs_guard->EnableMetadataWriteErrorInjection();
|
||||
fault_fs_guard->SetRandomMetadataWriteError(
|
||||
@ -2496,6 +2498,9 @@ void StressTest::Open() {
|
||||
IOStatus::IOError("Injected Open Error"),
|
||||
/*inject_for_all_file_types=*/true, /*types=*/{});
|
||||
}
|
||||
if (ingest_read_error) {
|
||||
fault_fs_guard->SetRandomReadError(FLAGS_open_read_fault_one_in);
|
||||
}
|
||||
}
|
||||
while (true) {
|
||||
#endif // NDEBUG
|
||||
@ -2529,10 +2534,11 @@ void StressTest::Open() {
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (ingest_meta_error || ingest_write_error) {
|
||||
if (ingest_meta_error || ingest_write_error || ingest_read_error) {
|
||||
fault_fs_guard->SetFilesystemDirectWritable(true);
|
||||
fault_fs_guard->DisableMetadataWriteErrorInjection();
|
||||
fault_fs_guard->DisableWriteErrorInjection();
|
||||
fault_fs_guard->SetRandomReadError(0);
|
||||
if (s.ok()) {
|
||||
// Ingested errors might happen in background compactions. We
|
||||
// wait for all compactions to finish to make sure DB is in
|
||||
@ -2549,6 +2555,7 @@ void StressTest::Open() {
|
||||
// up.
|
||||
ingest_meta_error = false;
|
||||
ingest_write_error = false;
|
||||
ingest_read_error = false;
|
||||
|
||||
Random rand(static_cast<uint32_t>(FLAGS_seed));
|
||||
if (rand.OneIn(2)) {
|
||||
|
@ -87,7 +87,7 @@ int db_stress_tool(int argc, char** argv) {
|
||||
#ifndef NDEBUG
|
||||
if (FLAGS_read_fault_one_in || FLAGS_sync_fault_injection ||
|
||||
FLAGS_write_fault_one_in || FLAGS_open_metadata_write_fault_one_in ||
|
||||
FLAGS_open_write_fault_one_in) {
|
||||
FLAGS_open_write_fault_one_in || FLAGS_open_read_fault_one_in) {
|
||||
FaultInjectionTestFS* fs =
|
||||
new FaultInjectionTestFS(raw_env->GetFileSystem());
|
||||
fault_fs_guard.reset(fs);
|
||||
|
@ -139,8 +139,9 @@ default_params = {
|
||||
"max_key_len": 3,
|
||||
"key_len_percent_dist": "1,30,69",
|
||||
"read_fault_one_in": lambda: random.choice([0, 1000]),
|
||||
"open_metadata_write_fault_one_in": lambda: random.choice([0, 8]),
|
||||
"open_write_fault_one_in": lambda: random.choice([0, 16]),
|
||||
"open_metadata_write_fault_one_in": lambda: random.choice([0, 0, 8]),
|
||||
"open_write_fault_one_in": lambda: random.choice([0, 0, 16]),
|
||||
"open_read_fault_one_in": lambda: random.choice([0, 0, 32]),
|
||||
"sync_fault_injection": False,
|
||||
"get_property_one_in": 1000000,
|
||||
"paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
|
||||
|
@ -324,8 +324,12 @@ IOStatus TestFSRandomAccessFile::Read(uint64_t offset, size_t n,
|
||||
}
|
||||
IOStatus s = target_->Read(offset, n, options, result, scratch, dbg);
|
||||
if (s.ok()) {
|
||||
s = fs_->InjectError(FaultInjectionTestFS::ErrorOperation::kRead, result,
|
||||
use_direct_io(), scratch);
|
||||
s = fs_->InjectThreadSpecificReadError(
|
||||
FaultInjectionTestFS::ErrorOperation::kRead, result, use_direct_io(),
|
||||
scratch);
|
||||
}
|
||||
if (s.ok() && fs_->ShouldInjectRandomReadError()) {
|
||||
return IOStatus::IOError("Injected read error");
|
||||
}
|
||||
return s;
|
||||
}
|
||||
@ -337,6 +341,27 @@ size_t TestFSRandomAccessFile::GetUniqueId(char* id, size_t max_size) const {
|
||||
return target_->GetUniqueId(id, max_size);
|
||||
}
|
||||
}
|
||||
// Sequential read with optional fault injection.
//
// The request is first forwarded to the wrapped file. A failure reported by
// the wrapped file is returned unchanged. Only when the underlying read
// succeeded is the owning FaultInjectionTestFS asked whether a random read
// error should be reported in its place.
IOStatus TestFSSequentialFile::Read(size_t n, const IOOptions& options,
                                    Slice* result, char* scratch,
                                    IODebugContext* dbg) {
  IOStatus io_s = target()->Read(n, options, result, scratch, dbg);
  if (!io_s.ok()) {
    // Genuine failure from the wrapped file: pass it through untouched.
    return io_s;
  }
  if (fs_->ShouldInjectRandomReadError()) {
    return IOStatus::IOError("Injected seq read error");
  }
  return io_s;
}
|
||||
|
||||
// Positioned read with optional fault injection.
//
// Mirrors Read(): the call is delegated to the wrapped file, a real failure
// is returned as-is, and a successful read may be replaced by an injected
// IOError when the owning FaultInjectionTestFS decides to inject one.
IOStatus TestFSSequentialFile::PositionedRead(uint64_t offset, size_t n,
                                              const IOOptions& options,
                                              Slice* result, char* scratch,
                                              IODebugContext* dbg) {
  IOStatus io_s =
      target()->PositionedRead(offset, n, options, result, scratch, dbg);
  if (!io_s.ok()) {
    // Genuine failure from the wrapped file: pass it through untouched.
    return io_s;
  }
  if (fs_->ShouldInjectRandomReadError()) {
    return IOStatus::IOError("Injected seq positioned read error");
  }
  return io_s;
}
|
||||
|
||||
IOStatus FaultInjectionTestFS::NewDirectory(
|
||||
const std::string& name, const IOOptions& options,
|
||||
@ -474,7 +499,11 @@ IOStatus FaultInjectionTestFS::NewRandomAccessFile(
|
||||
if (!IsFilesystemActive()) {
|
||||
return GetError();
|
||||
}
|
||||
IOStatus io_s = InjectError(ErrorOperation::kOpen, nullptr, false, nullptr);
|
||||
if (ShouldInjectRandomReadError()) {
|
||||
return IOStatus::IOError("Injected error when open random access file");
|
||||
}
|
||||
IOStatus io_s = InjectThreadSpecificReadError(ErrorOperation::kOpen, nullptr,
|
||||
false, nullptr);
|
||||
if (io_s.ok()) {
|
||||
io_s = target()->NewRandomAccessFile(fname, file_opts, result, dbg);
|
||||
}
|
||||
@ -484,6 +513,23 @@ IOStatus FaultInjectionTestFS::NewRandomAccessFile(
|
||||
return io_s;
|
||||
}
|
||||
|
||||
// Opens `fname` for sequential reading through the fault-injection layer.
//
// Order of checks:
//   1. If the file system is currently inactive, return the stored error.
//   2. If a random read error should be injected, fail before touching the
//      underlying file system.
//   3. Otherwise open the file on the wrapped file system and, on success,
//      wrap the returned file in a TestFSSequentialFile so that later reads
//      go through the injection hooks as well.
IOStatus FaultInjectionTestFS::NewSequentialFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSSequentialFile>* result, IODebugContext* dbg) {
  if (!IsFilesystemActive()) {
    return GetError();
  }
  if (ShouldInjectRandomReadError()) {
    return IOStatus::IOError("Injected read error when creating seq file");
  }

  IOStatus open_status =
      target()->NewSequentialFile(fname, file_opts, result, dbg);
  if (!open_status.ok()) {
    return open_status;
  }
  // Hand the freshly opened file over to the injecting wrapper.
  FSSequentialFile* opened_file = result->release();
  result->reset(new TestFSSequentialFile(opened_file, this));
  return open_status;
}
|
||||
|
||||
IOStatus FaultInjectionTestFS::DeleteFile(const std::string& f,
|
||||
const IOOptions& options,
|
||||
IODebugContext* dbg) {
|
||||
@ -642,10 +688,10 @@ void FaultInjectionTestFS::UntrackFile(const std::string& f) {
|
||||
open_files_.erase(f);
|
||||
}
|
||||
|
||||
IOStatus FaultInjectionTestFS::InjectError(ErrorOperation op,
|
||||
Slice* result,
|
||||
bool direct_io,
|
||||
char* scratch) {
|
||||
IOStatus FaultInjectionTestFS::InjectThreadSpecificReadError(ErrorOperation op,
|
||||
Slice* result,
|
||||
bool direct_io,
|
||||
char* scratch) {
|
||||
ErrorContext* ctx =
|
||||
static_cast<ErrorContext*>(thread_local_error_->Get());
|
||||
if (ctx == nullptr || !ctx->enable_error_injection || !ctx->one_in) {
|
||||
|
@ -150,6 +150,21 @@ class TestFSRandomAccessFile : public FSRandomAccessFile {
|
||||
FaultInjectionTestFS* fs_;
|
||||
};
|
||||
|
||||
class TestFSSequentialFile : public FSSequentialFileWrapper {
|
||||
public:
|
||||
explicit TestFSSequentialFile(FSSequentialFile* f, FaultInjectionTestFS* fs)
|
||||
: FSSequentialFileWrapper(f), target_guard_(f), fs_(fs) {}
|
||||
IOStatus Read(size_t n, const IOOptions& options, Slice* result,
|
||||
char* scratch, IODebugContext* dbg) override;
|
||||
IOStatus PositionedRead(uint64_t offset, size_t n, const IOOptions& options,
|
||||
Slice* result, char* scratch,
|
||||
IODebugContext* dbg) override;
|
||||
|
||||
private:
|
||||
std::unique_ptr<FSSequentialFile> target_guard_;
|
||||
FaultInjectionTestFS* fs_;
|
||||
};
|
||||
|
||||
class TestFSDirectory : public FSDirectory {
|
||||
public:
|
||||
explicit TestFSDirectory(FaultInjectionTestFS* fs, std::string dirname,
|
||||
@ -178,6 +193,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
||||
write_error_rand_(0),
|
||||
write_error_one_in_(0),
|
||||
metadata_write_error_one_in_(0),
|
||||
read_error_one_in_(0),
|
||||
ingest_data_corruption_before_write_(false),
|
||||
fail_get_file_unique_id_(false) {}
|
||||
virtual ~FaultInjectionTestFS() { error_.PermitUncheckedError(); }
|
||||
@ -207,6 +223,9 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
||||
const FileOptions& file_opts,
|
||||
std::unique_ptr<FSRandomAccessFile>* result,
|
||||
IODebugContext* dbg) override;
|
||||
IOStatus NewSequentialFile(const std::string& f, const FileOptions& file_opts,
|
||||
std::unique_ptr<FSSequentialFile>* r,
|
||||
IODebugContext* dbg) override;
|
||||
|
||||
virtual IOStatus DeleteFile(const std::string& f, const IOOptions& options,
|
||||
IODebugContext* dbg) override;
|
||||
@ -381,6 +400,13 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
||||
MutexLock l(&mutex_);
|
||||
metadata_write_error_one_in_ = one_in;
|
||||
}
|
||||
// If the value is not 0, it is enabled. Otherwise, it is disabled.
|
||||
void SetRandomReadError(int one_in) { read_error_one_in_ = one_in; }
|
||||
|
||||
bool ShouldInjectRandomReadError() {
|
||||
return read_error_one_in() &&
|
||||
Random::GetTLSInstance()->OneIn(read_error_one_in());
|
||||
}
|
||||
|
||||
// Inject a write error with randomized parameters and the predefined
|
||||
// error type. Only the allowed file types will inject the write error
|
||||
@ -393,8 +419,8 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
||||
// corruption in the contents of scratch, or truncation of slice
|
||||
// are the types of error with equal probability. For OPEN,
|
||||
// its always an IOError.
|
||||
IOStatus InjectError(ErrorOperation op, Slice* slice,
|
||||
bool direct_io, char* scratch);
|
||||
IOStatus InjectThreadSpecificReadError(ErrorOperation op, Slice* slice,
|
||||
bool direct_io, char* scratch);
|
||||
|
||||
// Get the count of how many times we injected since the previous call
|
||||
int GetAndResetErrorCount() {
|
||||
@ -420,7 +446,6 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
||||
MutexLock l(&mutex_);
|
||||
enable_write_error_injection_ = true;
|
||||
}
|
||||
|
||||
void EnableMetadataWriteErrorInjection() {
|
||||
MutexLock l(&mutex_);
|
||||
enable_metadata_write_error_injection_ = true;
|
||||
@ -444,6 +469,8 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
||||
enable_metadata_write_error_injection_ = false;
|
||||
}
|
||||
|
||||
// Current random read-error rate; 0 means read-error injection is disabled.
int read_error_one_in() const {
  return read_error_one_in_.load(std::memory_order_seq_cst);
}
|
||||
|
||||
// We capture a backtrace every time a fault is injected, for debugging
|
||||
// purposes. This call prints the backtrace to stderr and frees the
|
||||
// saved callstack
|
||||
@ -494,6 +521,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
|
||||
Random write_error_rand_;
|
||||
int write_error_one_in_;
|
||||
int metadata_write_error_one_in_;
|
||||
std::atomic<int> read_error_one_in_;
|
||||
bool inject_for_all_file_types_;
|
||||
std::vector<FileType> write_error_allowed_types_;
|
||||
bool ingest_data_corruption_before_write_;
|
||||
|
Loading…
Reference in New Issue
Block a user