Stress Test to inject write failures in reopen (#8474)

Summary:
Previously, the stress test could inject metadata write failures when reopening a DB. We extend this to file appends too, in the same way.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8474

Test Plan: manually run the crash test with various settings and make sure the failures are triggered as expected.

Reviewed By: zhichao-cao

Differential Revision: D29503116

fbshipit-source-id: e73a446e80ccbd09301a579280e56ff949381fab
This commit is contained in:
sdong 2021-06-30 16:45:44 -07:00 committed by Facebook GitHub Bot
parent 41c4b665f4
commit ba224b75c7
7 changed files with 53 additions and 21 deletions

View File

@ -824,5 +824,8 @@ DEFINE_int32(open_metadata_write_fault_one_in, 0,
DEFINE_string(secondary_cache_uri, "",
"Full URI for creating a customized secondary cache object");
#endif // ROCKSDB_LITE
DEFINE_int32(open_write_fault_one_in, 0,
"On non-zero, enables fault injection on file write "
"during DB reopen.");
#endif // GFLAGS

View File

@ -31,6 +31,7 @@ DECLARE_int32(continuous_verification_interval);
DECLARE_int32(read_fault_one_in);
DECLARE_int32(write_fault_one_in);
DECLARE_int32(open_metadata_write_fault_one_in);
DECLARE_int32(open_write_fault_one_in);
namespace ROCKSDB_NAMESPACE {
class StressTest;

View File

@ -620,7 +620,8 @@ void StressTest::OperateDb(ThreadState* thread) {
FileType::kDescriptorFile,
FileType::kCurrentFile};
fault_fs_guard->SetRandomWriteError(
thread->shared->GetSeed(), FLAGS_write_fault_one_in, error_msg, types);
thread->shared->GetSeed(), FLAGS_write_fault_one_in, error_msg,
/*inject_for_all_file_types=*/false, types);
}
#endif // NDEBUG
thread->stats.Start();
@ -2464,15 +2465,28 @@ void StressTest::Open() {
// Only ingest metadata error if it is reopening, as initial open
// failure doesn't need to be handled.
// TODO: transaction DB is not covered in this fault test either.
bool ingest_meta_error =
FLAGS_open_metadata_write_fault_one_in &&
bool ingest_meta_error = false;
bool ingest_write_error = false;
if ((FLAGS_open_metadata_write_fault_one_in ||
FLAGS_open_write_fault_one_in) &&
fault_fs_guard
->FileExists(FLAGS_db + "/CURRENT", IOOptions(), nullptr)
.ok();
if (ingest_meta_error) {
fault_fs_guard->EnableMetadataWriteErrorInjection();
fault_fs_guard->SetRandomMetadataWriteError(
FLAGS_open_metadata_write_fault_one_in);
.ok()) {
ingest_meta_error = FLAGS_open_metadata_write_fault_one_in;
ingest_write_error = FLAGS_open_write_fault_one_in;
if (ingest_meta_error) {
fault_fs_guard->EnableMetadataWriteErrorInjection();
fault_fs_guard->SetRandomMetadataWriteError(
FLAGS_open_metadata_write_fault_one_in);
}
if (ingest_write_error) {
fault_fs_guard->SetFilesystemDirectWritable(false);
fault_fs_guard->EnableWriteErrorInjection();
fault_fs_guard->SetRandomWriteError(
static_cast<uint32_t>(FLAGS_seed), FLAGS_open_write_fault_one_in,
IOStatus::IOError("Injected Open Error"),
/*inject_for_all_file_types=*/true, /*types=*/{});
}
}
while (true) {
#endif // NDEBUG
@ -2506,8 +2520,10 @@ void StressTest::Open() {
}
#ifndef NDEBUG
if (ingest_meta_error) {
if (ingest_meta_error || ingest_write_error) {
fault_fs_guard->SetFilesystemDirectWritable(true);
fault_fs_guard->DisableMetadataWriteErrorInjection();
fault_fs_guard->DisableWriteErrorInjection();
if (s.ok()) {
// Ingested errors might happen in background compactions. We
// wait for all compactions to finish to make sure DB is in
@ -2523,6 +2539,7 @@ void StressTest::Open() {
// successfully open the DB with correct data if no IO error shows
// up.
ingest_meta_error = false;
ingest_write_error = false;
Random rand(static_cast<uint32_t>(FLAGS_seed));
if (rand.OneIn(2)) {

View File

@ -86,7 +86,8 @@ int db_stress_tool(int argc, char** argv) {
#ifndef NDEBUG
if (FLAGS_read_fault_one_in || FLAGS_sync_fault_injection ||
FLAGS_write_fault_one_in || FLAGS_open_metadata_write_fault_one_in) {
FLAGS_write_fault_one_in || FLAGS_open_metadata_write_fault_one_in ||
FLAGS_open_write_fault_one_in) {
FaultInjectionTestFS* fs =
new FaultInjectionTestFS(raw_env->GetFileSystem());
fault_fs_guard.reset(fs);

View File

@ -140,6 +140,7 @@ default_params = {
"key_len_percent_dist": "1,30,69",
"read_fault_one_in": lambda: random.choice([0, 1000]),
"open_metadata_write_fault_one_in": lambda: random.choice([0, 8]),
"open_write_fault_one_in": lambda: random.choice([0, 16]),
"sync_fault_injection": False,
"get_property_one_in": 1000000,
"paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),

View File

@ -168,7 +168,8 @@ IOStatus TestFSWritableFile::Append(
state_.pos_ += data.size();
fs_->WritableFileAppended(state_);
}
return IOStatus::OK();
IOStatus io_s = fs_->InjectWriteError(state_.filename_);
return io_s;
}
IOStatus TestFSWritableFile::PositionedAppend(
@ -194,7 +195,8 @@ IOStatus TestFSWritableFile::PositionedAppend(
return IOStatus::Corruption(msg);
}
target_->PositionedAppend(data, offset, options, dbg);
return IOStatus::OK();
IOStatus io_s = fs_->InjectWriteError(state_.filename_);
return io_s;
}
IOStatus TestFSWritableFile::Close(const IOOptions& options,
@ -724,15 +726,19 @@ IOStatus FaultInjectionTestFS::InjectWriteError(const std::string& file_name) {
}
bool allowed_type = false;
uint64_t number;
FileType cur_type = kTempFile;
std::size_t found = file_name.find_last_of("/");
std::string file = file_name.substr(found);
bool ret = ParseFileName(file, &number, &cur_type);
if (ret) {
for (const auto& type : write_error_allowed_types_) {
if (cur_type == type) {
allowed_type = true;
if (inject_for_all_file_types_) {
allowed_type = true;
} else {
uint64_t number;
FileType cur_type = kTempFile;
std::size_t found = file_name.find_last_of("/");
std::string file = file_name.substr(found);
bool ret = ParseFileName(file, &number, &cur_type);
if (ret) {
for (const auto& type : write_error_allowed_types_) {
if (cur_type == type) {
allowed_type = true;
}
}
}
}

View File

@ -365,6 +365,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
// want to inject. Types decides the file types we want to inject the
// error (e.g., Wal files, SST files), which is empty by default.
void SetRandomWriteError(uint32_t seed, int one_in, IOStatus error,
bool inject_for_all_file_types,
const std::vector<FileType>& types) {
MutexLock l(&mutex_);
Random tmp_rand(seed);
@ -372,6 +373,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
error_ = error;
write_error_rand_ = tmp_rand;
write_error_one_in_ = one_in;
inject_for_all_file_types_ = inject_for_all_file_types;
write_error_allowed_types_ = types;
}
@ -492,6 +494,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
Random write_error_rand_;
int write_error_one_in_;
int metadata_write_error_one_in_;
bool inject_for_all_file_types_;
std::vector<FileType> write_error_allowed_types_;
bool ingest_data_corruption_before_write_;
ChecksumType checksum_handoff_func_tpye_;