Inject fatal write failures to db_stress when DB is running (#8479)
Summary: Add the injest_error_severity flag to control whether the injected IO error is a retryable error or a fatal (unrecoverable) error. A flag in the stress test tracks the fatal case: when a fatal error occurs, the flag is set and the DB is stopped (but not corrupted). Pull Request resolved: https://github.com/facebook/rocksdb/pull/8479 Test Plan: run ./db_stress --reopen=0 --read_fault_one_in=1000 --write_fault_one_in=5 --disable_wal=true --write_buffer_size=3000000 -writepercent=5 -readpercent=50 --injest_error_severity=2 --column_families=1, make check Reviewed By: anand1976 Differential Revision: D29524271 Pulled By: zhichao-cao fbshipit-source-id: 1aa9fb9b5655b0adba6f5ad12005ca8c074c795b
This commit is contained in:
parent
41d32152ce
commit
a95a776d75
@ -827,5 +827,9 @@ DEFINE_string(secondary_cache_uri, "",
|
||||
DEFINE_int32(open_write_fault_one_in, 0,
|
||||
"On non-zero, enables fault injection on file write "
|
||||
"during DB reopen.");
|
||||
DEFINE_int32(injest_error_severity, 1,
|
||||
"The severity of the injested IO Error. 1 is soft error (e.g. "
|
||||
"retryable error), 2 is fatal error, and the default is "
|
||||
"retryable error.");
|
||||
|
||||
#endif // GFLAGS
|
||||
|
@ -33,6 +33,8 @@ DECLARE_int32(write_fault_one_in);
|
||||
DECLARE_int32(open_metadata_write_fault_one_in);
|
||||
DECLARE_int32(open_write_fault_one_in);
|
||||
|
||||
DECLARE_int32(injest_error_severity);
|
||||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
class StressTest;
|
||||
|
||||
|
@ -62,7 +62,8 @@ StressTest::StressTest()
|
||||
new_column_family_name_(1),
|
||||
num_times_reopened_(0),
|
||||
db_preload_finished_(false),
|
||||
cmp_db_(nullptr) {
|
||||
cmp_db_(nullptr),
|
||||
is_db_stopped_(false) {
|
||||
if (FLAGS_destroy_db_initially) {
|
||||
std::vector<std::string> files;
|
||||
db_stress_env->GetChildren(FLAGS_db, &files);
|
||||
@ -614,8 +615,15 @@ void StressTest::OperateDb(ThreadState* thread) {
|
||||
FLAGS_read_fault_one_in);
|
||||
}
|
||||
if (FLAGS_write_fault_one_in) {
|
||||
IOStatus error_msg = IOStatus::IOError("Retryable IO Error");
|
||||
error_msg.SetRetryable(true);
|
||||
IOStatus error_msg;
|
||||
if (FLAGS_injest_error_severity <= 1 || FLAGS_injest_error_severity > 2) {
|
||||
error_msg = IOStatus::IOError("Retryable IO Error");
|
||||
error_msg.SetRetryable(true);
|
||||
} else if (FLAGS_injest_error_severity == 2) {
|
||||
// Inject the fatal error
|
||||
error_msg = IOStatus::IOError("Fatal IO Error");
|
||||
error_msg.SetDataLoss(true);
|
||||
}
|
||||
std::vector<FileType> types = {FileType::kTableFile,
|
||||
FileType::kDescriptorFile,
|
||||
FileType::kCurrentFile};
|
||||
|
@ -237,6 +237,7 @@ class StressTest {
|
||||
// Fields used for continuous verification from another thread
|
||||
DB* cmp_db_;
|
||||
std::vector<ColumnFamilyHandle*> cmp_cfhs_;
|
||||
bool is_db_stopped_;
|
||||
};
|
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
@ -552,8 +552,18 @@ class NonBatchedOpsStressTest : public StressTest {
|
||||
}
|
||||
shared->Put(rand_column_family, rand_key, value_base, false /* pending */);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
if (FLAGS_injest_error_severity >= 2) {
|
||||
if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) {
|
||||
is_db_stopped_ = true;
|
||||
} else if (!is_db_stopped_ ||
|
||||
s.severity() < Status::Severity::kFatalError) {
|
||||
fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "put or merge error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
}
|
||||
}
|
||||
thread->stats.AddBytesForWrites(1, sz);
|
||||
PrintKeyValue(rand_column_family, static_cast<uint32_t>(rand_key), value,
|
||||
@ -615,8 +625,19 @@ class NonBatchedOpsStressTest : public StressTest {
|
||||
shared->Delete(rand_column_family, rand_key, false /* pending */);
|
||||
thread->stats.AddDeletes(1);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "delete error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
if (FLAGS_injest_error_severity >= 2) {
|
||||
if (!is_db_stopped_ &&
|
||||
s.severity() >= Status::Severity::kFatalError) {
|
||||
is_db_stopped_ = true;
|
||||
} else if (!is_db_stopped_ ||
|
||||
s.severity() < Status::Severity::kFatalError) {
|
||||
fprintf(stderr, "delete error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "delete error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
shared->SingleDelete(rand_column_family, rand_key, true /* pending */);
|
||||
@ -637,8 +658,19 @@ class NonBatchedOpsStressTest : public StressTest {
|
||||
shared->SingleDelete(rand_column_family, rand_key, false /* pending */);
|
||||
thread->stats.AddSingleDeletes(1);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "single delete error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
if (FLAGS_injest_error_severity >= 2) {
|
||||
if (!is_db_stopped_ &&
|
||||
s.severity() >= Status::Severity::kFatalError) {
|
||||
is_db_stopped_ = true;
|
||||
} else if (!is_db_stopped_ ||
|
||||
s.severity() < Status::Severity::kFatalError) {
|
||||
fprintf(stderr, "single delete error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "single delete error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
}
|
||||
}
|
||||
}
|
||||
return s;
|
||||
@ -684,8 +716,18 @@ class NonBatchedOpsStressTest : public StressTest {
|
||||
Slice end_key = end_keystr;
|
||||
Status s = db_->DeleteRange(write_opts, cfh, key, end_key);
|
||||
if (!s.ok()) {
|
||||
fprintf(stderr, "delete range error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
if (FLAGS_injest_error_severity >= 2) {
|
||||
if (!is_db_stopped_ && s.severity() >= Status::Severity::kFatalError) {
|
||||
is_db_stopped_ = true;
|
||||
} else if (!is_db_stopped_ ||
|
||||
s.severity() < Status::Severity::kFatalError) {
|
||||
fprintf(stderr, "delete range error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "delete range error: %s\n", s.ToString().c_str());
|
||||
std::terminate();
|
||||
}
|
||||
}
|
||||
int covered = shared->DeleteRange(rand_column_family, rand_key,
|
||||
rand_key + FLAGS_range_deletion_width,
|
||||
|
Loading…
Reference in New Issue
Block a user