Remove GetSortedWalFiles/GetCurrentWalFile from the crash test (#6491)
Summary: Currently, `db_stress` tests a randomly picked one of `GetLiveFiles`, `GetSortedWalFiles`, and `GetCurrentWalFile` with a 1/N chance when the command line parameter `get_live_files_and_wal_files_one_in` is specified. The problem is that `GetSortedWalFiles` and `GetCurrentWalFile` are unreliable in the sense that they can return errors if another thread removes a WAL file while they are executing (which is a perfectly plausible and legitimate scenario). The patch splits this command line parameter into three (one for each API), and changes the crash test script so that only `GetLiveFiles` is tested during our continuous crash test runs. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6491 Test Plan: ``` make check python tools/db_crashtest.py whitebox ``` Reviewed By: siying Differential Revision: D20312200 Pulled By: ltamasi fbshipit-source-id: e7c3481eddfe3bd3d5349476e34abc9eee5b7dc8
This commit is contained in:
parent
8ad4b32c5d
commit
217ce20021
@ -128,7 +128,9 @@ DECLARE_int32(universal_min_merge_width);
|
||||
DECLARE_int32(universal_max_merge_width);
|
||||
DECLARE_int32(universal_max_size_amplification_percent);
|
||||
DECLARE_int32(clear_column_family_one_in);
|
||||
DECLARE_int32(get_live_files_and_wal_files_one_in);
|
||||
DECLARE_int32(get_live_files_one_in);
|
||||
DECLARE_int32(get_sorted_wal_files_one_in);
|
||||
DECLARE_int32(get_current_wal_file_one_in);
|
||||
DECLARE_int32(set_options_one_in);
|
||||
DECLARE_int32(set_in_place_one_in);
|
||||
DECLARE_int64(cache_size);
|
||||
|
@ -256,10 +256,21 @@ DEFINE_int32(clear_column_family_one_in, 1000000,
|
||||
"it again. If N == 0, never drop/create column families. "
|
||||
"When test_batches_snapshots is true, this flag has no effect");
|
||||
|
||||
DEFINE_int32(get_live_files_and_wal_files_one_in, 1000000,
|
||||
"With a chance of 1/N, call GetLiveFiles, GetSortedWalFiles "
|
||||
"and GetCurrentWalFile to verify if it returns correctly. If "
|
||||
"N == 0, never call the three interfaces.");
|
||||
DEFINE_int32(get_live_files_one_in, 1000000,
|
||||
"With a chance of 1/N, call GetLiveFiles to verify if it returns "
|
||||
"correctly. If N == 0, do not call the interface.");
|
||||
|
||||
DEFINE_int32(
|
||||
get_sorted_wal_files_one_in, 1000000,
|
||||
"With a chance of 1/N, call GetSortedWalFiles to verify if it returns "
|
||||
"correctly. (Note that this API may legitimately return an error.) If N == "
|
||||
"0, do not call the interface.");
|
||||
|
||||
DEFINE_int32(
|
||||
get_current_wal_file_one_in, 1000000,
|
||||
"With a chance of 1/N, call GetCurrentWalFile to verify if it returns "
|
||||
"correctly. (Note that this API may legitimately return an error.) If N == "
|
||||
"0, do not call the interface.");
|
||||
|
||||
DEFINE_int32(set_options_one_in, 0,
|
||||
"With a chance of 1/N, change some random options");
|
||||
|
@ -593,13 +593,28 @@ void StressTest::OperateDb(ThreadState* thread) {
|
||||
}
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
// Every 1 in N verify the one of the following: 1) GetLiveFiles
|
||||
// 2) GetSortedWalFiles 3) GetCurrentWalFile. Each time, randomly select
|
||||
// one of them to run the test.
|
||||
if (thread->rand.OneInOpt(FLAGS_get_live_files_and_wal_files_one_in)) {
|
||||
Status status = VerifyGetLiveAndWalFiles(thread);
|
||||
// Verify GetLiveFiles with a 1 in N chance.
|
||||
if (thread->rand.OneInOpt(FLAGS_get_live_files_one_in)) {
|
||||
Status status = VerifyGetLiveFiles();
|
||||
if (!status.ok()) {
|
||||
VerificationAbort(shared, "VerifyGetLiveAndWalFiles status not OK",
|
||||
VerificationAbort(shared, "VerifyGetLiveFiles status not OK", status);
|
||||
}
|
||||
}
|
||||
|
||||
// Verify GetSortedWalFiles with a 1 in N chance.
|
||||
if (thread->rand.OneInOpt(FLAGS_get_sorted_wal_files_one_in)) {
|
||||
Status status = VerifyGetSortedWalFiles();
|
||||
if (!status.ok()) {
|
||||
VerificationAbort(shared, "VerifyGetSortedWalFiles status not OK",
|
||||
status);
|
||||
}
|
||||
}
|
||||
|
||||
// Verify GetCurrentWalFile with a 1 in N chance.
|
||||
if (thread->rand.OneInOpt(FLAGS_get_current_wal_file_one_in)) {
|
||||
Status status = VerifyGetCurrentWalFile();
|
||||
if (!status.ok()) {
|
||||
VerificationAbort(shared, "VerifyGetCurrentWalFile status not OK",
|
||||
status);
|
||||
}
|
||||
}
|
||||
@ -978,28 +993,23 @@ Status StressTest::TestIterate(ThreadState* thread,
|
||||
}
|
||||
|
||||
#ifndef ROCKSDB_LITE
|
||||
// Test the return status of GetLiveFiles, GetSortedWalFiles, and
|
||||
// GetCurrentWalFile. Each time, randomly select one of them to run
|
||||
// and return the status.
|
||||
Status StressTest::VerifyGetLiveAndWalFiles(ThreadState* thread) {
|
||||
int case_num = thread->rand.Uniform(3);
|
||||
if (case_num == 0) {
|
||||
std::vector<std::string> live_file;
|
||||
uint64_t manifest_size;
|
||||
return db_->GetLiveFiles(live_file, &manifest_size);
|
||||
}
|
||||
// Test the return status of GetLiveFiles.
|
||||
Status StressTest::VerifyGetLiveFiles() const {
|
||||
std::vector<std::string> live_file;
|
||||
uint64_t manifest_size = 0;
|
||||
return db_->GetLiveFiles(live_file, &manifest_size);
|
||||
}
|
||||
|
||||
if (case_num == 1) {
|
||||
VectorLogPtr log_ptr;
|
||||
return db_->GetSortedWalFiles(log_ptr);
|
||||
}
|
||||
// Test the return status of GetSortedWalFiles.
|
||||
Status StressTest::VerifyGetSortedWalFiles() const {
|
||||
VectorLogPtr log_ptr;
|
||||
return db_->GetSortedWalFiles(log_ptr);
|
||||
}
|
||||
|
||||
if (case_num == 2) {
|
||||
std::unique_ptr<LogFile> cur_wal_file;
|
||||
return db_->GetCurrentWalFile(&cur_wal_file);
|
||||
}
|
||||
assert(false);
|
||||
return Status::Corruption("Undefined case happens!");
|
||||
// Test the return status of GetCurrentWalFile.
|
||||
Status StressTest::VerifyGetCurrentWalFile() const {
|
||||
std::unique_ptr<LogFile> cur_wal_file;
|
||||
return db_->GetCurrentWalFile(&cur_wal_file);
|
||||
}
|
||||
#endif // !ROCKSDB_LITE
|
||||
|
||||
|
@ -184,7 +184,10 @@ class StressTest {
|
||||
|
||||
Status MaybeReleaseSnapshots(ThreadState* thread, uint64_t i);
|
||||
#ifndef ROCKSDB_LITE
|
||||
Status VerifyGetLiveAndWalFiles(ThreadState* thread);
|
||||
Status VerifyGetLiveFiles() const;
|
||||
Status VerifyGetSortedWalFiles() const;
|
||||
Status VerifyGetCurrentWalFile() const;
|
||||
|
||||
virtual Status TestApproximateSize(
|
||||
ThreadState* thread, uint64_t iteration,
|
||||
const std::vector<int>& rand_column_families,
|
||||
|
@ -51,7 +51,11 @@ default_params = {
|
||||
"enable_pipelined_write": lambda: random.randint(0, 1),
|
||||
"expected_values_path": expected_values_file.name,
|
||||
"flush_one_in": 1000000,
|
||||
"get_live_files_and_wal_files_one_in": 1000000,
|
||||
"get_live_files_one_in": 1000000,
|
||||
# Note: the following two are intentionally disabled as the corresponding
|
||||
# APIs are not guaranteed to succeed.
|
||||
"get_sorted_wal_files_one_in": 0,
|
||||
"get_current_wal_file_one_in": 0,
|
||||
# Temporarily disable hash index
|
||||
"index_type": lambda: random.choice([0,2]),
|
||||
"max_background_compactions": 20,
|
||||
|
Loading…
Reference in New Issue
Block a user