diff --git a/db_stress_tool/cf_consistency_stress.cc b/db_stress_tool/cf_consistency_stress.cc index acddae159..a0d38ffef 100644 --- a/db_stress_tool/cf_consistency_stress.cc +++ b/db_stress_tool/cf_consistency_stress.cc @@ -283,96 +283,6 @@ class CfConsistencyStressTest : public StressTest { return column_families_[thread->rand.Next() % column_families_.size()]; } -#ifdef ROCKSDB_LITE - Status TestCheckpoint(ThreadState* /* thread */, - const std::vector& /* rand_column_families */, - const std::vector& /* rand_keys */) override { - assert(false); - fprintf(stderr, - "RocksDB lite does not support " - "TestCheckpoint\n"); - std::terminate(); - } -#else - Status TestCheckpoint(ThreadState* thread, - const std::vector& /* rand_column_families */, - const std::vector& /* rand_keys */) override { - std::string checkpoint_dir = - FLAGS_db + "/.checkpoint" + ToString(thread->tid); - - // We need to clear DB including manifest files, so make a copy - Options opt_copy = options_; - opt_copy.env = db_stress_env->target(); - DestroyDB(checkpoint_dir, opt_copy); - - if (db_stress_env->FileExists(checkpoint_dir).ok()) { - // If the directory might still exist, try to delete the files one by one. - // Likely a trash file is still there. - Status my_s = DestroyDir(db_stress_env, checkpoint_dir); - if (!my_s.ok()) { - fprintf(stderr, "Fail to destory directory before checkpoint: %s", - my_s.ToString().c_str()); - } - } - - Checkpoint* checkpoint = nullptr; - Status s = Checkpoint::Create(db_, &checkpoint); - if (s.ok()) { - s = checkpoint->CreateCheckpoint(checkpoint_dir); - if (!s.ok()) { - fprintf(stderr, "Fail to create checkpoint to %s\n", - checkpoint_dir.c_str()); - std::vector files; - Status my_s = db_stress_env->GetChildren(checkpoint_dir, &files); - if (my_s.ok()) { - for (const auto& f : files) { - fprintf(stderr, " %s\n", f.c_str()); - } - } else { - fprintf(stderr, "Fail to get files under the directory to %s\n", - my_s.ToString().c_str()); - } - } - } - std::vector cf_handles; - DB* checkpoint_db = nullptr; - if (s.ok()) { - delete checkpoint; - checkpoint = nullptr; - Options options(options_); - options.listeners.clear(); - std::vector cf_descs; - // TODO(ajkr): `column_family_names_` is not safe to access here when - // `clear_column_family_one_in != 0`. But we can't easily switch to - // `ListColumnFamilies` to get names because it won't necessarily give - // the same order as `column_family_names_`. - if (FLAGS_clear_column_family_one_in == 0) { - for (const auto& name : column_family_names_) { - cf_descs.emplace_back(name, ColumnFamilyOptions(options)); - } - s = DB::OpenForReadOnly(DBOptions(options), checkpoint_dir, cf_descs, - &cf_handles, &checkpoint_db); - } - } - if (checkpoint_db != nullptr) { - for (auto cfh : cf_handles) { - delete cfh; - } - cf_handles.clear(); - delete checkpoint_db; - checkpoint_db = nullptr; - } - - if (!s.ok()) { - fprintf(stderr, "A checkpoint operation failed with: %s\n", - s.ToString().c_str()); - } else { - DestroyDB(checkpoint_dir, opt_copy); - } - return s; - } -#endif // !ROCKSDB_LITE - void VerifyDb(ThreadState* thread) const override { ReadOptions options(FLAGS_verify_checksum, true); // We must set total_order_seek to true because we are doing a SeekToFirst diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index dc7aee56c..783d0ca19 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1280,6 +1280,7 @@ Status StressTest::TestBackupRestore( from = "BackupEngine::VerifyBackup"; } } + const bool allow_persistent = thread->tid == 0; // not too many bool from_latest = false; if (s.ok()) { int count = static_cast(backup_info.size()); @@ -1302,7 +1303,7 @@ Status StressTest::TestBackupRestore( } if (s.ok()) { uint32_t to_keep = 0; - if (thread->tid == 0) { + if (allow_persistent) { // allow one thread to keep up to 2 backups to_keep = thread->rand.Uniform(3); } @@ -1338,8 +1339,7 @@ Status StressTest::TestBackupRestore( // // For simplicity, currently only verifies existence/non-existence of a // single key - for (size_t i = 0; - from_latest && restored_db && s.ok() && i < rand_column_families.size(); + for (size_t i = 0; restored_db && s.ok() && i < rand_column_families.size(); ++i) { std::string key_str = Key(rand_keys[0]); Slice key = key_str; @@ -1349,11 +1349,11 @@ Status StressTest::TestBackupRestore( &restored_value); bool exists = thread->shared->Exists(rand_column_families[i], rand_keys[0]); if (get_status.ok()) { - if (!exists) { + if (!exists && from_latest && ShouldAcquireMutexOnKey()) { s = Status::Corruption("key exists in restore but not in original db"); } } else if (get_status.IsNotFound()) { - if (exists) { + if (exists && from_latest && ShouldAcquireMutexOnKey()) { s = Status::Corruption("key exists in original db but not in restore"); } } else { @@ -1374,6 +1374,21 @@ Status StressTest::TestBackupRestore( delete restored_db; restored_db = nullptr; } + if (s.ok()) { + // Preserve directories on failure, or allowed persistent backup + if (!allow_persistent) { + s = DestroyDir(db_stress_env, backup_dir); + if (!s.ok()) { + from = "Destroy backup dir"; + } + } + } + if (s.ok()) { + s = DestroyDir(db_stress_env, restore_dir); + if (!s.ok()) { + from = "Destroy restore dir"; + } + } if (!s.ok()) { fprintf(stderr, "Failure in %s with: %s\n", from.c_str(), s.ToString().c_str()); @@ -1426,10 +1441,6 @@ Status StressTest::TestApproximateSize( Status StressTest::TestCheckpoint(ThreadState* thread, const std::vector& rand_column_families, const std::vector& rand_keys) { - // Note the column families chosen by `rand_column_families` cannot be - // dropped while the locks for `rand_keys` are held. So we should not have - // to worry about accessing those column families throughout this function. - assert(rand_column_families.size() == rand_keys.size()); std::string checkpoint_dir = FLAGS_db + "/.checkpoint" + ToString(thread->tid); Options tmp_opts(options_); @@ -1479,6 +1490,7 @@ Status StressTest::TestCheckpoint(ThreadState* thread, // `clear_column_family_one_in != 0`. But we can't easily switch to // `ListColumnFamilies` to get names because it won't necessarily give // the same order as `column_family_names_`. + assert(FLAGS_clear_column_family_one_in == 0); if (FLAGS_clear_column_family_one_in == 0) { for (const auto& name : column_family_names_) { cf_descs.emplace_back(name, ColumnFamilyOptions(options)); @@ -1488,21 +1500,24 @@ Status StressTest::TestCheckpoint(ThreadState* thread, } } if (checkpoint_db != nullptr) { + // Note the column families chosen by `rand_column_families` cannot be + // dropped while the locks for `rand_keys` are held. So we should not have + // to worry about accessing those column families throughout this function. for (size_t i = 0; s.ok() && i < rand_column_families.size(); ++i) { - std::string key_str = Key(rand_keys[i]); + std::string key_str = Key(rand_keys[0]); Slice key = key_str; std::string value; Status get_status = checkpoint_db->Get( ReadOptions(), cf_handles[rand_column_families[i]], key, &value); bool exists = - thread->shared->Exists(rand_column_families[i], rand_keys[i]); + thread->shared->Exists(rand_column_families[i], rand_keys[0]); if (get_status.ok()) { - if (!exists) { + if (!exists && ShouldAcquireMutexOnKey()) { s = Status::Corruption( "key exists in checkpoint but not in original db"); } } else if (get_status.IsNotFound()) { - if (exists) { + if (exists && ShouldAcquireMutexOnKey()) { s = Status::Corruption( "key exists in original db but not in checkpoint"); }