Verify restore from backup in db_stress (#4655)
Summary: We already exercised backup functionality in `db_stress` according to the `-backup_one_in` flag. This PR verifies that the backup can be restored and opened, and sanity-checks a few keys.
Changes in this PR:
- Extracted the existing backup-related logic into a helper function, `TestBackupRestore`.
- Added restore logic, which targets a hidden directory named "./.restore<thread number>", similar to how backups target hidden directories named "./.backup<thread number>".
- After the restore, check the existence/non-existence of a few keys.
- With this PR, backup is no longer compatible with clearing column families.
- Also included unrelated fixes to set `ReadOptions::total_order_seek=true` when using `-compare_full_db_state_snapshot`.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4655
Differential Revision: D12972496
Pulled By: ajkr
fbshipit-source-id: 481a40052d9a38d1bd5c5159aa4d7c5a4b546b80
This commit is contained in:
parent
8c2a48742a
commit
8ba17f382e
@ -1735,6 +1735,9 @@ class StressTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (snap_state.key_vec != nullptr) {
|
if (snap_state.key_vec != nullptr) {
|
||||||
|
// When `prefix_extractor` is set, seeking to beginning and scanning
|
||||||
|
// across prefixes are only supported with `total_order_seek` set.
|
||||||
|
ropt.total_order_seek = true;
|
||||||
std::unique_ptr<Iterator> iterator(db->NewIterator(ropt));
|
std::unique_ptr<Iterator> iterator(db->NewIterator(ropt));
|
||||||
std::unique_ptr<std::vector<bool>> tmp_bitvec(new std::vector<bool>(FLAGS_max_key));
|
std::unique_ptr<std::vector<bool>> tmp_bitvec(new std::vector<bool>(FLAGS_max_key));
|
||||||
for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) {
|
for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) {
|
||||||
@ -1884,27 +1887,6 @@ class StressTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (FLAGS_backup_one_in > 0 &&
|
|
||||||
thread->rand.Uniform(FLAGS_backup_one_in) == 0) {
|
|
||||||
std::string backup_dir = FLAGS_db + "/.backup" + ToString(thread->tid);
|
|
||||||
BackupableDBOptions backup_opts(backup_dir);
|
|
||||||
BackupEngine* backup_engine = nullptr;
|
|
||||||
Status s = BackupEngine::Open(FLAGS_env, backup_opts, &backup_engine);
|
|
||||||
if (s.ok()) {
|
|
||||||
s = backup_engine->CreateNewBackup(db_);
|
|
||||||
}
|
|
||||||
if (s.ok()) {
|
|
||||||
s = backup_engine->PurgeOldBackups(0 /* num_backups_to_keep */);
|
|
||||||
}
|
|
||||||
if (!s.ok()) {
|
|
||||||
printf("A BackupEngine operation failed with: %s\n",
|
|
||||||
s.ToString().c_str());
|
|
||||||
}
|
|
||||||
if (backup_engine != nullptr) {
|
|
||||||
delete backup_engine;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (FLAGS_compact_files_one_in > 0 &&
|
if (FLAGS_compact_files_one_in > 0 &&
|
||||||
thread->rand.Uniform(FLAGS_compact_files_one_in) == 0) {
|
thread->rand.Uniform(FLAGS_compact_files_one_in) == 0) {
|
||||||
auto* random_cf =
|
auto* random_cf =
|
||||||
@ -2012,6 +1994,15 @@ class StressTest {
|
|||||||
TestIngestExternalFile(thread, rand_column_families, rand_keys, lock);
|
TestIngestExternalFile(thread, rand_column_families, rand_keys, lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (FLAGS_backup_one_in > 0 &&
|
||||||
|
thread->rand.Uniform(FLAGS_backup_one_in) == 0) {
|
||||||
|
Status s = TestBackupRestore(thread, rand_column_families, rand_keys);
|
||||||
|
if (!s.ok()) {
|
||||||
|
VerificationAbort(shared, "Backup/restore gave inconsistent state",
|
||||||
|
s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (FLAGS_acquire_snapshot_one_in > 0 &&
|
if (FLAGS_acquire_snapshot_one_in > 0 &&
|
||||||
thread->rand.Uniform(FLAGS_acquire_snapshot_one_in) == 0) {
|
thread->rand.Uniform(FLAGS_acquire_snapshot_one_in) == 0) {
|
||||||
auto snapshot = db_->GetSnapshot();
|
auto snapshot = db_->GetSnapshot();
|
||||||
@ -2027,6 +2018,9 @@ class StressTest {
|
|||||||
if (FLAGS_compare_full_db_state_snapshot &&
|
if (FLAGS_compare_full_db_state_snapshot &&
|
||||||
(thread->tid == 0)) {
|
(thread->tid == 0)) {
|
||||||
key_vec = new std::vector<bool>(FLAGS_max_key);
|
key_vec = new std::vector<bool>(FLAGS_max_key);
|
||||||
|
// When `prefix_extractor` is set, seeking to beginning and scanning
|
||||||
|
// across prefixes are only supported with `total_order_seek` set.
|
||||||
|
ropt.total_order_seek = true;
|
||||||
std::unique_ptr<Iterator> iterator(db_->NewIterator(ropt));
|
std::unique_ptr<Iterator> iterator(db_->NewIterator(ropt));
|
||||||
for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) {
|
for (iterator->SeekToFirst(); iterator->Valid(); iterator->Next()) {
|
||||||
uint64_t key_val;
|
uint64_t key_val;
|
||||||
@ -2197,6 +2191,106 @@ class StressTest {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ROCKSDB_LITE
|
||||||
|
// ROCKSDB_LITE variant of TestBackupRestore. It never returns normally:
// it asserts in debug builds, reports the missing feature on stderr, and
// then terminates the process. All parameters are unused.
virtual Status TestBackupRestore(
    ThreadState* /* thread */,
    const std::vector<int>& /* rand_column_families */,
    const std::vector<int64_t>& /* rand_keys */) {
  // Trip immediately in debug builds so the offending caller is visible.
  assert(false);
  fprintf(stderr, "RocksDB lite does not support TestBackupRestore\n");
  // No Status is returned; the process is terminated instead.
  std::terminate();
}
|
||||||
|
#else // ROCKSDB_LITE
|
||||||
|
// Backs up the live DB, restores that backup into a per-thread directory,
// opens the restored copy and spot-checks a few keys against the expected
// state.
//
// Steps, each attempted only while the running status is still OK:
//   1. Open a BackupEngine on "<FLAGS_db>/.backup<tid>" and create a backup
//      of `db_`.
//   2. Close and reopen the engine, then restore the latest backup into
//      "<FLAGS_db>/.restore<tid>" (used for both the DB and WAL directory).
//   3. Purge all backups (keep 0).
//   4. Open the restored DB with every name in `column_family_names_`.
//   5. For each (rand_column_families[i], rand_keys[i]) pair, compare the
//      result of `Get` on the restored DB with `thread->shared->Exists()`.
//
// @param thread               calling thread; supplies `tid` for the
//                             directory names and `shared` for the
//                             expected-state lookup.
// @param rand_column_families column family index for each checked key.
// @param rand_keys            keys to check; must be the same length as
//                             `rand_column_families`.
// @return OK if every step succeeded and key existence matched;
//         `Status::Corruption` if a key's existence differs between the
//         restored DB and the expected state; otherwise the first failing
//         status from the backup/restore/open/read calls.
virtual Status TestBackupRestore(ThreadState* thread,
                                 const std::vector<int>& rand_column_families,
                                 const std::vector<int64_t>& rand_keys) {
  // Note the column families chosen by `rand_column_families` cannot be
  // dropped while the locks for `rand_keys` are held. So we should not have
  // to worry about accessing those column families throughout this function.
  assert(rand_column_families.size() == rand_keys.size());
  std::string backup_dir = FLAGS_db + "/.backup" + ToString(thread->tid);
  std::string restore_dir = FLAGS_db + "/.restore" + ToString(thread->tid);
  BackupableDBOptions backup_opts(backup_dir);
  BackupEngine* backup_engine = nullptr;
  Status s = BackupEngine::Open(FLAGS_env, backup_opts, &backup_engine);
  if (s.ok()) {
    s = backup_engine->CreateNewBackup(db_);
  }
  if (s.ok()) {
    // Restore through a freshly opened engine rather than the instance that
    // just wrote the backup.
    delete backup_engine;
    backup_engine = nullptr;
    s = BackupEngine::Open(FLAGS_env, backup_opts, &backup_engine);
  }
  if (s.ok()) {
    s = backup_engine->RestoreDBFromLatestBackup(restore_dir /* db_dir */,
                                                 restore_dir /* wal_dir */);
  }
  if (s.ok()) {
    s = backup_engine->PurgeOldBackups(0 /* num_backups_to_keep */);
  }
  DB* restored_db = nullptr;
  std::vector<ColumnFamilyHandle*> restored_cf_handles;
  if (s.ok()) {
    Options restore_options(options_);
    // The restored instance is opened without the configured listeners.
    restore_options.listeners.clear();
    std::vector<ColumnFamilyDescriptor> cf_descriptors;
    // TODO(ajkr): `column_family_names_` is not safe to access here when
    // `clear_column_family_one_in != 0`. But we can't easily switch to
    // `ListColumnFamilies` to get names because it won't necessarily give
    // the same order as `column_family_names_`.
    assert(FLAGS_clear_column_family_one_in == 0);
    // Bind by const reference: the name is only read, so copying each
    // element per iteration is unnecessary.
    for (const auto& name : column_family_names_) {
      cf_descriptors.emplace_back(name, ColumnFamilyOptions(restore_options));
    }
    s = DB::Open(DBOptions(restore_options), restore_dir, cf_descriptors,
                 &restored_cf_handles, &restored_db);
  }
  // for simplicity, currently only verifies existence/non-existence of a few
  // keys
  for (size_t i = 0; s.ok() && i < rand_column_families.size(); ++i) {
    std::string key_str = Key(rand_keys[i]);
    Slice key = key_str;
    std::string restored_value;
    Status get_status = restored_db->Get(
        ReadOptions(), restored_cf_handles[rand_column_families[i]], key,
        &restored_value);
    bool exists =
        thread->shared->Exists(rand_column_families[i], rand_keys[i]);
    if (get_status.ok()) {
      if (!exists) {
        s = Status::Corruption(
            "key exists in restore but not in original db");
      }
    } else if (get_status.IsNotFound()) {
      if (exists) {
        s = Status::Corruption(
            "key exists in original db but not in restore");
      }
    } else {
      // Any other read error is reported as-is and stops the loop.
      s = get_status;
    }
  }
  // Cleanup runs on every path, successful or not.
  if (backup_engine != nullptr) {
    delete backup_engine;
    backup_engine = nullptr;
  }
  if (restored_db != nullptr) {
    // Column family handles are released before the DB that issued them.
    for (auto* cf_handle : restored_cf_handles) {
      restored_db->DestroyColumnFamilyHandle(cf_handle);
    }
    delete restored_db;
    restored_db = nullptr;
  }
  if (!s.ok()) {
    printf("A backup/restore operation failed with: %s\n",
           s.ToString().c_str());
  }
  return s;
}
|
||||||
|
#endif // ROCKSDB_LITE
|
||||||
|
|
||||||
void VerificationAbort(SharedState* shared, std::string msg, Status s) const {
|
void VerificationAbort(SharedState* shared, std::string msg, Status s) const {
|
||||||
printf("Verification failed: %s. Status is %s\n", msg.c_str(),
|
printf("Verification failed: %s. Status is %s\n", msg.c_str(),
|
||||||
s.ToString().c_str());
|
s.ToString().c_str());
|
||||||
@ -3662,6 +3756,11 @@ int main(int argc, char** argv) {
|
|||||||
"Error: nooverwritepercent must be 0 when using file ingestion\n");
|
"Error: nooverwritepercent must be 0 when using file ingestion\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
if (FLAGS_clear_column_family_one_in > 0 && FLAGS_backup_one_in > 0) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"Error: clear_column_family_one_in must be 0 when using backup\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
// Choose a location for the test database if none given with --db=<path>
|
// Choose a location for the test database if none given with --db=<path>
|
||||||
if (FLAGS_db.empty()) {
|
if (FLAGS_db.empty()) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user