diff --git a/HISTORY.md b/HISTORY.md index 0e5591977..e8542b878 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,7 @@ ## Unreleased ### Bug Fixes * Fix `DisableManualCompaction()` to cancel compactions even when they are waiting on automatic compactions to drain due to `CompactRangeOptions::exclusive_manual_compactions == true`. +* Fix contract of `Env::ReopenWritableFile()` and `FileSystem::ReopenWritableFile()` to specify any existing file must not be deleted or truncated. ## 6.25.1 (2021-09-28) ### Bug Fixes diff --git a/db/external_sst_file_basic_test.cc b/db/external_sst_file_basic_test.cc index a006a817d..bbede382a 100644 --- a/db/external_sst_file_basic_test.cc +++ b/db/external_sst_file_basic_test.cc @@ -1147,7 +1147,7 @@ TEST_F(ExternalSSTFileBasicTest, SyncFailure) { } Options sst_file_writer_options; - sst_file_writer_options.env = env_; + sst_file_writer_options.env = fault_injection_test_env_.get(); std::unique_ptr sst_file_writer( new SstFileWriter(EnvOptions(), sst_file_writer_options)); std::string file_name = diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 602a07125..4dd39d2a8 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -19,7 +19,10 @@ static bool ValidateUint32Range(const char* flagname, uint64_t value) { return true; } -DEFINE_uint64(seed, 2341234, "Seed for PRNG"); +DEFINE_uint64(seed, 2341234, + "Seed for PRNG. When --nooverwritepercent is " + "nonzero and --expected_values_dir is nonempty, this value " + "must be fixed across invocations."); static const bool FLAGS_seed_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_seed, &ValidateUint32Range); @@ -453,7 +456,8 @@ DEFINE_string( "provided and non-empty, the DB state will be verified against these " "values after recovery. --max_key and --column_family must be kept the " "same across invocations of this program that use the same " - "--expected_values_path."); + "--expected_values_path. See --seed and --nooverwritepercent for further " + "requirements."); DEFINE_bool(verify_checksum, false, "Verify checksum for every block read from storage"); @@ -644,7 +648,8 @@ static const bool FLAGS_delrangepercent_dummy __attribute__((__unused__)) = DEFINE_int32(nooverwritepercent, 60, "Ratio of keys without overwrite to total workload (expressed as " - " a percentage)"); + "a percentage). When --expected_values_dir is nonempty, must " + "keep this value constant across invocations."); static const bool FLAGS_nooverwritepercent_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_nooverwritepercent, &ValidateInt32Percent); diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 880046e5a..ce9827efb 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -266,11 +266,12 @@ class Env { std::unique_ptr* result, const EnvOptions& options) = 0; - // Create an object that writes to a new file with the specified - // name. Deletes any existing file with the same name and creates a - // new file. On success, stores a pointer to the new file in - // *result and returns OK. On failure stores nullptr in *result and - // returns non-OK. + // Create an object that writes to a file with the specified name. + // `WritableFile::Append()`s will append after any existing content. If the + // file does not already exist, creates it. + // + // On success, stores a pointer to the file in *result and returns OK. On + // failure stores nullptr in *result and returns non-OK. // // The returned file will only be accessed by one thread at a time. virtual Status ReopenWritableFile(const std::string& /*fname*/, diff --git a/include/rocksdb/file_system.h b/include/rocksdb/file_system.h index 2c73f720a..4dd74e8ef 100644 --- a/include/rocksdb/file_system.h +++ b/include/rocksdb/file_system.h @@ -309,11 +309,12 @@ class FileSystem { std::unique_ptr* result, IODebugContext* dbg) = 0; - // Create an object that writes to a new file with the specified - // name. Deletes any existing file with the same name and creates a - // new file. On success, stores a pointer to the new file in - // *result and returns OK. On failure stores nullptr in *result and - // returns non-OK. + // Create an object that writes to a file with the specified name. + // `FSWritableFile::Append()`s will append after any existing content. If the + // file does not already exist, creates it. + // + // On success, stores a pointer to the file in *result and returns OK. On + // failure stores nullptr in *result and returns non-OK. // // The returned file will only be accessed by one thread at a time. virtual IOStatus ReopenWritableFile( diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 3a96079bd..1d0b52723 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -78,6 +78,9 @@ default_params = { "max_key": 100000000, "max_write_buffer_number": 3, "mmap_read": lambda: random.randint(0, 1), + # Setting `nooverwritepercent > 0` is only possible because we do not vary + # the random seed, so the same keys are chosen by every run for disallowing + # overwrites. "nooverwritepercent": 1, "open_files": lambda : random.choice([-1, -1, 100, 500000]), "optimize_filters_for_memory": lambda: random.randint(0, 1), diff --git a/utilities/fault_injection_env.cc b/utilities/fault_injection_env.cc index 650655f2d..5ef206aec 100644 --- a/utilities/fault_injection_env.cc +++ b/utilities/fault_injection_env.cc @@ -288,7 +288,7 @@ Status FaultInjectionTestEnv::NewWritableFile( // again then it will be truncated - so forget our saved state. UntrackFile(fname); MutexLock l(&mutex_); - open_files_.insert(fname); + open_managed_files_.insert(fname); auto dir_and_name = GetDirAndName(fname); auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; list.insert(dir_and_name.second); @@ -302,17 +302,49 @@ Status FaultInjectionTestEnv::ReopenWritableFile( if (!IsFilesystemActive()) { return GetError(); } - Status s = target()->ReopenWritableFile(fname, result, soptions); + + bool exists; + Status s, exists_s = target()->FileExists(fname); + if (exists_s.IsNotFound()) { + exists = false; + } else if (exists_s.ok()) { + exists = true; + } else { + s = exists_s; + exists = false; + } + if (s.ok()) { - result->reset(new TestWritableFile(fname, std::move(*result), this)); - // WritableFileWriter* file is opened - // again then it will be truncated - so forget our saved state. - UntrackFile(fname); - MutexLock l(&mutex_); - open_files_.insert(fname); - auto dir_and_name = GetDirAndName(fname); - auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; - list.insert(dir_and_name.second); + s = target()->ReopenWritableFile(fname, result, soptions); + } + + // Only track files we created. Files created outside of this + // `FaultInjectionTestEnv` are not eligible for tracking/data dropping + // (for example, they may contain data a previous db_stress run expects to + // be recovered). This could be extended to track/drop data appended once + // the file is under `FaultInjectionTestEnv`'s control. + if (s.ok()) { + bool should_track; + { + MutexLock l(&mutex_); + if (db_file_state_.find(fname) != db_file_state_.end()) { + // It was written by this `Env` earlier. + assert(exists); + should_track = true; + } else if (!exists) { + // It was created by this `Env` just now. + should_track = true; + open_managed_files_.insert(fname); + auto dir_and_name = GetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list.insert(dir_and_name.second); + } else { + should_track = false; + } + } + if (should_track) { + result->reset(new TestWritableFile(fname, std::move(*result), this)); + } } return s; } @@ -330,7 +362,7 @@ Status FaultInjectionTestEnv::NewRandomRWFile( // again then it will be truncated - so forget our saved state. UntrackFile(fname); MutexLock l(&mutex_); - open_files_.insert(fname); + open_managed_files_.insert(fname); auto dir_and_name = GetDirAndName(fname); auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; list.insert(dir_and_name.second); @@ -394,17 +426,43 @@ Status FaultInjectionTestEnv::RenameFile(const std::string& s, return ret; } +Status FaultInjectionTestEnv::LinkFile(const std::string& s, + const std::string& t) { + if (!IsFilesystemActive()) { + return GetError(); + } + Status ret = EnvWrapper::LinkFile(s, t); + + if (ret.ok()) { + MutexLock l(&mutex_); + if (db_file_state_.find(s) != db_file_state_.end()) { + db_file_state_[t] = db_file_state_[s]; + } + + auto sdn = GetDirAndName(s); + auto tdn = GetDirAndName(t); + if (dir_to_new_files_since_last_sync_[sdn.first].find(sdn.second) != + dir_to_new_files_since_last_sync_[sdn.first].end()) { + auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; + assert(tlist.find(tdn.second) == tlist.end()); + tlist.insert(tdn.second); + } + } + + return ret; +} + void FaultInjectionTestEnv::WritableFileClosed(const FileState& state) { MutexLock l(&mutex_); - if (open_files_.find(state.filename_) != open_files_.end()) { + if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) { db_file_state_[state.filename_] = state; - open_files_.erase(state.filename_); + open_managed_files_.erase(state.filename_); } } void FaultInjectionTestEnv::WritableFileSynced(const FileState& state) { MutexLock l(&mutex_); - if (open_files_.find(state.filename_) != open_files_.end()) { + if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) { if (db_file_state_.find(state.filename_) == db_file_state_.end()) { db_file_state_.insert(std::make_pair(state.filename_, state)); } else { @@ -415,7 +473,7 @@ void FaultInjectionTestEnv::WritableFileSynced(const FileState& state) { void FaultInjectionTestEnv::WritableFileAppended(const FileState& state) { MutexLock l(&mutex_); - if (open_files_.find(state.filename_) != open_files_.end()) { + if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) { if (db_file_state_.find(state.filename_) == db_file_state_.end()) { db_file_state_.insert(std::make_pair(state.filename_, state)); } else { @@ -485,6 +543,6 @@ void FaultInjectionTestEnv::UntrackFile(const std::string& f) { dir_to_new_files_since_last_sync_[dir_and_name.first].erase( dir_and_name.second); db_file_state_.erase(f); - open_files_.erase(f); + open_managed_files_.erase(f); } } // namespace ROCKSDB_NAMESPACE diff --git a/utilities/fault_injection_env.h b/utilities/fault_injection_env.h index fa1fa0d64..b82b45237 100644 --- a/utilities/fault_injection_env.h +++ b/utilities/fault_injection_env.h @@ -175,6 +175,8 @@ class FaultInjectionTestEnv : public EnvWrapper { virtual Status RenameFile(const std::string& s, const std::string& t) override; + virtual Status LinkFile(const std::string& s, const std::string& t) override; + // Undef to eliminate clash on Windows #undef GetFreeSpace virtual Status GetFreeSpace(const std::string& path, @@ -237,13 +239,13 @@ class FaultInjectionTestEnv : public EnvWrapper { SetFilesystemActiveNoLock(active, error); error.PermitUncheckedError(); } - void AssertNoOpenFile() { assert(open_files_.empty()); } + void AssertNoOpenFile() { assert(open_managed_files_.empty()); } Status GetError() { return error_; } private: port::Mutex mutex_; std::map db_file_state_; - std::set open_files_; + std::set open_managed_files_; std::unordered_map> dir_to_new_files_since_last_sync_; bool filesystem_active_; // Record flushes, syncs, writes diff --git a/utilities/fault_injection_fs.cc b/utilities/fault_injection_fs.cc index 45399f24f..9babdd5e5 100644 --- a/utilities/fault_injection_fs.cc +++ b/utilities/fault_injection_fs.cc @@ -443,7 +443,7 @@ IOStatus FaultInjectionTestFS::NewWritableFile( UntrackFile(fname); { MutexLock l(&mutex_); - open_files_.insert(fname); + open_managed_files_.insert(fname); auto dir_and_name = TestFSGetDirAndName(fname); auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; // The new file could overwrite an old one. Here we simplify @@ -476,19 +476,50 @@ IOStatus FaultInjectionTestFS::ReopenWritableFile( return in_s; } } - IOStatus io_s = target()->ReopenWritableFile(fname, file_opts, result, dbg); + + bool exists; + IOStatus io_s, + exists_s = target()->FileExists(fname, IOOptions(), nullptr /* dbg */); + if (exists_s.IsNotFound()) { + exists = false; + } else if (exists_s.ok()) { + exists = true; + } else { + io_s = exists_s; + exists = false; + } + if (io_s.ok()) { - result->reset( - new TestFSWritableFile(fname, file_opts, std::move(*result), this)); - // WritableFileWriter* file is opened - // again then it will be truncated - so forget our saved state. - UntrackFile(fname); + io_s = target()->ReopenWritableFile(fname, file_opts, result, dbg); + } + + // Only track files we created. Files created outside of this + // `FaultInjectionTestFS` are not eligible for tracking/data dropping + // (for example, they may contain data a previous db_stress run expects to + // be recovered). This could be extended to track/drop data appended once + // the file is under `FaultInjectionTestFS`'s control. + if (io_s.ok()) { + bool should_track; { MutexLock l(&mutex_); - open_files_.insert(fname); - auto dir_and_name = TestFSGetDirAndName(fname); - auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; - list[dir_and_name.second] = kNewFileNoOverwrite; + if (db_file_state_.find(fname) != db_file_state_.end()) { + // It was written by this `FileSystem` earlier. + assert(exists); + should_track = true; + } else if (!exists) { + // It was created by this `FileSystem` just now. + should_track = true; + open_managed_files_.insert(fname); + auto dir_and_name = TestFSGetDirAndName(fname); + auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; + list[dir_and_name.second] = kNewFileNoOverwrite; + } else { + should_track = false; + } + } + if (should_track) { + result->reset( + new TestFSWritableFile(fname, file_opts, std::move(*result), this)); } { IOStatus in_s = InjectMetadataWriteError(); @@ -523,7 +554,7 @@ IOStatus FaultInjectionTestFS::NewRandomRWFile( UntrackFile(fname); { MutexLock l(&mutex_); - open_files_.insert(fname); + open_managed_files_.insert(fname); auto dir_and_name = TestFSGetDirAndName(fname); auto& list = dir_to_new_files_since_last_sync_[dir_and_name.first]; // It could be overwriting an old file, but we simplify the @@ -655,17 +686,62 @@ IOStatus FaultInjectionTestFS::RenameFile(const std::string& s, return io_s; } +IOStatus FaultInjectionTestFS::LinkFile(const std::string& s, + const std::string& t, + const IOOptions& options, + IODebugContext* dbg) { + if (!IsFilesystemActive()) { + return GetError(); + } + { + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + + // Using the value in `dir_to_new_files_since_last_sync_` for the source file + // may be a more reasonable choice. + std::string previous_contents = kNewFileNoOverwrite; + + IOStatus io_s = FileSystemWrapper::LinkFile(s, t, options, dbg); + + if (io_s.ok()) { + { + MutexLock l(&mutex_); + if (db_file_state_.find(s) != db_file_state_.end()) { + db_file_state_[t] = db_file_state_[s]; + } + + auto sdn = TestFSGetDirAndName(s); + auto tdn = TestFSGetDirAndName(t); + if (dir_to_new_files_since_last_sync_[sdn.first].find(sdn.second) != + dir_to_new_files_since_last_sync_[sdn.first].end()) { + auto& tlist = dir_to_new_files_since_last_sync_[tdn.first]; + assert(tlist.find(tdn.second) == tlist.end()); + tlist[tdn.second] = previous_contents; + } + } + IOStatus in_s = InjectMetadataWriteError(); + if (!in_s.ok()) { + return in_s; + } + } + + return io_s; +} + void FaultInjectionTestFS::WritableFileClosed(const FSFileState& state) { MutexLock l(&mutex_); - if (open_files_.find(state.filename_) != open_files_.end()) { + if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) { db_file_state_[state.filename_] = state; - open_files_.erase(state.filename_); + open_managed_files_.erase(state.filename_); } } void FaultInjectionTestFS::WritableFileSynced(const FSFileState& state) { MutexLock l(&mutex_); - if (open_files_.find(state.filename_) != open_files_.end()) { + if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) { if (db_file_state_.find(state.filename_) == db_file_state_.end()) { db_file_state_.insert(std::make_pair(state.filename_, state)); } else { @@ -676,7 +752,7 @@ void FaultInjectionTestFS::WritableFileSynced(const FSFileState& state) { void FaultInjectionTestFS::WritableFileAppended(const FSFileState& state) { MutexLock l(&mutex_); - if (open_files_.find(state.filename_) != open_files_.end()) { + if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) { if (db_file_state_.find(state.filename_) == db_file_state_.end()) { db_file_state_.insert(std::make_pair(state.filename_, state)); } else { @@ -755,7 +831,7 @@ void FaultInjectionTestFS::UntrackFile(const std::string& f) { dir_to_new_files_since_last_sync_[dir_and_name.first].erase( dir_and_name.second); db_file_state_.erase(f); - open_files_.erase(f); + open_managed_files_.erase(f); } IOStatus FaultInjectionTestFS::InjectThreadSpecificReadError( diff --git a/utilities/fault_injection_fs.h b/utilities/fault_injection_fs.h index 2ed2b5c01..c160e2c40 100644 --- a/utilities/fault_injection_fs.h +++ b/utilities/fault_injection_fs.h @@ -236,6 +236,10 @@ class FaultInjectionTestFS : public FileSystemWrapper { const IOOptions& options, IODebugContext* dbg) override; + virtual IOStatus LinkFile(const std::string& src, const std::string& target, + const IOOptions& options, + IODebugContext* dbg) override; + // Undef to eliminate clash on Windows #undef GetFreeSpace virtual IOStatus GetFreeSpace(const std::string& path, @@ -321,7 +325,7 @@ class FaultInjectionTestFS : public FileSystemWrapper { MutexLock l(&mutex_); filesystem_writable_ = writable; } - void AssertNoOpenFile() { assert(open_files_.empty()); } + void AssertNoOpenFile() { assert(open_managed_files_.empty()); } IOStatus GetError() { return error_; } @@ -500,7 +504,7 @@ class FaultInjectionTestFS : public FileSystemWrapper { private: port::Mutex mutex_; std::map db_file_state_; - std::set open_files_; + std::set open_managed_files_; // directory -> (file name -> file contents to recover) // When data is recovered from unsyned parent directory, the files with // empty file contents to recover is deleted. Those with non-empty ones