diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index f0ff8bc6b..51f513832 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -852,8 +852,8 @@ class DBImpl : public DB { InstrumentedMutex* mutex() const { return &mutex_; } // Initialize a brand new DB. The DB directory is expected to be empty before - // calling it. - Status NewDB(); + // calling it. Push new manifest file name into `new_filenames`. + Status NewDB(std::vector* new_filenames); // This is to be used only by internal rocksdb classes. static Status Open(const DBOptions& db_options, const std::string& name, diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index e6f89cdbb..e34d4f3e3 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -253,7 +253,7 @@ Status DBImpl::ValidateOptions(const DBOptions& db_options) { return Status::OK(); } -Status DBImpl::NewDB() { +Status DBImpl::NewDB(std::vector* new_filenames) { VersionEdit new_db; Status s = SetIdentityFile(env_, dbname_); if (!s.ok()) { @@ -293,6 +293,10 @@ Status DBImpl::NewDB() { if (s.ok()) { // Make "CURRENT" file that points to the new manifest file. s = SetCurrentFile(fs_.get(), dbname_, 1, directories_.GetDbDir()); + if (new_filenames) { + new_filenames->emplace_back( + manifest.substr(manifest.find_last_of("/\\") + 1)); + } } else { fs_->DeleteFile(manifest, IOOptions(), nullptr); } @@ -356,6 +360,7 @@ Status DBImpl::Recover( bool is_new_db = false; assert(db_lock_ == nullptr); + std::vector files_in_dbname; if (!read_only) { Status s = directories_.SetDirectories(fs_.get(), dbname_, immutable_db_options_.wal_dir, @@ -379,10 +384,12 @@ Status DBImpl::Recover( s = env_->FileExists(current_fname); } else { s = Status::NotFound(); - std::vector files; - // No need to check return value - env_->GetChildren(dbname_, &files); - for (const std::string& file : files) { + Status io_s = env_->GetChildren(dbname_, &files_in_dbname); + if (!io_s.ok()) { + s = io_s; + files_in_dbname.clear(); + } + for (const std::string& file : files_in_dbname) { uint64_t number = 0; FileType type = kLogFile; // initialize if (ParseFileName(file, &number, &type) && type == kDescriptorFile) { @@ -396,7 +403,7 @@ Status DBImpl::Recover( } if (s.IsNotFound()) { if (immutable_db_options_.create_if_missing) { - s = NewDB(); + s = NewDB(&files_in_dbname); is_new_db = true; if (!s.ok()) { return s; @@ -438,6 +445,16 @@ Status DBImpl::Recover( } } } + } else if (immutable_db_options_.best_efforts_recovery) { + assert(files_in_dbname.empty()); + Status s = env_->GetChildren(dbname_, &files_in_dbname); + if (s.IsNotFound()) { + return Status::InvalidArgument(dbname_, + "does not exist (open for read only)"); + } else if (s.IsIOError()) { + return s; + } + assert(s.ok()); } assert(db_id_.empty()); Status s; @@ -445,8 +462,9 @@ Status DBImpl::Recover( if (!immutable_db_options_.best_efforts_recovery) { s = versions_->Recover(column_families, read_only, &db_id_); } else { - s = versions_->TryRecover(column_families, read_only, &db_id_, - &missing_table_file); + assert(!files_in_dbname.empty()); + s = versions_->TryRecover(column_families, read_only, files_in_dbname, + &db_id_, &missing_table_file); if (s.ok()) { // TryRecover may delete previous column_family_set_. column_family_memtables_.reset( @@ -506,6 +524,7 @@ Status DBImpl::Recover( s = InitPersistStatsColumnFamily(); } + std::vector files_in_wal_dir; if (s.ok()) { // Initial max_total_in_memory_state_ before recovery logs. Log recovery // may check this value to decide whether to flush. @@ -532,9 +551,8 @@ Status DBImpl::Recover( // Note that prev_log_number() is no longer used, but we pay // attention to it in case we are recovering a database // produced by an older version of rocksdb. - std::vector filenames; if (!immutable_db_options_.best_efforts_recovery) { - s = env_->GetChildren(immutable_db_options_.wal_dir, &filenames); + s = env_->GetChildren(immutable_db_options_.wal_dir, &files_in_wal_dir); } if (s.IsNotFound()) { return Status::InvalidArgument("wal_dir not found", @@ -544,15 +562,15 @@ Status DBImpl::Recover( } std::vector logs; - for (size_t i = 0; i < filenames.size(); i++) { + for (const auto& file : files_in_wal_dir) { uint64_t number; FileType type; - if (ParseFileName(filenames[i], &number, &type) && type == kLogFile) { + if (ParseFileName(file, &number, &type) && type == kLogFile) { if (is_new_db) { return Status::Corruption( "While creating a new Db, wal_dir contains " "existing log file: ", - filenames[i]); + file); } else { logs.push_back(number); } @@ -604,15 +622,24 @@ Status DBImpl::Recover( // to reflect the most recent OPTIONS file. It does not matter for regular // read-write db instance because options_file_number_ will later be // updated to versions_->NewFileNumber() in RenameTempFileToOptionsFile. - std::vector file_names; + std::vector filenames; if (s.ok()) { - s = env_->GetChildren(GetName(), &file_names); + const std::string normalized_dbname = NormalizePath(dbname_); + const std::string normalized_wal_dir = + NormalizePath(immutable_db_options_.wal_dir); + if (immutable_db_options_.best_efforts_recovery) { + filenames = std::move(files_in_dbname); + } else if (normalized_dbname == normalized_wal_dir) { + filenames = std::move(files_in_wal_dir); + } else { + s = env_->GetChildren(GetName(), &filenames); + } } if (s.ok()) { uint64_t number = 0; uint64_t options_file_number = 0; FileType type; - for (const auto& fname : file_names) { + for (const auto& fname : filenames) { if (ParseFileName(fname, &number, &type) && type == kOptionsFile) { options_file_number = std::max(number, options_file_number); } @@ -620,7 +647,6 @@ Status DBImpl::Recover( versions_->options_file_number_ = options_file_number; } } - return s; } diff --git a/db/repair.cc b/db/repair.cc index da1b3641d..91ce51e82 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -185,7 +185,7 @@ class Repairer { DBImpl* db_impl = new DBImpl(db_options_, dbname_); // Also use this temp DBImpl to get a session id db_impl->GetDbSessionId(db_session_id_); - status = db_impl->NewDB(); + status = db_impl->NewDB(/*new_filenames=*/nullptr); delete db_impl; } diff --git a/db/version_set.cc b/db/version_set.cc index 94fe7ba34..3ac5b0c83 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -4730,34 +4730,25 @@ Status VersionSet::Recover( namespace { class ManifestPicker { public: - explicit ManifestPicker(const std::string& dbname, FileSystem* fs); - void SeekToFirstManifest(); + explicit ManifestPicker(const std::string& dbname, + const std::vector& files_in_dbname); // REQUIRES Valid() == true std::string GetNextManifest(uint64_t* file_number, std::string* file_name); bool Valid() const { return manifest_file_iter_ != manifest_files_.end(); } - const Status& status() const { return status_; } private: const std::string& dbname_; - FileSystem* const fs_; // MANIFEST file names(s) std::vector manifest_files_; std::vector::const_iterator manifest_file_iter_; - Status status_; }; -ManifestPicker::ManifestPicker(const std::string& dbname, FileSystem* fs) - : dbname_(dbname), fs_(fs) {} - -void ManifestPicker::SeekToFirstManifest() { - assert(fs_ != nullptr); - std::vector children; - Status s = fs_->GetChildren(dbname_, IOOptions(), &children, /*dbg=*/nullptr); - if (!s.ok()) { - status_ = s; - return; - } - for (const auto& fname : children) { +ManifestPicker::ManifestPicker(const std::string& dbname, + const std::vector& files_in_dbname) + : dbname_(dbname) { + // populate manifest files + assert(!files_in_dbname.empty()); + for (const auto& fname : files_in_dbname) { uint64_t file_num = 0; FileType file_type; bool parse_ok = ParseFileName(fname, &file_num, &file_type); @@ -4765,6 +4756,7 @@ void ManifestPicker::SeekToFirstManifest() { manifest_files_.push_back(fname); } } + // seek to first manifest std::sort(manifest_files_.begin(), manifest_files_.end(), [](const std::string& lhs, const std::string& rhs) { uint64_t num1 = 0; @@ -4787,7 +4779,6 @@ void ManifestPicker::SeekToFirstManifest() { std::string ManifestPicker::GetNextManifest(uint64_t* number, std::string* file_name) { - assert(status_.ok()); assert(Valid()); std::string ret; if (manifest_file_iter_ != manifest_files_.end()) { @@ -4817,16 +4808,13 @@ std::string ManifestPicker::GetNextManifest(uint64_t* number, Status VersionSet::TryRecover( const std::vector& column_families, bool read_only, - std::string* db_id, bool* has_missing_table_file) { - ManifestPicker manifest_picker(dbname_, fs_); - manifest_picker.SeekToFirstManifest(); - Status s = manifest_picker.status(); - if (!s.ok()) { - return s; - } + const std::vector& files_in_dbname, std::string* db_id, + bool* has_missing_table_file) { + ManifestPicker manifest_picker(dbname_, files_in_dbname); if (!manifest_picker.Valid()) { return Status::Corruption("Cannot locate MANIFEST file in " + dbname_); } + Status s; std::string manifest_path = manifest_picker.GetNextManifest(&manifest_file_number_, nullptr); while (!manifest_path.empty()) { diff --git a/db/version_set.h b/db/version_set.h index 8941efb9d..2f7effda4 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -962,8 +962,9 @@ class VersionSet { bool read_only = false, std::string* db_id = nullptr); Status TryRecover(const std::vector& column_families, - bool read_only, std::string* db_id, - bool* has_missing_table_file); + bool read_only, + const std::vector& files_in_dbname, + std::string* db_id, bool* has_missing_table_file); // Try to recover the version set to the most recent consistent state // recorded in the specified manifest.