From 02df00d97b4354b4d2fbd7c7dd182eed7d150339 Mon Sep 17 00:00:00 2001 From: Zitan Chen Date: Wed, 3 Jun 2020 18:55:25 -0700 Subject: [PATCH] API change: DB::OpenForReadOnly will not write to the file system unless create_if_missing is true (#6900) Summary: DB::OpenForReadOnly will not write anything to the file system (i.e., create directories or files for the DB) unless create_if_missing is true. This change also fixes some subcommands of ldb, which write to the file system even if the purpose is for readonly. Two tests for this updated behavior of DB::OpenForReadOnly are also added. Other minor changes: 1. Updated HISTORY.md to include this API change of DB::OpenForReadOnly; 2. Updated the help information for the put and batchput subcommands of ldb with the option [--create_if_missing]; 3. Updated the comment of Env::DeleteDir to emphasize that it returns OK only if the directory to be deleted is empty. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6900 Test Plan: passed make check; also manually tested a few ldb subcommands Reviewed By: pdillinger Differential Revision: D21822188 Pulled By: gg814 fbshipit-source-id: 604cc0f0d0326a937ee25a32cdc2b512f9a3be6e --- HISTORY.md | 1 + db/db_impl/db_impl_readonly.cc | 47 ++++++++++++++++++++++++-- db/db_impl/db_impl_readonly.h | 9 +++++ db/db_test2.cc | 61 ++++++++++++++++++++++++++++++++++ include/rocksdb/env.h | 2 ++ tools/ldb_cmd.cc | 4 ++- 6 files changed, 121 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index e895dd0d2..0edb91016 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -17,6 +17,7 @@ * BlobDB now explicitly disallows using the default column family's storage directories as blob directory. * DeleteRange now returns `Status::InvalidArgument` if the range's end key comes before its start key according to the user comparator. Previously the behavior was undefined. * ldb now uses options.force_consistency_checks = true by default and "--disable_consistency_checks" is added to disable it. +* DB::OpenForReadOnly no longer creates files or directories if the named DB does not exist, unless create_if_missing is set to true. * The consistency checks that validate LSM state changes (table file additions/deletions during flushes and compactions) are now stricter, more efficient, and no longer optional, i.e. they are performed even if `force_consistency_checks` is `false`. ### New Feature diff --git a/db/db_impl/db_impl_readonly.cc b/db/db_impl/db_impl_readonly.cc index d728a4f68..d39ff44d4 100644 --- a/db/db_impl/db_impl_readonly.cc +++ b/db/db_impl/db_impl_readonly.cc @@ -128,12 +128,38 @@ Status DBImplReadOnly::NewIterators( return Status::OK(); } +namespace { +// Return OK if dbname exists in the file system +// or create_if_missing is false +Status OpenForReadOnlyCheckExistence(const DBOptions& db_options, + const std::string& dbname) { + Status s; + if (!db_options.create_if_missing) { + // Attempt to read "CURRENT" file + const std::shared_ptr& fs = db_options.env->GetFileSystem(); + std::string manifest_path; + uint64_t manifest_file_number; + s = VersionSet::GetCurrentManifestPath(dbname, fs.get(), &manifest_path, + &manifest_file_number); + if (!s.ok()) { + return Status::NotFound(CurrentFileName(dbname), "does not exist"); + } + } + return s; +} +} // namespace + Status DB::OpenForReadOnly(const Options& options, const std::string& dbname, DB** dbptr, bool /*error_if_log_file_exist*/) { + // If dbname does not exist in the file system, should not do anything + Status s = OpenForReadOnlyCheckExistence(options, dbname); + if (!s.ok()) { + return s; + } + *dbptr = nullptr; // Try to first open DB as fully compacted DB - Status s; s = CompactedDBImpl::Open(options, dbname, dbptr); if (s.ok()) { return s; @@ -146,7 +172,8 @@ Status DB::OpenForReadOnly(const Options& options, const std::string& dbname, ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); std::vector handles; - s = DB::OpenForReadOnly(db_options, dbname, column_families, &handles, dbptr); + s = DBImplReadOnly::OpenForReadOnlyWithoutCheck( + db_options, dbname, column_families, &handles, dbptr); if (s.ok()) { assert(handles.size() == 1); // i can delete the handle since DBImpl is always holding a @@ -161,6 +188,22 @@ Status DB::OpenForReadOnly( const std::vector& column_families, std::vector* handles, DB** dbptr, bool error_if_log_file_exist) { + // If dbname does not exist in the file system, should not do anything + Status s = OpenForReadOnlyCheckExistence(db_options, dbname); + if (!s.ok()) { + return s; + } + + return DBImplReadOnly::OpenForReadOnlyWithoutCheck( + db_options, dbname, column_families, handles, dbptr, + error_if_log_file_exist); +} + +Status DBImplReadOnly::OpenForReadOnlyWithoutCheck( + const DBOptions& db_options, const std::string& dbname, + const std::vector& column_families, + std::vector* handles, DB** dbptr, + bool error_if_log_file_exist) { *dbptr = nullptr; handles->clear(); diff --git a/db/db_impl/db_impl_readonly.h b/db/db_impl/db_impl_readonly.h index 04d06b4a1..c19a43899 100644 --- a/db/db_impl/db_impl_readonly.h +++ b/db/db_impl/db_impl_readonly.h @@ -130,6 +130,15 @@ class DBImplReadOnly : public DBImpl { } private: + // A "helper" function for DB::OpenForReadOnly without column families + // to reduce unnecessary I/O + // It has the same functionality as DB::OpenForReadOnly with column families + // but does not check the existence of dbname in the file system + static Status OpenForReadOnlyWithoutCheck( + const DBOptions& db_options, const std::string& dbname, + const std::vector& column_families, + std::vector* handles, DB** dbptr, + bool error_if_log_file_exist = false); friend class DB; }; } // namespace ROCKSDB_NAMESPACE diff --git a/db/db_test2.cc b/db/db_test2.cc index 26da04417..83f3ee71d 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -25,6 +25,67 @@ class DBTest2 : public DBTestBase { DBTest2() : DBTestBase("/db_test2") {} }; +TEST_F(DBTest2, OpenForReadOnly) { + DB* db_ptr = nullptr; + std::string dbname = test::PerThreadDBPath("db_readonly"); + Options options = CurrentOptions(); + options.create_if_missing = true; + // OpenForReadOnly should fail but will create in the file system + ASSERT_NOK(DB::OpenForReadOnly(options, dbname, &db_ptr)); + // Since is created, we should be able to delete the dir + // We first get the list files under + // There should not be any subdirectories -- this is not checked here + std::vector files; + ASSERT_OK(env_->GetChildren(dbname, &files)); + for (auto& f : files) { + if (f != "." && f != "..") { + ASSERT_OK(env_->DeleteFile(dbname + "/" + f)); + } + } + // should be empty now and we should be able to delete it + ASSERT_OK(env_->DeleteDir(dbname)); + options.create_if_missing = false; + // OpenForReadOnly should fail since was successfully deleted + ASSERT_NOK(DB::OpenForReadOnly(options, dbname, &db_ptr)); + // With create_if_missing false, there should not be a dir in the file system + ASSERT_NOK(env_->FileExists(dbname)); +} + +TEST_F(DBTest2, OpenForReadOnlyWithColumnFamilies) { + DB* db_ptr = nullptr; + std::string dbname = test::PerThreadDBPath("db_readonly"); + Options options = CurrentOptions(); + options.create_if_missing = true; + + ColumnFamilyOptions cf_options(options); + std::vector column_families; + column_families.push_back( + ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options)); + column_families.push_back(ColumnFamilyDescriptor("goku", cf_options)); + std::vector handles; + // OpenForReadOnly should fail but will create in the file system + ASSERT_NOK( + DB::OpenForReadOnly(options, dbname, column_families, &handles, &db_ptr)); + // Since is created, we should be able to delete the dir + // We first get the list files under + // There should not be any subdirectories -- this is not checked here + std::vector files; + ASSERT_OK(env_->GetChildren(dbname, &files)); + for (auto& f : files) { + if (f != "." && f != "..") { + ASSERT_OK(env_->DeleteFile(dbname + "/" + f)); + } + } + // should be empty now and we should be able to delete it + ASSERT_OK(env_->DeleteDir(dbname)); + options.create_if_missing = false; + // OpenForReadOnly should fail since was successfully deleted + ASSERT_NOK( + DB::OpenForReadOnly(options, dbname, column_families, &handles, &db_ptr)); + // With create_if_missing false, there should not be a dir in the file system + ASSERT_NOK(env_->FileExists(dbname)); +} + class PrefixFullBloomWithReverseComparator : public DBTestBase, public ::testing::WithParamInterface { diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 733de0977..e1c54d751 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -313,6 +313,8 @@ class Env { virtual Status CreateDirIfMissing(const std::string& dirname) = 0; // Delete the specified directory. + // Many implementations of this function will only delete a directory if it is + // empty. virtual Status DeleteDir(const std::string& dirname) = 0; // Store the size of fname in *file_size. diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index abc84a578..7d86fa3c4 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -2445,6 +2445,7 @@ void BatchPutCommand::Help(std::string& ret) { ret.append(" "); ret.append(BatchPutCommand::Name()); ret.append(" [ ] [..]"); + ret.append(" [--" + ARG_CREATE_IF_MISSING + "]"); ret.append(" [--" + ARG_TTL + "]"); ret.append("\n"); } @@ -2731,7 +2732,8 @@ PutCommand::PutCommand(const std::vector& params, void PutCommand::Help(std::string& ret) { ret.append(" "); ret.append(PutCommand::Name()); - ret.append(" "); + ret.append(" "); + ret.append(" [--" + ARG_CREATE_IF_MISSING + "]"); ret.append(" [--" + ARG_TTL + "]"); ret.append("\n"); }