From b1a3b4c0d0916383b045e87aa8207b6773f81d03 Mon Sep 17 00:00:00 2001 From: Gunnar Kudrjavets Date: Wed, 6 Jan 2016 14:19:08 -0800 Subject: [PATCH] Make ldb automagically determine the file type and use the correct dumping function Summary: This set of changes implements the following design: `ldb` will utilize `--path` parameter which can be used to specify a file name. Tool will then apply some heuristic to determine how to output the data properly. The design decision is not to probe the file content, but use file names to determine what dumping function to call. Usage examples: Understands that path points to a manifest file and dumps it. `./ldb --path=/tmp/test_db/MANIFEST-000023 dump` Understands that path points to a WAL file and dumps it. `./ldb --path=/tmp/test_db/000024.log dump --header` Understands that path points to a SST file and dumps it. `./ldb --path=/tmp/test_db/000007.sst dump` Figures out that none of the supported file types are applicable and outputs an appropriate error message. `./ldb --path=/tmp/cron.log dump` Test Plan: Basics: git diff make clean make -j 32 commit-prereq arc lint More specific testing (done as part of commit-prereq, but can be iterated separately when making isolated changes): make clean make ldb python tools/ldb_test.py make rocksdb_dump make rocksdb_undump sh tools/rocksdb_dump_test.sh Reviewers: rven, IslamAbdelRahman, yhchiang, kradhakrishnan, anthony, igor, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D52269 --- HISTORY.md | 5 +- tools/ldb_cmd.cc | 130 ++++++++++++++++++++++++++++++++++------------ tools/ldb_cmd.h | 30 ++++++++--- tools/ldb_test.py | 53 +++++++++++++++++++ 4 files changed, 176 insertions(+), 42 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index da9997ee0..2400411d4 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -8,6 +8,7 @@ * Introduce CompactionJobInfo::compaction_reason, this field include the reason to trigger the compaction. * After slow down is triggered, if estimated pending compaction bytes keep increasing, slowdown more. * Increase default options.delayed_write_rate to 2MB/s. +* Added a new parameter --path to ldb tool. --path accepts the name of either MANIFEST, SST or a WAL file. Either --db or --path can be used when calling ldb. ## 4.3.0 (12/8/2015) ### New Features @@ -21,8 +22,8 @@ ## 4.2.0 (11/9/2015) ### New Features -* Introduce CreateLoggerFromOptions(), this function create a Logger for provided DBOptions. -* Add GetAggregatedIntProperty(), which returns the sum of the GetIntProperty of all the column families. +* Introduce CreateLoggerFromOptions(), this function create a Logger for provided DBOptions. +* Add GetAggregatedIntProperty(), which returns the sum of the GetIntProperty of all the column families. * Add MemoryUtil in rocksdb/utilities/memory.h. It currently offers a way to get the memory usage by type from a list rocksdb instances. ### Public API Changes diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index b9adef02a..7ec4690d0 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -40,6 +40,7 @@ namespace rocksdb { using namespace std; const string LDBCommand::ARG_DB = "db"; +const string LDBCommand::ARG_PATH = "path"; const string LDBCommand::ARG_HEX = "hex"; const string LDBCommand::ARG_KEY_HEX = "key_hex"; const string LDBCommand::ARG_VALUE_HEX = "value_hex"; @@ -62,6 +63,14 @@ const string LDBCommand::ARG_CREATE_IF_MISSING = "create_if_missing"; const char* LDBCommand::DELIM = " ==> "; +namespace { + +void DumpWalFile(std::string wal_file, bool print_header, bool print_values, + LDBCommandExecuteResult* exec_state); + +void DumpSstFile(std::string filename, bool output_hex, bool show_properties); +}; + LDBCommand* LDBCommand::InitFromCmdLineArgs( int argc, char** argv, @@ -394,8 +403,10 @@ bool LDBCommand::ValidateCmdLineOptions() { } } - if (!NoDBOpen() && option_map_.find(ARG_DB) == option_map_.end()) { - fprintf(stderr, "%s must be specified\n", ARG_DB.c_str()); + if (!NoDBOpen() && option_map_.find(ARG_DB) == option_map_.end() && + option_map_.find(ARG_PATH) == option_map_.end()) { + fprintf(stderr, "Either %s or %s must be specified.\n", ARG_DB.c_str(), + ARG_PATH.c_str()); return false; } @@ -733,21 +744,20 @@ const string InternalDumpCommand::ARG_INPUT_KEY_HEX = "input_key_hex"; InternalDumpCommand::InternalDumpCommand(const vector& params, const map& options, - const vector& flags) : - LDBCommand(options, flags, true, - BuildCmdLineOptions({ ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, - ARG_FROM, ARG_TO, ARG_MAX_KEYS, - ARG_COUNT_ONLY, ARG_COUNT_DELIM, ARG_STATS, - ARG_INPUT_KEY_HEX})), - has_from_(false), - has_to_(false), - max_keys_(-1), - delim_("."), - count_only_(false), - count_delim_(false), - print_stats_(false), - is_input_key_hex_(false) { - + const vector& flags) + : LDBCommand( + options, flags, true, + BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, + ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY, + ARG_COUNT_DELIM, ARG_STATS, ARG_INPUT_KEY_HEX})), + has_from_(false), + has_to_(false), + max_keys_(-1), + delim_("."), + count_only_(false), + count_delim_(false), + print_stats_(false), + is_input_key_hex_(false) { has_from_ = ParseStringOption(options, ARG_FROM, &from_); has_to_ = ParseStringOption(options, ARG_TO, &to_); @@ -891,21 +901,20 @@ const string DBDumperCommand::ARG_STATS = "stats"; const string DBDumperCommand::ARG_TTL_BUCKET = "bucket"; DBDumperCommand::DBDumperCommand(const vector& params, - const map& options, const vector& flags) : - LDBCommand(options, flags, true, - BuildCmdLineOptions({ARG_TTL, ARG_HEX, ARG_KEY_HEX, - ARG_VALUE_HEX, ARG_FROM, ARG_TO, - ARG_MAX_KEYS, ARG_COUNT_ONLY, - ARG_COUNT_DELIM, ARG_STATS, ARG_TTL_START, - ARG_TTL_END, ARG_TTL_BUCKET, - ARG_TIMESTAMP})), - null_from_(true), - null_to_(true), - max_keys_(-1), - count_only_(false), - count_delim_(false), - print_stats_(false) { - + const map& options, + const vector& flags) + : LDBCommand(options, flags, true, + BuildCmdLineOptions( + {ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, + ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM, + ARG_STATS, ARG_TTL_START, ARG_TTL_END, ARG_TTL_BUCKET, + ARG_TIMESTAMP, ARG_PATH})), + null_from_(true), + null_to_(true), + max_keys_(-1), + count_only_(false), + count_delim_(false), + print_stats_(false) { map::const_iterator itr = options.find(ARG_FROM); if (itr != options.end()) { null_from_ = false; @@ -954,6 +963,11 @@ DBDumperCommand::DBDumperCommand(const vector& params, to_ = HexToString(to_); } } + + itr = options.find(ARG_PATH); + if (itr != options.end()) { + path_ = itr->second; + } } void DBDumperCommand::Help(string& ret) { @@ -969,13 +983,63 @@ void DBDumperCommand::Help(string& ret) { ret.append(" [--" + ARG_TTL_BUCKET + "=]"); ret.append(" [--" + ARG_TTL_START + "=:- is inclusive]"); ret.append(" [--" + ARG_TTL_END + "=:- is exclusive]"); + ret.append(" [--" + ARG_PATH + "=]"); ret.append("\n"); } +/** + * Handles two separate cases: + * + * 1) --db is specified - just dump the database. + * + * 2) --path is specified - determine based on file extension what dumping + * function to call. Please note that we intentionally use the extension + * and avoid probing the file contents under the assumption that renaming + * the files is not a supported scenario. + * + */ void DBDumperCommand::DoCommand() { if (!db_) { - return; + assert(!path_.empty()); + string fileName = GetFileNameFromPath(path_); + uint64_t number; + FileType type; + + exec_state_ = LDBCommandExecuteResult::Succeed(""); + + if (!ParseFileName(fileName, &number, &type)) { + exec_state_ = + LDBCommandExecuteResult::Failed("Can't parse file type: " + path_); + return; + } + + switch (type) { + case kLogFile: + DumpWalFile(path_, /* print_header_ */ true, /* print_values_ */ true, + &exec_state_); + break; + case kTableFile: + DumpSstFile(path_, is_key_hex_, /* show_properties */ true); + break; + case kDescriptorFile: + DumpManifestFile(path_, /* verbose_ */ false, is_key_hex_, + /* json_ */ false); + break; + default: + exec_state_ = LDBCommandExecuteResult::Failed( + "File type not supported: " + path_); + break; + } + + } else { + DoDumpCommand(); } +} + +void DBDumperCommand::DoDumpCommand() { + assert(nullptr != db_); + assert(path_.empty()); + // Parse command line args uint64_t count = 0; if (print_stats_) { diff --git a/tools/ldb_cmd.h b/tools/ldb_cmd.h index 6a77b9710..0c048e794 100644 --- a/tools/ldb_cmd.h +++ b/tools/ldb_cmd.h @@ -40,6 +40,7 @@ public: // Command-line arguments static const string ARG_DB; + static const string ARG_PATH; static const string ARG_HEX; static const string ARG_KEY_HEX; static const string ARG_VALUE_HEX; @@ -90,10 +91,8 @@ public: } virtual ~LDBCommand() { - if (db_ != nullptr) { - delete db_; - db_ = nullptr; - } + delete db_; + db_ = nullptr; } /* Run the command, and return the execute result. */ @@ -104,12 +103,12 @@ public: if (db_ == nullptr && !NoDBOpen()) { OpenDB(); - if (!exec_state_.IsNotStarted()) { - return; - } } + // We'll intentionally proceed even if the DB can't be opened because users + // can also specify a filename, not just a directory. DoCommand(); + if (exec_state_.IsNotStarted()) { exec_state_ = LDBCommandExecuteResult::Succeed(""); } @@ -441,6 +440,22 @@ public: virtual void DoCommand() override; private: + /** + * Extract file name from the full path. We handle both the forward slash (/) + * and backslash (\) to make sure that different OS-s are supported. + */ + static string GetFileNameFromPath(const string& s) { + std::size_t n = s.find_last_of("/\\"); + + if (std::string::npos == n) { + return s; + } else { + return s.substr(n + 1); + } + } + + void DoDumpCommand(); + bool null_from_; string from_; bool null_to_; @@ -450,6 +465,7 @@ private: bool count_only_; bool count_delim_; bool print_stats_; + string path_; static const string ARG_COUNT_ONLY; static const string ARG_COUNT_DELIM; diff --git a/tools/ldb_test.py b/tools/ldb_test.py index bcf362404..471232419 100644 --- a/tools/ldb_test.py +++ b/tools/ldb_test.py @@ -408,6 +408,12 @@ class LDBTestCase(unittest.TestCase): def getManifests(self, directory): return glob.glob(directory + "/MANIFEST-*") + def getSSTFiles(self, directory): + return glob.glob(directory + "/*.sst") + + def getWALFiles(self, directory): + return glob.glob(directory + "/*.log") + def copyManifests(self, src, dest): return 0 == run_err_null("cp " + src + " " + dest) @@ -439,6 +445,53 @@ class LDBTestCase(unittest.TestCase): % (dbPath, manifest_files[1]), expected_pattern, unexpected=False, isPattern=True) + # Make sure that using the dump with --path will result in identical + # output as just using manifest_dump. + cmd = "dump --path=%s" + self.assertRunOKFull((cmd) + % (manifest_files[1]), + expected_pattern, unexpected=False, + isPattern=True) + + def testSSTDump(self): + print "Running testSSTDump..." + + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put sst1 sst1_val --create_if_missing", "OK") + self.assertRunOK("put sst2 sst2_val", "OK") + self.assertRunOK("get sst1", "sst1_val") + + # Pattern to expect from SST dump. + regex = ".*Sst file format:.*" + expected_pattern = re.compile(regex) + + sst_files = self.getSSTFiles(dbPath) + self.assertTrue(len(sst_files) >= 1) + cmd = "dump --path=%s" + self.assertRunOKFull((cmd) + % (sst_files[0]), + expected_pattern, unexpected=False, + isPattern=True) + + def testWALDump(self): + print "Running testWALDump..." + + dbPath = os.path.join(self.TMP_DIR, self.DB_NAME) + self.assertRunOK("put wal1 wal1_val --create_if_missing", "OK") + self.assertRunOK("put wal2 wal2_val", "OK") + self.assertRunOK("get wal1", "wal1_val") + + # Pattern to expect from WAL dump. + regex = "^Sequence,Count,ByteSize,Physical Offset,Key\(s\).*" + expected_pattern = re.compile(regex) + + wal_files = self.getWALFiles(dbPath) + self.assertTrue(len(wal_files) >= 1) + cmd = "dump --path=%s" + self.assertRunOKFull((cmd) + % (wal_files[0]), + expected_pattern, unexpected=False, + isPattern=True) def testListColumnFamilies(self): print "Running testListColumnFamilies..."