From ac29645743fb9be5c02fcdaf6fbd2a71a832ef39 Mon Sep 17 00:00:00 2001 From: yuzhangyu Date: Fri, 22 Apr 2022 16:54:43 -0700 Subject: [PATCH] Add blob dump support to the dump_live_files command (#9896) Summary: This patch completes the second part of the task: "Add blob support to the dump and dump_live_files command" Pull Request resolved: https://github.com/facebook/rocksdb/pull/9896 Reviewed By: ltamasi Differential Revision: D35852667 Pulled By: jowlyzhang fbshipit-source-id: a006456c881f468a92da689e895134762e9574e1 --- tools/ldb_cmd.cc | 58 ++++++++++++++++++++++++++++++++------------ tools/ldb_cmd_impl.h | 1 + tools/ldb_test.py | 11 ++++++++- 3 files changed, 54 insertions(+), 16 deletions(-) diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index a6a055dd4..638fbe262 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -3574,13 +3574,17 @@ DBFileDumperCommand::DBFileDumperCommand( const std::map<std::string, std::string>& options, const std::vector<std::string>& flags) : LDBCommand(options, flags, true, - BuildCmdLineOptions({ARG_DECODE_BLOB_INDEX})), - decode_blob_index_(IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX)) {} + BuildCmdLineOptions( + {ARG_DECODE_BLOB_INDEX, ARG_DUMP_UNCOMPRESSED_BLOBS})), + decode_blob_index_(IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX)), + dump_uncompressed_blobs_( + IsFlagPresent(flags, ARG_DUMP_UNCOMPRESSED_BLOBS)) {} void DBFileDumperCommand::Help(std::string& ret) { ret.append(" "); ret.append(DBFileDumperCommand::Name()); ret.append(" [--" + ARG_DECODE_BLOB_INDEX + "] "); + ret.append(" [--" + ARG_DUMP_UNCOMPRESSED_BLOBS + "] "); ret.append("\n"); } @@ -3591,6 +3595,8 @@ void DBFileDumperCommand::DoCommand() { } Status s; + // TODO: Use --hex, --key_hex, --value_hex flags consistently for + // dumping manifest file, sst files and blob files. 
std::cout << "Manifest File" << std::endl; std::cout << "==============================" << std::endl; std::string manifest_filename; @@ -3613,20 +3619,42 @@ void DBFileDumperCommand::DoCommand() { DumpManifestFile(options_, manifest_filepath, false, false, false); std::cout << std::endl; - std::cout << "SST Files" << std::endl; - std::cout << "==============================" << std::endl; - std::vector<LiveFileMetaData> metadata; - db_->GetLiveFilesMetaData(&metadata); - for (auto& fileMetadata : metadata) { - std::string filename = fileMetadata.db_path + "/" + fileMetadata.name; - // Correct concatenation of filepath and filename: - // Check that there is no double slashes (or more!) when concatenation - // happens. - filename = NormalizePath(filename); - std::cout << filename << " level:" << fileMetadata.level << std::endl; - std::cout << "------------------------------" << std::endl; - DumpSstFile(options_, filename, false, true, decode_blob_index_); + std::vector<ColumnFamilyMetaData> column_families; + db_->GetAllColumnFamilyMetaData(&column_families); + for (const auto& column_family : column_families) { + std::cout << "Column family name: " << column_family.name << std::endl; + std::cout << "==============================" << std::endl; std::cout << std::endl; + std::cout << "SST Files" << std::endl; + std::cout << "==============================" << std::endl; + for (const LevelMetaData& level : column_family.levels) { + for (const SstFileMetaData& sst_file : level.files) { + std::string filename = sst_file.db_path + "/" + sst_file.name; + // Correct concatenation of filepath and filename: + // Check that there is no double slashes (or more!) when concatenation + // happens. 
+ filename = NormalizePath(filename); + std::cout << filename << " level:" << level.level << std::endl; + std::cout << "------------------------------" << std::endl; + DumpSstFile(options_, filename, false, true, decode_blob_index_); + std::cout << std::endl; + } + } + std::cout << "Blob Files" << std::endl; + std::cout << "==============================" << std::endl; + for (const BlobMetaData& blob_file : column_family.blob_files) { + std::string filename = + blob_file.blob_file_path + "/" + blob_file.blob_file_name; + // Correct concatenation of filepath and filename: + // Check that there is no double slashes (or more!) when concatenation + // happens. + filename = NormalizePath(filename); + std::cout << filename << std::endl; + std::cout << "------------------------------" << std::endl; + DumpBlobFile(filename, /* is_key_hex */ false, /* is_value_hex */ false, + dump_uncompressed_blobs_); + std::cout << std::endl; + } } std::cout << std::endl; diff --git a/tools/ldb_cmd_impl.h b/tools/ldb_cmd_impl.h index 04a81f8c3..18af43574 100644 --- a/tools/ldb_cmd_impl.h +++ b/tools/ldb_cmd_impl.h @@ -47,6 +47,7 @@ class DBFileDumperCommand : public LDBCommand { private: bool decode_blob_index_; + bool dump_uncompressed_blobs_; }; class DBLiveFilesMetadataDumperCommand : public LDBCommand { diff --git a/tools/ldb_test.py b/tools/ldb_test.py index c518eb282..5f220f75e 100644 --- a/tools/ldb_test.py +++ b/tools/ldb_test.py @@ -488,7 +488,7 @@ class LDBTestCase(unittest.TestCase): dbPath += "/" # Call the dump_live_files function with the edited dbPath name. - self.assertTrue(self.dumpLiveFiles("--db=%s --decode_blob_index" % dbPath, dumpFilePath)) + self.assertTrue(self.dumpLiveFiles("--db=%s --decode_blob_index --dump_uncompressed_blobs" % dbPath, dumpFilePath)) # Investigate the output with open(dumpFilePath, "r") as tmp: @@ -496,13 +496,22 @@ class LDBTestCase(unittest.TestCase): # Check that all the SST filenames have a correct full path (no multiple '/'). 
sstFileList = re.findall(r"%s.*\d+.sst" % dbPath, data) + self.assertTrue(len(sstFileList) >= 1) for sstFilename in sstFileList: filenumber = re.findall(r"\d+.sst", sstFilename)[0] self.assertEqual(sstFilename, dbPath+filenumber) + # Check that all the Blob filenames have a correct full path (no multiple '/'). + blobFileList = re.findall(r"%s.*\d+.blob" % dbPath, data) + self.assertTrue(len(blobFileList) >= 1) + for blobFilename in blobFileList: + filenumber = re.findall(r"\d+.blob", blobFilename)[0] + self.assertEqual(blobFilename, dbPath+filenumber) + # Check that all the manifest filenames # have a correct full path (no multiple '/'). manifestFileList = re.findall(r"%s.*MANIFEST-\d+" % dbPath, data) + self.assertTrue(len(manifestFileList) >= 1) for manifestFilename in manifestFileList: filenumber = re.findall(r"(?<=MANIFEST-)\d+", manifestFilename)[0] self.assertEqual(manifestFilename, dbPath+"MANIFEST-"+filenumber)