Add blob dump support to the dump_live_files command (#9896)

Summary:
This patch completes the second part of the task "Add blob support to the dump and dump_live_files command" by adding blob file dumping to the dump_live_files command.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9896

Reviewed By: ltamasi

Differential Revision: D35852667

Pulled By: jowlyzhang

fbshipit-source-id: a006456c881f468a92da689e895134762e9574e1
This commit is contained in:
yuzhangyu 2022-04-22 16:54:43 -07:00 committed by Facebook GitHub Bot
parent fff28a7725
commit ac29645743
3 changed files with 54 additions and 16 deletions

View File

@ -3574,13 +3574,17 @@ DBFileDumperCommand::DBFileDumperCommand(
const std::map<std::string, std::string>& options, const std::map<std::string, std::string>& options,
const std::vector<std::string>& flags) const std::vector<std::string>& flags)
: LDBCommand(options, flags, true, : LDBCommand(options, flags, true,
BuildCmdLineOptions({ARG_DECODE_BLOB_INDEX})), BuildCmdLineOptions(
decode_blob_index_(IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX)) {} {ARG_DECODE_BLOB_INDEX, ARG_DUMP_UNCOMPRESSED_BLOBS})),
decode_blob_index_(IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX)),
dump_uncompressed_blobs_(
IsFlagPresent(flags, ARG_DUMP_UNCOMPRESSED_BLOBS)) {}
void DBFileDumperCommand::Help(std::string& ret) { void DBFileDumperCommand::Help(std::string& ret) {
ret.append(" "); ret.append(" ");
ret.append(DBFileDumperCommand::Name()); ret.append(DBFileDumperCommand::Name());
ret.append(" [--" + ARG_DECODE_BLOB_INDEX + "] "); ret.append(" [--" + ARG_DECODE_BLOB_INDEX + "] ");
ret.append(" [--" + ARG_DUMP_UNCOMPRESSED_BLOBS + "] ");
ret.append("\n"); ret.append("\n");
} }
@ -3591,6 +3595,8 @@ void DBFileDumperCommand::DoCommand() {
} }
Status s; Status s;
// TODO: Use --hex, --key_hex, --value_hex flags consistently for
// dumping manifest file, sst files and blob files.
std::cout << "Manifest File" << std::endl; std::cout << "Manifest File" << std::endl;
std::cout << "==============================" << std::endl; std::cout << "==============================" << std::endl;
std::string manifest_filename; std::string manifest_filename;
@ -3613,20 +3619,42 @@ void DBFileDumperCommand::DoCommand() {
DumpManifestFile(options_, manifest_filepath, false, false, false); DumpManifestFile(options_, manifest_filepath, false, false, false);
std::cout << std::endl; std::cout << std::endl;
std::cout << "SST Files" << std::endl; std::vector<ColumnFamilyMetaData> column_families;
std::cout << "==============================" << std::endl; db_->GetAllColumnFamilyMetaData(&column_families);
std::vector<LiveFileMetaData> metadata; for (const auto& column_family : column_families) {
db_->GetLiveFilesMetaData(&metadata); std::cout << "Column family name: " << column_family.name << std::endl;
for (auto& fileMetadata : metadata) { std::cout << "==============================" << std::endl;
std::string filename = fileMetadata.db_path + "/" + fileMetadata.name;
// Correct concatenation of filepath and filename:
// Check that there is no double slashes (or more!) when concatenation
// happens.
filename = NormalizePath(filename);
std::cout << filename << " level:" << fileMetadata.level << std::endl;
std::cout << "------------------------------" << std::endl;
DumpSstFile(options_, filename, false, true, decode_blob_index_);
std::cout << std::endl; std::cout << std::endl;
std::cout << "SST Files" << std::endl;
std::cout << "==============================" << std::endl;
for (const LevelMetaData& level : column_family.levels) {
for (const SstFileMetaData& sst_file : level.files) {
std::string filename = sst_file.db_path + "/" + sst_file.name;
// Correct concatenation of filepath and filename:
// Check that there are no double slashes (or more!) when concatenation
// happens.
filename = NormalizePath(filename);
std::cout << filename << " level:" << level.level << std::endl;
std::cout << "------------------------------" << std::endl;
DumpSstFile(options_, filename, false, true, decode_blob_index_);
std::cout << std::endl;
}
}
std::cout << "Blob Files" << std::endl;
std::cout << "==============================" << std::endl;
for (const BlobMetaData& blob_file : column_family.blob_files) {
std::string filename =
blob_file.blob_file_path + "/" + blob_file.blob_file_name;
// Correct concatenation of filepath and filename:
// Check that there are no double slashes (or more!) when concatenation
// happens.
filename = NormalizePath(filename);
std::cout << filename << std::endl;
std::cout << "------------------------------" << std::endl;
DumpBlobFile(filename, /* is_key_hex */ false, /* is_value_hex */ false,
dump_uncompressed_blobs_);
std::cout << std::endl;
}
} }
std::cout << std::endl; std::cout << std::endl;

View File

@ -47,6 +47,7 @@ class DBFileDumperCommand : public LDBCommand {
private: private:
bool decode_blob_index_; bool decode_blob_index_;
bool dump_uncompressed_blobs_;
}; };
class DBLiveFilesMetadataDumperCommand : public LDBCommand { class DBLiveFilesMetadataDumperCommand : public LDBCommand {

View File

@ -488,7 +488,7 @@ class LDBTestCase(unittest.TestCase):
dbPath += "/" dbPath += "/"
# Call the dump_live_files function with the edited dbPath name. # Call the dump_live_files function with the edited dbPath name.
self.assertTrue(self.dumpLiveFiles("--db=%s --decode_blob_index" % dbPath, dumpFilePath)) self.assertTrue(self.dumpLiveFiles("--db=%s --decode_blob_index --dump_uncompressed_blobs" % dbPath, dumpFilePath))
# Investigate the output # Investigate the output
with open(dumpFilePath, "r") as tmp: with open(dumpFilePath, "r") as tmp:
@ -496,13 +496,22 @@ class LDBTestCase(unittest.TestCase):
# Check that all the SST filenames have a correct full path (no multiple '/'). # Check that all the SST filenames have a correct full path (no multiple '/').
sstFileList = re.findall(r"%s.*\d+.sst" % dbPath, data) sstFileList = re.findall(r"%s.*\d+.sst" % dbPath, data)
self.assertTrue(len(sstFileList) >= 1)
for sstFilename in sstFileList: for sstFilename in sstFileList:
filenumber = re.findall(r"\d+.sst", sstFilename)[0] filenumber = re.findall(r"\d+.sst", sstFilename)[0]
self.assertEqual(sstFilename, dbPath+filenumber) self.assertEqual(sstFilename, dbPath+filenumber)
# Check that all the Blob filenames have a correct full path (no multiple '/').
blobFileList = re.findall(r"%s.*\d+.blob" % dbPath, data)
self.assertTrue(len(blobFileList) >= 1)
for blobFilename in blobFileList:
filenumber = re.findall(r"\d+.blob", blobFilename)[0]
self.assertEqual(blobFilename, dbPath+filenumber)
# Check that all the manifest filenames # Check that all the manifest filenames
# have a correct full path (no multiple '/'). # have a correct full path (no multiple '/').
manifestFileList = re.findall(r"%s.*MANIFEST-\d+" % dbPath, data) manifestFileList = re.findall(r"%s.*MANIFEST-\d+" % dbPath, data)
self.assertTrue(len(manifestFileList) >= 1)
for manifestFilename in manifestFileList: for manifestFilename in manifestFileList:
filenumber = re.findall(r"(?<=MANIFEST-)\d+", manifestFilename)[0] filenumber = re.findall(r"(?<=MANIFEST-)\d+", manifestFilename)[0]
self.assertEqual(manifestFilename, dbPath+"MANIFEST-"+filenumber) self.assertEqual(manifestFilename, dbPath+"MANIFEST-"+filenumber)