Implement a new subcommand "identify" for sst_dump (#6943)

Summary:
Implemented a subcommand of sst_dump called identify, which determines whether a file is an SST file, or identifies and lists all the SST files in a directory.

This update also fixes the problem that sst_dump exits with a success status even if the target file or directory does not exist, is not an SST file, is empty, or is corrupted.

One test is added to sst_dump_test.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6943

Test Plan: Passed make check and a few manual tests

Reviewed By: pdillinger

Differential Revision: D21928985

Pulled By: gg814

fbshipit-source-id: 9a8b48e0cf1a0e96b13f42b690aba8ad981afad3
This commit is contained in:
Zitan Chen 2020-06-08 13:56:22 -07:00 committed by Facebook GitHub Bot
parent fb08330f74
commit 119b26fac0
4 changed files with 110 additions and 12 deletions

View File

@ -13,6 +13,7 @@
* Fix abnormally large estimate from GetApproximateSizes when a range starts near the end of one SST file and near the beginning of another. Now GetApproximateSizes consistently and fairly includes the size of SST metadata in addition to data blocks, attributing metadata proportionally among the data blocks based on their size. * Fix abnormally large estimate from GetApproximateSizes when a range starts near the end of one SST file and near the beginning of another. Now GetApproximateSizes consistently and fairly includes the size of SST metadata in addition to data blocks, attributing metadata proportionally among the data blocks based on their size.
* Fix potential file descriptor leakage in PosixEnv's IsDirectory() and NewRandomAccessFile(). * Fix potential file descriptor leakage in PosixEnv's IsDirectory() and NewRandomAccessFile().
* Fix false negative from the VerifyChecksum() API when there is a checksum mismatch in an index partition block in a BlockBasedTable format table file (index_type is kTwoLevelIndexSearch). * Fix false negative from the VerifyChecksum() API when there is a checksum mismatch in an index partition block in a BlockBasedTable format table file (index_type is kTwoLevelIndexSearch).
* Fix sst_dump to return non-zero exit code if the specified file is not a recognized SST file or fails requested checks.
### Public API Change ### Public API Change
* Flush(..., column_family) may return Status::ColumnFamilyDropped() instead of Status::InvalidArgument() if column_family is dropped while processing the flush request. * Flush(..., column_family) may return Status::ColumnFamilyDropped() instead of Status::InvalidArgument() if column_family is dropped while processing the flush request.

View File

@ -9,8 +9,7 @@
int main(int argc, char** argv) { int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::SSTDumpTool tool; ROCKSDB_NAMESPACE::SSTDumpTool tool;
tool.Run(argc, argv); return tool.Run(argc, argv);
return 0;
} }
#else #else
#include <stdio.h> #include <stdio.h>

View File

@ -298,13 +298,50 @@ TEST_F(SSTDumpToolTest, NoSstFile) {
"--command=verify", "--command=recompress", "--command=verify_checksum", "--command=verify", "--command=recompress", "--command=verify_checksum",
"--show_properties"}) { "--show_properties"}) {
snprintf(usage[1], kOptLength, "%s", command); snprintf(usage[1], kOptLength, "%s", command);
ASSERT_TRUE(!tool.Run(3, usage, opts)); ASSERT_TRUE(tool.Run(3, usage, opts));
} }
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
delete[] usage[i]; delete[] usage[i];
} }
} }
// Checks the exit code of SSTDumpTool::Run for "--command=verify" and
// "--command=identify" against four kinds of path: one that this test never
// writes, the file produced by createSST, a plain text file, and a text file
// that merely carries a ".sst" suffix. Only the createSST output is expected
// to make Run return zero.
TEST_F(SSTDumpToolTest, ValidSSTPath) {
Options opts;
opts.env = env();
// NOTE(review): PopulateCommandArgs is defined outside this view; presumably
// it allocates the three usage[] buffers (each at least kOptLength bytes)
// that are overwritten below and released with delete[] at the end -- confirm.
char* usage[3];
PopulateCommandArgs("", "", usage);
SSTDumpTool tool;
// Nothing in this test writes to this path.
std::string file_not_exists = MakeFilePath("file_not_exists.sst");
// Expected to be accepted by Run (see the zero-return assertion below).
std::string sst_file = MakeFilePath("rocksdb_sst_test.sst");
createSST(opts, sst_file);
// Text content, no ".sst" suffix.
std::string text_file = MakeFilePath("text_file");
ASSERT_OK(WriteStringToFile(opts.env, "Hello World!", text_file));
// Text content behind a ".sst" suffix.
std::string fake_sst = MakeFilePath("fake_sst.sst");
ASSERT_OK(WriteStringToFile(opts.env, "Not an SST file!", fake_sst));
// usage[1] carries the command; usage[2] is rewritten in place before every
// Run call so that the same argv array targets the next path.
for (const auto& command_arg : {"--command=verify", "--command=identify"}) {
snprintf(usage[1], kOptLength, "%s", command_arg);
// Path not written by this test: non-zero return expected.
snprintf(usage[2], kOptLength, "--file=%s", file_not_exists.c_str());
ASSERT_TRUE(tool.Run(3, usage, opts));
// File produced by createSST: zero return expected.
snprintf(usage[2], kOptLength, "--file=%s", sst_file.c_str());
ASSERT_TRUE(!tool.Run(3, usage, opts));
// Plain text file: non-zero return expected.
snprintf(usage[2], kOptLength, "--file=%s", text_file.c_str());
ASSERT_TRUE(tool.Run(3, usage, opts));
// Text file with a ".sst" suffix: non-zero return expected.
snprintf(usage[2], kOptLength, "--file=%s", fake_sst.c_str());
ASSERT_TRUE(tool.Run(3, usage, opts));
}
// Remove the three files written above; file_not_exists is not deleted
// because this test never writes it.
// NOTE(review): a failing ASSERT_* above leaves the test body early, so this
// cleanup and the delete[] loop below would be skipped in that case.
ASSERT_OK(opts.env->DeleteFile(sst_file));
ASSERT_OK(opts.env->DeleteFile(text_file));
ASSERT_OK(opts.env->DeleteFile(fake_sst));
for (int i = 0; i < 3; i++) {
delete[] usage[i];
}
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE
#ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS #ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS

View File

@ -95,6 +95,12 @@ Status SstFileDumper::GetTableReader(const std::string& file_path) {
s = options_.env->GetFileSize(file_path, &file_size); s = options_.env->GetFileSize(file_path, &file_size);
} }
// check empty file
// if true, skip further processing of this file
if (file_size == 0) {
return Status::Aborted(file_path, "Empty file");
}
file_.reset(new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(file), file_.reset(new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(file),
file_path)); file_path));
@ -478,20 +484,21 @@ namespace {
void print_help() { void print_help() {
fprintf( fprintf(
stderr, stderr,
R"(sst_dump --file=<data_dir_OR_sst_file> [--command=check|scan|raw|recompress] R"(sst_dump --file=<data_dir_OR_sst_file> [--command=check|scan|raw|recompress|identify]
--file=<data_dir_OR_sst_file> --file=<data_dir_OR_sst_file>
Path to SST file or directory containing SST files Path to SST file or directory containing SST files
--env_uri=<uri of underlying Env> --env_uri=<uri of underlying Env>
URI of underlying Env URI of underlying Env
--command=check|scan|raw|verify --command=check|scan|raw|verify|identify
check: Iterate over entries in files but don't print anything except if an error is encountered (default command) check: Iterate over entries in files but don't print anything except if an error is encountered (default command)
scan: Iterate over entries in files and print them to screen scan: Iterate over entries in files and print them to screen
raw: Dump all the table contents to <file_name>_dump.txt raw: Dump all the table contents to <file_name>_dump.txt
verify: Iterate all the blocks in files verifying checksum to detect possible corruption but don't print anything except if a corruption is encountered verify: Iterate all the blocks in files verifying checksum to detect possible corruption but don't print anything except if a corruption is encountered
recompress: reports the SST file size if recompressed with different recompress: reports the SST file size if recompressed with different
compression types compression types
identify: Reports a file is a valid SST file or lists all valid SST files under a directory
--output_hex --output_hex
Can be combined with scan command to print the keys and values in Hex Can be combined with scan command to print the keys and values in Hex
@ -520,7 +527,7 @@ void print_help() {
--show_properties --show_properties
Print table properties after iterating over the file when executing Print table properties after iterating over the file when executing
check|scan|raw check|scan|raw|identify
--set_block_size=<block_size> --set_block_size=<block_size>
Can be combined with --command=recompress to set the block size that will Can be combined with --command=recompress to set the block size that will
@ -748,17 +755,29 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
ROCKSDB_NAMESPACE::Env* env = options.env; ROCKSDB_NAMESPACE::Env* env = options.env;
ROCKSDB_NAMESPACE::Status st = env->GetChildren(dir_or_file, &filenames); ROCKSDB_NAMESPACE::Status st = env->GetChildren(dir_or_file, &filenames);
bool dir = true; bool dir = true;
if (!st.ok()) { if (!st.ok() || filenames.empty()) {
// dir_or_file does not exist or does not contain children
// Check its existence first
Status s = env->FileExists(dir_or_file);
// dir_or_file does not exist
if (!s.ok()) {
fprintf(stderr, "%s%s: No such file or directory\n", s.ToString().c_str(),
dir_or_file);
return 1;
}
// dir_or_file exists and is treated as a "file"
// since it has no children.
// This is OK because it is checked later
// whether it is a valid SST or not
// (a directory "file" is not a valid SST).
filenames.clear(); filenames.clear();
filenames.push_back(dir_or_file); filenames.push_back(dir_or_file);
dir = false; dir = false;
} }
fprintf(stdout, "from [%s] to [%s]\n",
ROCKSDB_NAMESPACE::Slice(from_key).ToString(true).c_str(),
ROCKSDB_NAMESPACE::Slice(to_key).ToString(true).c_str());
uint64_t total_read = 0; uint64_t total_read = 0;
// List of RocksDB SST file without corruption
std::vector<std::string> valid_sst_files;
for (size_t i = 0; i < filenames.size(); i++) { for (size_t i = 0; i < filenames.size(); i++) {
std::string filename = filenames.at(i); std::string filename = filenames.at(i);
if (filename.length() <= 4 || if (filename.length() <= 4 ||
@ -766,6 +785,7 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
// ignore // ignore
continue; continue;
} }
if (dir) { if (dir) {
filename = std::string(dir_or_file) + "/" + filename; filename = std::string(dir_or_file) + "/" + filename;
} }
@ -773,10 +793,23 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
ROCKSDB_NAMESPACE::SstFileDumper dumper(options, filename, readahead_size, ROCKSDB_NAMESPACE::SstFileDumper dumper(options, filename, readahead_size,
verify_checksum, output_hex, verify_checksum, output_hex,
decode_blob_index); decode_blob_index);
// Not a valid SST
if (!dumper.getStatus().ok()) { if (!dumper.getStatus().ok()) {
fprintf(stderr, "%s: %s\n", filename.c_str(), fprintf(stderr, "%s: %s\n", filename.c_str(),
dumper.getStatus().ToString().c_str()); dumper.getStatus().ToString().c_str());
continue; continue;
} else {
valid_sst_files.push_back(filename);
// Print out from and to key information once
// where there is at least one valid SST
if (valid_sst_files.size() == 1) {
// from_key and to_key are only used for "check", "scan", or ""
if (command == "check" || command == "scan" || command == "") {
fprintf(stdout, "from [%s] to [%s]\n",
ROCKSDB_NAMESPACE::Slice(from_key).ToString(true).c_str(),
ROCKSDB_NAMESPACE::Slice(to_key).ToString(true).c_str());
}
}
} }
if (command == "recompress") { if (command == "recompress") {
@ -881,7 +914,35 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
fprintf(stdout, "total filter block size: %" PRIu64 "\n", fprintf(stdout, "total filter block size: %" PRIu64 "\n",
total_filter_block_size); total_filter_block_size);
} }
return 0;
if (valid_sst_files.empty()) {
// No valid SST files are found
// Exit with an error state
if (dir) {
fprintf(stdout, "------------------------------\n");
fprintf(stderr, "No valid SST files found in %s\n", dir_or_file);
} else {
fprintf(stderr, "%s is not a valid SST file\n", dir_or_file);
}
return 1;
} else {
if (command == "identify") {
if (dir) {
fprintf(stdout, "------------------------------\n");
fprintf(stdout, "List of valid SST files found in %s:\n", dir_or_file);
for (const auto& f : valid_sst_files) {
fprintf(stdout, "%s\n", f.c_str());
}
fprintf(stdout, "Number of valid SST files: %zu\n",
valid_sst_files.size());
} else {
fprintf(stdout, "%s is a valid SST file\n", dir_or_file);
}
}
// At least one valid SST
// exit with a success state
return 0;
}
} }
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE