Implement a new subcommand "identify" for sst_dump (#6943)
Summary: Implemented a subcommand of sst_dump called identify, which either determines whether a given file is an SST file or identifies and lists all the SST files in a directory. This update also fixes the problem that sst_dump exits with a success status even if the target file/directory does not exist, is not an SST file, is empty, or is corrupted. One test is added to sst_dump_test. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6943 Test Plan: Passed make check and a few manual tests Reviewed By: pdillinger Differential Revision: D21928985 Pulled By: gg814 fbshipit-source-id: 9a8b48e0cf1a0e96b13f42b690aba8ad981afad3
This commit is contained in:
parent
fb08330f74
commit
119b26fac0
@ -13,6 +13,7 @@
|
|||||||
* Fix abnormally large estimate from GetApproximateSizes when a range starts near the end of one SST file and near the beginning of another. Now GetApproximateSizes consistently and fairly includes the size of SST metadata in addition to data blocks, attributing metadata proportionally among the data blocks based on their size.
|
* Fix abnormally large estimate from GetApproximateSizes when a range starts near the end of one SST file and near the beginning of another. Now GetApproximateSizes consistently and fairly includes the size of SST metadata in addition to data blocks, attributing metadata proportionally among the data blocks based on their size.
|
||||||
* Fix potential file descriptor leakage in PosixEnv's IsDirectory() and NewRandomAccessFile().
|
* Fix potential file descriptor leakage in PosixEnv's IsDirectory() and NewRandomAccessFile().
|
||||||
* Fix false negative from the VerifyChecksum() API when there is a checksum mismatch in an index partition block in a BlockBasedTable format table file (index_type is kTwoLevelIndexSearch).
|
* Fix false negative from the VerifyChecksum() API when there is a checksum mismatch in an index partition block in a BlockBasedTable format table file (index_type is kTwoLevelIndexSearch).
|
||||||
|
* Fix sst_dump to return non-zero exit code if the specified file is not a recognized SST file or fails requested checks.
|
||||||
|
|
||||||
### Public API Change
|
### Public API Change
|
||||||
* Flush(..., column_family) may return Status::ColumnFamilyDropped() instead of Status::InvalidArgument() if column_family is dropped while processing the flush request.
|
* Flush(..., column_family) may return Status::ColumnFamilyDropped() instead of Status::InvalidArgument() if column_family is dropped while processing the flush request.
|
||||||
|
@ -9,8 +9,7 @@
|
|||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
ROCKSDB_NAMESPACE::SSTDumpTool tool;
|
ROCKSDB_NAMESPACE::SSTDumpTool tool;
|
||||||
tool.Run(argc, argv);
|
return tool.Run(argc, argv);
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -298,13 +298,50 @@ TEST_F(SSTDumpToolTest, NoSstFile) {
|
|||||||
"--command=verify", "--command=recompress", "--command=verify_checksum",
|
"--command=verify", "--command=recompress", "--command=verify_checksum",
|
||||||
"--show_properties"}) {
|
"--show_properties"}) {
|
||||||
snprintf(usage[1], kOptLength, "%s", command);
|
snprintf(usage[1], kOptLength, "%s", command);
|
||||||
ASSERT_TRUE(!tool.Run(3, usage, opts));
|
ASSERT_TRUE(tool.Run(3, usage, opts));
|
||||||
}
|
}
|
||||||
for (int i = 0; i < 3; i++) {
|
for (int i = 0; i < 3; i++) {
|
||||||
delete[] usage[i];
|
delete[] usage[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(SSTDumpToolTest, ValidSSTPath) {
|
||||||
|
Options opts;
|
||||||
|
opts.env = env();
|
||||||
|
char* usage[3];
|
||||||
|
PopulateCommandArgs("", "", usage);
|
||||||
|
SSTDumpTool tool;
|
||||||
|
std::string file_not_exists = MakeFilePath("file_not_exists.sst");
|
||||||
|
std::string sst_file = MakeFilePath("rocksdb_sst_test.sst");
|
||||||
|
createSST(opts, sst_file);
|
||||||
|
std::string text_file = MakeFilePath("text_file");
|
||||||
|
ASSERT_OK(WriteStringToFile(opts.env, "Hello World!", text_file));
|
||||||
|
std::string fake_sst = MakeFilePath("fake_sst.sst");
|
||||||
|
ASSERT_OK(WriteStringToFile(opts.env, "Not an SST file!", fake_sst));
|
||||||
|
|
||||||
|
for (const auto& command_arg : {"--command=verify", "--command=identify"}) {
|
||||||
|
snprintf(usage[1], kOptLength, "%s", command_arg);
|
||||||
|
|
||||||
|
snprintf(usage[2], kOptLength, "--file=%s", file_not_exists.c_str());
|
||||||
|
ASSERT_TRUE(tool.Run(3, usage, opts));
|
||||||
|
|
||||||
|
snprintf(usage[2], kOptLength, "--file=%s", sst_file.c_str());
|
||||||
|
ASSERT_TRUE(!tool.Run(3, usage, opts));
|
||||||
|
|
||||||
|
snprintf(usage[2], kOptLength, "--file=%s", text_file.c_str());
|
||||||
|
ASSERT_TRUE(tool.Run(3, usage, opts));
|
||||||
|
|
||||||
|
snprintf(usage[2], kOptLength, "--file=%s", fake_sst.c_str());
|
||||||
|
ASSERT_TRUE(tool.Run(3, usage, opts));
|
||||||
|
}
|
||||||
|
ASSERT_OK(opts.env->DeleteFile(sst_file));
|
||||||
|
ASSERT_OK(opts.env->DeleteFile(text_file));
|
||||||
|
ASSERT_OK(opts.env->DeleteFile(fake_sst));
|
||||||
|
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
delete[] usage[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
} // namespace ROCKSDB_NAMESPACE
|
} // namespace ROCKSDB_NAMESPACE
|
||||||
|
|
||||||
#ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
|
#ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
|
||||||
|
@ -95,6 +95,12 @@ Status SstFileDumper::GetTableReader(const std::string& file_path) {
|
|||||||
s = options_.env->GetFileSize(file_path, &file_size);
|
s = options_.env->GetFileSize(file_path, &file_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// check empty file
|
||||||
|
// if true, skip further processing of this file
|
||||||
|
if (file_size == 0) {
|
||||||
|
return Status::Aborted(file_path, "Empty file");
|
||||||
|
}
|
||||||
|
|
||||||
file_.reset(new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(file),
|
file_.reset(new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(file),
|
||||||
file_path));
|
file_path));
|
||||||
|
|
||||||
@ -478,20 +484,21 @@ namespace {
|
|||||||
void print_help() {
|
void print_help() {
|
||||||
fprintf(
|
fprintf(
|
||||||
stderr,
|
stderr,
|
||||||
R"(sst_dump --file=<data_dir_OR_sst_file> [--command=check|scan|raw|recompress]
|
R"(sst_dump --file=<data_dir_OR_sst_file> [--command=check|scan|raw|recompress|identify]
|
||||||
--file=<data_dir_OR_sst_file>
|
--file=<data_dir_OR_sst_file>
|
||||||
Path to SST file or directory containing SST files
|
Path to SST file or directory containing SST files
|
||||||
|
|
||||||
--env_uri=<uri of underlying Env>
|
--env_uri=<uri of underlying Env>
|
||||||
URI of underlying Env
|
URI of underlying Env
|
||||||
|
|
||||||
--command=check|scan|raw|verify
|
--command=check|scan|raw|verify|identify
|
||||||
check: Iterate over entries in files but don't print anything except if an error is encountered (default command)
|
check: Iterate over entries in files but don't print anything except if an error is encountered (default command)
|
||||||
scan: Iterate over entries in files and print them to screen
|
scan: Iterate over entries in files and print them to screen
|
||||||
raw: Dump all the table contents to <file_name>_dump.txt
|
raw: Dump all the table contents to <file_name>_dump.txt
|
||||||
verify: Iterate all the blocks in files verifying checksum to detect possible corruption but don't print anything except if a corruption is encountered
|
verify: Iterate all the blocks in files verifying checksum to detect possible corruption but don't print anything except if a corruption is encountered
|
||||||
recompress: reports the SST file size if recompressed with different
|
recompress: reports the SST file size if recompressed with different
|
||||||
compression types
|
compression types
|
||||||
|
identify: Reports a file is a valid SST file or lists all valid SST files under a directory
|
||||||
|
|
||||||
--output_hex
|
--output_hex
|
||||||
Can be combined with scan command to print the keys and values in Hex
|
Can be combined with scan command to print the keys and values in Hex
|
||||||
@ -520,7 +527,7 @@ void print_help() {
|
|||||||
|
|
||||||
--show_properties
|
--show_properties
|
||||||
Print table properties after iterating over the file when executing
|
Print table properties after iterating over the file when executing
|
||||||
check|scan|raw
|
check|scan|raw|identify
|
||||||
|
|
||||||
--set_block_size=<block_size>
|
--set_block_size=<block_size>
|
||||||
Can be combined with --command=recompress to set the block size that will
|
Can be combined with --command=recompress to set the block size that will
|
||||||
@ -748,17 +755,29 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
|
|||||||
ROCKSDB_NAMESPACE::Env* env = options.env;
|
ROCKSDB_NAMESPACE::Env* env = options.env;
|
||||||
ROCKSDB_NAMESPACE::Status st = env->GetChildren(dir_or_file, &filenames);
|
ROCKSDB_NAMESPACE::Status st = env->GetChildren(dir_or_file, &filenames);
|
||||||
bool dir = true;
|
bool dir = true;
|
||||||
if (!st.ok()) {
|
if (!st.ok() || filenames.empty()) {
|
||||||
|
// dir_or_file does not exist or does not contain children
|
||||||
|
// Check its existence first
|
||||||
|
Status s = env->FileExists(dir_or_file);
|
||||||
|
// dir_or_file does not exist
|
||||||
|
if (!s.ok()) {
|
||||||
|
fprintf(stderr, "%s%s: No such file or directory\n", s.ToString().c_str(),
|
||||||
|
dir_or_file);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// dir_or_file exists and is treated as a "file"
|
||||||
|
// since it has no children
|
||||||
|
// This is ok since later it will be checked
|
||||||
|
// whether it is a valid sst or not
|
||||||
|
// (A directory "file" is not a valid sst)
|
||||||
filenames.clear();
|
filenames.clear();
|
||||||
filenames.push_back(dir_or_file);
|
filenames.push_back(dir_or_file);
|
||||||
dir = false;
|
dir = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(stdout, "from [%s] to [%s]\n",
|
|
||||||
ROCKSDB_NAMESPACE::Slice(from_key).ToString(true).c_str(),
|
|
||||||
ROCKSDB_NAMESPACE::Slice(to_key).ToString(true).c_str());
|
|
||||||
|
|
||||||
uint64_t total_read = 0;
|
uint64_t total_read = 0;
|
||||||
|
// List of RocksDB SST files without corruption
|
||||||
|
std::vector<std::string> valid_sst_files;
|
||||||
for (size_t i = 0; i < filenames.size(); i++) {
|
for (size_t i = 0; i < filenames.size(); i++) {
|
||||||
std::string filename = filenames.at(i);
|
std::string filename = filenames.at(i);
|
||||||
if (filename.length() <= 4 ||
|
if (filename.length() <= 4 ||
|
||||||
@ -766,6 +785,7 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
|
|||||||
// ignore
|
// ignore
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dir) {
|
if (dir) {
|
||||||
filename = std::string(dir_or_file) + "/" + filename;
|
filename = std::string(dir_or_file) + "/" + filename;
|
||||||
}
|
}
|
||||||
@ -773,10 +793,23 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
|
|||||||
ROCKSDB_NAMESPACE::SstFileDumper dumper(options, filename, readahead_size,
|
ROCKSDB_NAMESPACE::SstFileDumper dumper(options, filename, readahead_size,
|
||||||
verify_checksum, output_hex,
|
verify_checksum, output_hex,
|
||||||
decode_blob_index);
|
decode_blob_index);
|
||||||
|
// Not a valid SST
|
||||||
if (!dumper.getStatus().ok()) {
|
if (!dumper.getStatus().ok()) {
|
||||||
fprintf(stderr, "%s: %s\n", filename.c_str(),
|
fprintf(stderr, "%s: %s\n", filename.c_str(),
|
||||||
dumper.getStatus().ToString().c_str());
|
dumper.getStatus().ToString().c_str());
|
||||||
continue;
|
continue;
|
||||||
|
} else {
|
||||||
|
valid_sst_files.push_back(filename);
|
||||||
|
// Print out from and to key information once
|
||||||
|
// when there is at least one valid SST
|
||||||
|
if (valid_sst_files.size() == 1) {
|
||||||
|
// from_key and to_key are only used for "check", "scan", or ""
|
||||||
|
if (command == "check" || command == "scan" || command == "") {
|
||||||
|
fprintf(stdout, "from [%s] to [%s]\n",
|
||||||
|
ROCKSDB_NAMESPACE::Slice(from_key).ToString(true).c_str(),
|
||||||
|
ROCKSDB_NAMESPACE::Slice(to_key).ToString(true).c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (command == "recompress") {
|
if (command == "recompress") {
|
||||||
@ -881,7 +914,35 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
|
|||||||
fprintf(stdout, "total filter block size: %" PRIu64 "\n",
|
fprintf(stdout, "total filter block size: %" PRIu64 "\n",
|
||||||
total_filter_block_size);
|
total_filter_block_size);
|
||||||
}
|
}
|
||||||
return 0;
|
|
||||||
|
if (valid_sst_files.empty()) {
|
||||||
|
// No valid SST files are found
|
||||||
|
// Exit with an error state
|
||||||
|
if (dir) {
|
||||||
|
fprintf(stdout, "------------------------------\n");
|
||||||
|
fprintf(stderr, "No valid SST files found in %s\n", dir_or_file);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "%s is not a valid SST file\n", dir_or_file);
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
if (command == "identify") {
|
||||||
|
if (dir) {
|
||||||
|
fprintf(stdout, "------------------------------\n");
|
||||||
|
fprintf(stdout, "List of valid SST files found in %s:\n", dir_or_file);
|
||||||
|
for (const auto& f : valid_sst_files) {
|
||||||
|
fprintf(stdout, "%s\n", f.c_str());
|
||||||
|
}
|
||||||
|
fprintf(stdout, "Number of valid SST files: %zu\n",
|
||||||
|
valid_sst_files.size());
|
||||||
|
} else {
|
||||||
|
fprintf(stdout, "%s is a valid SST file\n", dir_or_file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// At least one valid SST
|
||||||
|
// exit with a success state
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} // namespace ROCKSDB_NAMESPACE
|
} // namespace ROCKSDB_NAMESPACE
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user