make sst_dump compression size command consistent
Summary: Like the other subcommands, reporting compression sizes is now selected through the `--command` CLI argument (`--command=recompress`) instead of the standalone `--show_compression_sizes` flag. This change also adds a `--compression_types` argument, since it is useful to restrict which compression types are tried (at least in my dictionary compression experiments). Closes https://github.com/facebook/rocksdb/pull/2706 Differential Revision: D5589520 Pulled By: ajkr fbshipit-source-id: 305bb4ebcc95eecc8a85523cd3b1050619c9ddc5
This commit is contained in:
parent
74f18c1301
commit
8254e9b57c
@ -195,7 +195,7 @@ TEST_F(SSTDumpToolTest, CompressedSizes) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
snprintf(usage[0], optLength, "./sst_dump");
|
snprintf(usage[0], optLength, "./sst_dump");
|
||||||
snprintf(usage[1], optLength, "--show_compression_sizes");
|
snprintf(usage[1], optLength, "--command=recompress");
|
||||||
snprintf(usage[2], optLength, "--file=rocksdb_sst_test.sst");
|
snprintf(usage[2], optLength, "--file=rocksdb_sst_test.sst");
|
||||||
rocksdb::SSTDumpTool tool;
|
rocksdb::SSTDumpTool tool;
|
||||||
ASSERT_TRUE(!tool.Run(3, usage));
|
ASSERT_TRUE(!tool.Run(3, usage));
|
||||||
|
@ -60,6 +60,17 @@ extern const uint64_t kLegacyPlainTableMagicNumber;
|
|||||||
|
|
||||||
const char* testFileName = "test_file_name";
|
const char* testFileName = "test_file_name";
|
||||||
|
|
||||||
|
static const std::vector<std::pair<CompressionType, const char*>>
|
||||||
|
kCompressions = {
|
||||||
|
{CompressionType::kNoCompression, "kNoCompression"},
|
||||||
|
{CompressionType::kSnappyCompression, "kSnappyCompression"},
|
||||||
|
{CompressionType::kZlibCompression, "kZlibCompression"},
|
||||||
|
{CompressionType::kBZip2Compression, "kBZip2Compression"},
|
||||||
|
{CompressionType::kLZ4Compression, "kLZ4Compression"},
|
||||||
|
{CompressionType::kLZ4HCCompression, "kLZ4HCCompression"},
|
||||||
|
{CompressionType::kXpressCompression, "kXpressCompression"},
|
||||||
|
{CompressionType::kZSTD, "kZSTD"}};
|
||||||
|
|
||||||
Status SstFileReader::GetTableReader(const std::string& file_path) {
|
Status SstFileReader::GetTableReader(const std::string& file_path) {
|
||||||
// Warning about 'magic_number' being uninitialized shows up only in UBsan
|
// Warning about 'magic_number' being uninitialized shows up only in UBsan
|
||||||
// builds. Though access is guarded by 's.ok()' checks, fix the issue to
|
// builds. Though access is guarded by 's.ok()' checks, fix the issue to
|
||||||
@ -174,7 +185,10 @@ uint64_t SstFileReader::CalculateCompressedTableSize(
|
|||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
int SstFileReader::ShowAllCompressionSizes(size_t block_size) {
|
int SstFileReader::ShowAllCompressionSizes(
|
||||||
|
size_t block_size,
|
||||||
|
const std::vector<std::pair<CompressionType, const char*>>&
|
||||||
|
compression_types) {
|
||||||
ReadOptions read_options;
|
ReadOptions read_options;
|
||||||
Options opts;
|
Options opts;
|
||||||
const ImmutableCFOptions imoptions(opts);
|
const ImmutableCFOptions imoptions(opts);
|
||||||
@ -184,17 +198,7 @@ int SstFileReader::ShowAllCompressionSizes(size_t block_size) {
|
|||||||
|
|
||||||
fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
|
fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
|
||||||
|
|
||||||
std::pair<CompressionType, const char*> compressions[] = {
|
for (auto& i : compression_types) {
|
||||||
{CompressionType::kNoCompression, "kNoCompression"},
|
|
||||||
{CompressionType::kSnappyCompression, "kSnappyCompression"},
|
|
||||||
{CompressionType::kZlibCompression, "kZlibCompression"},
|
|
||||||
{CompressionType::kBZip2Compression, "kBZip2Compression"},
|
|
||||||
{CompressionType::kLZ4Compression, "kLZ4Compression"},
|
|
||||||
{CompressionType::kLZ4HCCompression, "kLZ4HCCompression"},
|
|
||||||
{CompressionType::kXpressCompression, "kXpressCompression"},
|
|
||||||
{CompressionType::kZSTD, "kZSTD"}};
|
|
||||||
|
|
||||||
for (auto& i : compressions) {
|
|
||||||
if (CompressionTypeSupported(i.first)) {
|
if (CompressionTypeSupported(i.first)) {
|
||||||
CompressionOptions compress_opt;
|
CompressionOptions compress_opt;
|
||||||
std::string column_family_name;
|
std::string column_family_name;
|
||||||
@ -359,6 +363,8 @@ void print_help() {
|
|||||||
scan: Iterate over entries in files and print them to screen
|
scan: Iterate over entries in files and print them to screen
|
||||||
raw: Dump all the table contents to <file_name>_dump.txt
|
raw: Dump all the table contents to <file_name>_dump.txt
|
||||||
verify: Iterate all the blocks in files verifying checksum to detect possible coruption but dont print anything except if a corruption is encountered
|
verify: Iterate all the blocks in files verifying checksum to detect possible coruption but dont print anything except if a corruption is encountered
|
||||||
|
recompress: reports the SST file size if recompressed with different
|
||||||
|
compression types
|
||||||
|
|
||||||
--output_hex
|
--output_hex
|
||||||
Can be combined with scan command to print the keys and values in Hex
|
Can be combined with scan command to print the keys and values in Hex
|
||||||
@ -383,15 +389,17 @@ void print_help() {
|
|||||||
Can be combined with --from and --to to indicate that these values are encoded in Hex
|
Can be combined with --from and --to to indicate that these values are encoded in Hex
|
||||||
|
|
||||||
--show_properties
|
--show_properties
|
||||||
Print table properties after iterating over the file
|
Print table properties after iterating over the file when executing
|
||||||
|
check|scan|raw
|
||||||
--show_compression_sizes
|
|
||||||
Independent command that will recreate the SST file using 16K block size with different
|
|
||||||
compressions and report the size of the file using such compression
|
|
||||||
|
|
||||||
--set_block_size=<block_size>
|
--set_block_size=<block_size>
|
||||||
Can be combined with --show_compression_sizes to set the block size that will be used
|
Can be combined with --command=recompress to set the block size that will
|
||||||
when trying different compression algorithms
|
be used when trying different compression algorithms
|
||||||
|
|
||||||
|
--compression_types=<comma-separated list of CompressionType members, e.g.,
|
||||||
|
kSnappyCompression>
|
||||||
|
Can be combined with --command=recompress to run recompression for this
|
||||||
|
list of compression types
|
||||||
|
|
||||||
--parse_internal_key=<0xKEY>
|
--parse_internal_key=<0xKEY>
|
||||||
Convenience option to parse an internal key on the command line. Dumps the
|
Convenience option to parse an internal key on the command line. Dumps the
|
||||||
@ -415,13 +423,13 @@ int SSTDumpTool::Run(int argc, char** argv) {
|
|||||||
bool has_to = false;
|
bool has_to = false;
|
||||||
bool use_from_as_prefix = false;
|
bool use_from_as_prefix = false;
|
||||||
bool show_properties = false;
|
bool show_properties = false;
|
||||||
bool show_compression_sizes = false;
|
|
||||||
bool show_summary = false;
|
bool show_summary = false;
|
||||||
bool set_block_size = false;
|
bool set_block_size = false;
|
||||||
std::string from_key;
|
std::string from_key;
|
||||||
std::string to_key;
|
std::string to_key;
|
||||||
std::string block_size_str;
|
std::string block_size_str;
|
||||||
size_t block_size;
|
size_t block_size;
|
||||||
|
std::vector<std::pair<CompressionType, const char*>> compression_types;
|
||||||
uint64_t total_num_files = 0;
|
uint64_t total_num_files = 0;
|
||||||
uint64_t total_num_data_blocks = 0;
|
uint64_t total_num_data_blocks = 0;
|
||||||
uint64_t total_data_block_size = 0;
|
uint64_t total_data_block_size = 0;
|
||||||
@ -453,19 +461,34 @@ int SSTDumpTool::Run(int argc, char** argv) {
|
|||||||
use_from_as_prefix = true;
|
use_from_as_prefix = true;
|
||||||
} else if (strcmp(argv[i], "--show_properties") == 0) {
|
} else if (strcmp(argv[i], "--show_properties") == 0) {
|
||||||
show_properties = true;
|
show_properties = true;
|
||||||
} else if (strcmp(argv[i], "--show_compression_sizes") == 0) {
|
|
||||||
show_compression_sizes = true;
|
|
||||||
} else if (strcmp(argv[i], "--show_summary") == 0) {
|
} else if (strcmp(argv[i], "--show_summary") == 0) {
|
||||||
show_summary = true;
|
show_summary = true;
|
||||||
} else if (strncmp(argv[i], "--set_block_size=", 17) == 0) {
|
} else if (strncmp(argv[i], "--set_block_size=", 17) == 0) {
|
||||||
set_block_size = true;
|
set_block_size = true;
|
||||||
block_size_str = argv[i] + 17;
|
block_size_str = argv[i] + 17;
|
||||||
std::istringstream iss(block_size_str);
|
std::istringstream iss(block_size_str);
|
||||||
|
iss >> block_size;
|
||||||
if (iss.fail()) {
|
if (iss.fail()) {
|
||||||
fprintf(stderr, "block size must be numeric");
|
fprintf(stderr, "block size must be numeric\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
iss >> block_size;
|
} else if (strncmp(argv[i], "--compression_types=", 20) == 0) {
|
||||||
|
std::string compression_types_csv = argv[i] + 20;
|
||||||
|
std::istringstream iss(compression_types_csv);
|
||||||
|
std::string compression_type;
|
||||||
|
while (std::getline(iss, compression_type, ',')) {
|
||||||
|
auto iter = std::find_if(
|
||||||
|
kCompressions.begin(), kCompressions.end(),
|
||||||
|
[&compression_type](std::pair<CompressionType, const char*> curr) {
|
||||||
|
return curr.second == compression_type;
|
||||||
|
});
|
||||||
|
if (iter == kCompressions.end()) {
|
||||||
|
fprintf(stderr, "%s is not a valid CompressionType\n",
|
||||||
|
compression_type.c_str());
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
compression_types.emplace_back(*iter);
|
||||||
|
}
|
||||||
} else if (strncmp(argv[i], "--parse_internal_key=", 21) == 0) {
|
} else if (strncmp(argv[i], "--parse_internal_key=", 21) == 0) {
|
||||||
std::string in_key(argv[i] + 21);
|
std::string in_key(argv[i] + 21);
|
||||||
try {
|
try {
|
||||||
@ -547,12 +570,10 @@ int SSTDumpTool::Run(int argc, char** argv) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (show_compression_sizes) {
|
if (command == "recompress") {
|
||||||
if (set_block_size) {
|
reader.ShowAllCompressionSizes(
|
||||||
reader.ShowAllCompressionSizes(block_size);
|
set_block_size ? block_size : 16384,
|
||||||
} else {
|
compression_types.empty() ? kCompressions : compression_types);
|
||||||
reader.ShowAllCompressionSizes(16384);
|
|
||||||
}
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,7 +34,10 @@ class SstFileReader {
|
|||||||
Status DumpTable(const std::string& out_filename);
|
Status DumpTable(const std::string& out_filename);
|
||||||
Status getStatus() { return init_result_; }
|
Status getStatus() { return init_result_; }
|
||||||
|
|
||||||
int ShowAllCompressionSizes(size_t block_size);
|
int ShowAllCompressionSizes(
|
||||||
|
size_t block_size,
|
||||||
|
const std::vector<std::pair<CompressionType, const char*>>&
|
||||||
|
compression_types);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Get the TableReader implementation for the sst file
|
// Get the TableReader implementation for the sst file
|
||||||
|
Loading…
x
Reference in New Issue
Block a user