make sst_dump compression size command consistent

Summary:
- Like other subcommands, reporting compression sizes is now specified with the `--command` CLI arg (`--command=recompress`) instead of the standalone `--show_compression_sizes` flag.
- Also added a `--compression_types` arg, as it's useful to restrict the types of compression used, at least in my dictionary compression experiments.
Closes https://github.com/facebook/rocksdb/pull/2706

Differential Revision: D5589520

Pulled By: ajkr

fbshipit-source-id: 305bb4ebcc95eecc8a85523cd3b1050619c9ddc5
This commit is contained in:
Andrew Kryczka 2017-08-11 15:49:17 -07:00 committed by Facebook Github Bot
parent 74f18c1301
commit 8254e9b57c
3 changed files with 56 additions and 32 deletions

View File

@ -195,7 +195,7 @@ TEST_F(SSTDumpToolTest, CompressedSizes) {
}
snprintf(usage[0], optLength, "./sst_dump");
snprintf(usage[1], optLength, "--show_compression_sizes");
snprintf(usage[1], optLength, "--command=recompress");
snprintf(usage[2], optLength, "--file=rocksdb_sst_test.sst");
rocksdb::SSTDumpTool tool;
ASSERT_TRUE(!tool.Run(3, usage));

View File

@ -60,6 +60,17 @@ extern const uint64_t kLegacyPlainTableMagicNumber;
const char* testFileName = "test_file_name";
// Table pairing each CompressionType enumerator with its name spelled as a
// string. The string is what --compression_types entries are matched against
// when parsing the command line, and the whole table is the default set passed
// to ShowAllCompressionSizes() when no --compression_types list is given.
static const std::vector<std::pair<CompressionType, const char*>>
kCompressions = {
{CompressionType::kNoCompression, "kNoCompression"},
{CompressionType::kSnappyCompression, "kSnappyCompression"},
{CompressionType::kZlibCompression, "kZlibCompression"},
{CompressionType::kBZip2Compression, "kBZip2Compression"},
{CompressionType::kLZ4Compression, "kLZ4Compression"},
{CompressionType::kLZ4HCCompression, "kLZ4HCCompression"},
{CompressionType::kXpressCompression, "kXpressCompression"},
{CompressionType::kZSTD, "kZSTD"}};
Status SstFileReader::GetTableReader(const std::string& file_path) {
// Warning about 'magic_number' being uninitialized shows up only in UBsan
// builds. Though access is guarded by 's.ok()' checks, fix the issue to
@ -174,7 +185,10 @@ uint64_t SstFileReader::CalculateCompressedTableSize(
return size;
}
int SstFileReader::ShowAllCompressionSizes(size_t block_size) {
int SstFileReader::ShowAllCompressionSizes(
size_t block_size,
const std::vector<std::pair<CompressionType, const char*>>&
compression_types) {
ReadOptions read_options;
Options opts;
const ImmutableCFOptions imoptions(opts);
@ -184,17 +198,7 @@ int SstFileReader::ShowAllCompressionSizes(size_t block_size) {
fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
std::pair<CompressionType, const char*> compressions[] = {
{CompressionType::kNoCompression, "kNoCompression"},
{CompressionType::kSnappyCompression, "kSnappyCompression"},
{CompressionType::kZlibCompression, "kZlibCompression"},
{CompressionType::kBZip2Compression, "kBZip2Compression"},
{CompressionType::kLZ4Compression, "kLZ4Compression"},
{CompressionType::kLZ4HCCompression, "kLZ4HCCompression"},
{CompressionType::kXpressCompression, "kXpressCompression"},
{CompressionType::kZSTD, "kZSTD"}};
for (auto& i : compressions) {
for (auto& i : compression_types) {
if (CompressionTypeSupported(i.first)) {
CompressionOptions compress_opt;
std::string column_family_name;
@ -359,6 +363,8 @@ void print_help() {
scan: Iterate over entries in files and print them to screen
raw: Dump all the table contents to <file_name>_dump.txt
verify: Iterate all the blocks in files verifying checksum to detect possible coruption but dont print anything except if a corruption is encountered
recompress: reports the SST file size if recompressed with different
compression types
--output_hex
Can be combined with scan command to print the keys and values in Hex
@ -383,15 +389,17 @@ void print_help() {
Can be combined with --from and --to to indicate that these values are encoded in Hex
--show_properties
Print table properties after iterating over the file
--show_compression_sizes
Independent command that will recreate the SST file using 16K block size with different
compressions and report the size of the file using such compression
Print table properties after iterating over the file when executing
check|scan|raw
--set_block_size=<block_size>
Can be combined with --show_compression_sizes to set the block size that will be used
when trying different compression algorithms
Can be combined with --command=recompress to set the block size that will
be used when trying different compression algorithms
--compression_types=<comma-separated list of CompressionType members, e.g.,
kSnappyCompression>
Can be combined with --command=recompress to run recompression for this
list of compression types
--parse_internal_key=<0xKEY>
Convenience option to parse an internal key on the command line. Dumps the
@ -415,13 +423,13 @@ int SSTDumpTool::Run(int argc, char** argv) {
bool has_to = false;
bool use_from_as_prefix = false;
bool show_properties = false;
bool show_compression_sizes = false;
bool show_summary = false;
bool set_block_size = false;
std::string from_key;
std::string to_key;
std::string block_size_str;
size_t block_size;
std::vector<std::pair<CompressionType, const char*>> compression_types;
uint64_t total_num_files = 0;
uint64_t total_num_data_blocks = 0;
uint64_t total_data_block_size = 0;
@ -453,19 +461,34 @@ int SSTDumpTool::Run(int argc, char** argv) {
use_from_as_prefix = true;
} else if (strcmp(argv[i], "--show_properties") == 0) {
show_properties = true;
} else if (strcmp(argv[i], "--show_compression_sizes") == 0) {
show_compression_sizes = true;
} else if (strcmp(argv[i], "--show_summary") == 0) {
show_summary = true;
} else if (strncmp(argv[i], "--set_block_size=", 17) == 0) {
set_block_size = true;
block_size_str = argv[i] + 17;
std::istringstream iss(block_size_str);
iss >> block_size;
if (iss.fail()) {
fprintf(stderr, "block size must be numeric");
fprintf(stderr, "block size must be numeric\n");
exit(1);
}
iss >> block_size;
} else if (strncmp(argv[i], "--compression_types=", 20) == 0) {
std::string compression_types_csv = argv[i] + 20;
std::istringstream iss(compression_types_csv);
std::string compression_type;
while (std::getline(iss, compression_type, ',')) {
auto iter = std::find_if(
kCompressions.begin(), kCompressions.end(),
[&compression_type](std::pair<CompressionType, const char*> curr) {
return curr.second == compression_type;
});
if (iter == kCompressions.end()) {
fprintf(stderr, "%s is not a valid CompressionType\n",
compression_type.c_str());
exit(1);
}
compression_types.emplace_back(*iter);
}
} else if (strncmp(argv[i], "--parse_internal_key=", 21) == 0) {
std::string in_key(argv[i] + 21);
try {
@ -547,12 +570,10 @@ int SSTDumpTool::Run(int argc, char** argv) {
continue;
}
if (show_compression_sizes) {
if (set_block_size) {
reader.ShowAllCompressionSizes(block_size);
} else {
reader.ShowAllCompressionSizes(16384);
}
if (command == "recompress") {
reader.ShowAllCompressionSizes(
set_block_size ? block_size : 16384,
compression_types.empty() ? kCompressions : compression_types);
return 0;
}

View File

@ -34,7 +34,10 @@ class SstFileReader {
Status DumpTable(const std::string& out_filename);
Status getStatus() { return init_result_; }
int ShowAllCompressionSizes(size_t block_size);
int ShowAllCompressionSizes(
size_t block_size,
const std::vector<std::pair<CompressionType, const char*>>&
compression_types);
private:
// Get the TableReader implementation for the sst file