make sst_dump compression size command consistent
Summary: - Like other subcommands, reporting compression sizes must now be requested through the `--command` CLI arg (`--command=recompress`), which replaces the standalone `--show_compression_sizes` flag. - Also added a `--compression_types` arg, since it is useful to restrict which compression types are tried, at least in my dictionary compression experiments. Closes https://github.com/facebook/rocksdb/pull/2706 Differential Revision: D5589520 Pulled By: ajkr fbshipit-source-id: 305bb4ebcc95eecc8a85523cd3b1050619c9ddc5
This commit is contained in:
parent
74f18c1301
commit
8254e9b57c
@ -195,7 +195,7 @@ TEST_F(SSTDumpToolTest, CompressedSizes) {
|
||||
}
|
||||
|
||||
snprintf(usage[0], optLength, "./sst_dump");
|
||||
snprintf(usage[1], optLength, "--show_compression_sizes");
|
||||
snprintf(usage[1], optLength, "--command=recompress");
|
||||
snprintf(usage[2], optLength, "--file=rocksdb_sst_test.sst");
|
||||
rocksdb::SSTDumpTool tool;
|
||||
ASSERT_TRUE(!tool.Run(3, usage));
|
||||
|
@ -60,6 +60,17 @@ extern const uint64_t kLegacyPlainTableMagicNumber;
|
||||
|
||||
const char* testFileName = "test_file_name";
|
||||
|
||||
// Table pairing each CompressionType enumerator listed below with its name
// spelled out as a string. In the code visible here it serves two purposes:
//   * the --compression_types=<csv> argument is validated by searching this
//     table for an entry whose string matches each comma-separated name;
//   * it is the list handed to ShowAllCompressionSizes() for
//     --command=recompress when no --compression_types were supplied.
// The strings are matched exactly against the user-supplied names.
static const std::vector<std::pair<CompressionType, const char*>>
|
||||
kCompressions = {
|
||||
{CompressionType::kNoCompression, "kNoCompression"},
|
||||
{CompressionType::kSnappyCompression, "kSnappyCompression"},
|
||||
{CompressionType::kZlibCompression, "kZlibCompression"},
|
||||
{CompressionType::kBZip2Compression, "kBZip2Compression"},
|
||||
{CompressionType::kLZ4Compression, "kLZ4Compression"},
|
||||
{CompressionType::kLZ4HCCompression, "kLZ4HCCompression"},
|
||||
{CompressionType::kXpressCompression, "kXpressCompression"},
|
||||
{CompressionType::kZSTD, "kZSTD"}};
|
||||
|
||||
Status SstFileReader::GetTableReader(const std::string& file_path) {
|
||||
// Warning about 'magic_number' being uninitialized shows up only in UBsan
|
||||
// builds. Though access is guarded by 's.ok()' checks, fix the issue to
|
||||
@ -174,7 +185,10 @@ uint64_t SstFileReader::CalculateCompressedTableSize(
|
||||
return size;
|
||||
}
|
||||
|
||||
int SstFileReader::ShowAllCompressionSizes(size_t block_size) {
|
||||
int SstFileReader::ShowAllCompressionSizes(
|
||||
size_t block_size,
|
||||
const std::vector<std::pair<CompressionType, const char*>>&
|
||||
compression_types) {
|
||||
ReadOptions read_options;
|
||||
Options opts;
|
||||
const ImmutableCFOptions imoptions(opts);
|
||||
@ -184,17 +198,7 @@ int SstFileReader::ShowAllCompressionSizes(size_t block_size) {
|
||||
|
||||
fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
|
||||
|
||||
std::pair<CompressionType, const char*> compressions[] = {
|
||||
{CompressionType::kNoCompression, "kNoCompression"},
|
||||
{CompressionType::kSnappyCompression, "kSnappyCompression"},
|
||||
{CompressionType::kZlibCompression, "kZlibCompression"},
|
||||
{CompressionType::kBZip2Compression, "kBZip2Compression"},
|
||||
{CompressionType::kLZ4Compression, "kLZ4Compression"},
|
||||
{CompressionType::kLZ4HCCompression, "kLZ4HCCompression"},
|
||||
{CompressionType::kXpressCompression, "kXpressCompression"},
|
||||
{CompressionType::kZSTD, "kZSTD"}};
|
||||
|
||||
for (auto& i : compressions) {
|
||||
for (auto& i : compression_types) {
|
||||
if (CompressionTypeSupported(i.first)) {
|
||||
CompressionOptions compress_opt;
|
||||
std::string column_family_name;
|
||||
@ -359,6 +363,8 @@ void print_help() {
|
||||
scan: Iterate over entries in files and print them to screen
|
||||
raw: Dump all the table contents to <file_name>_dump.txt
|
||||
verify: Iterate all the blocks in files verifying checksum to detect possible coruption but dont print anything except if a corruption is encountered
|
||||
recompress: reports the SST file size if recompressed with different
|
||||
compression types
|
||||
|
||||
--output_hex
|
||||
Can be combined with scan command to print the keys and values in Hex
|
||||
@ -383,15 +389,17 @@ void print_help() {
|
||||
Can be combined with --from and --to to indicate that these values are encoded in Hex
|
||||
|
||||
--show_properties
|
||||
Print table properties after iterating over the file
|
||||
|
||||
--show_compression_sizes
|
||||
Independent command that will recreate the SST file using 16K block size with different
|
||||
compressions and report the size of the file using such compression
|
||||
Print table properties after iterating over the file when executing
|
||||
check|scan|raw
|
||||
|
||||
--set_block_size=<block_size>
|
||||
Can be combined with --show_compression_sizes to set the block size that will be used
|
||||
when trying different compression algorithms
|
||||
Can be combined with --command=recompress to set the block size that will
|
||||
be used when trying different compression algorithms
|
||||
|
||||
--compression_types=<comma-separated list of CompressionType members, e.g.,
|
||||
kSnappyCompression>
|
||||
Can be combined with --command=recompress to run recompression for this
|
||||
list of compression types
|
||||
|
||||
--parse_internal_key=<0xKEY>
|
||||
Convenience option to parse an internal key on the command line. Dumps the
|
||||
@ -415,13 +423,13 @@ int SSTDumpTool::Run(int argc, char** argv) {
|
||||
bool has_to = false;
|
||||
bool use_from_as_prefix = false;
|
||||
bool show_properties = false;
|
||||
bool show_compression_sizes = false;
|
||||
bool show_summary = false;
|
||||
bool set_block_size = false;
|
||||
std::string from_key;
|
||||
std::string to_key;
|
||||
std::string block_size_str;
|
||||
size_t block_size;
|
||||
std::vector<std::pair<CompressionType, const char*>> compression_types;
|
||||
uint64_t total_num_files = 0;
|
||||
uint64_t total_num_data_blocks = 0;
|
||||
uint64_t total_data_block_size = 0;
|
||||
@ -453,19 +461,34 @@ int SSTDumpTool::Run(int argc, char** argv) {
|
||||
use_from_as_prefix = true;
|
||||
} else if (strcmp(argv[i], "--show_properties") == 0) {
|
||||
show_properties = true;
|
||||
} else if (strcmp(argv[i], "--show_compression_sizes") == 0) {
|
||||
show_compression_sizes = true;
|
||||
} else if (strcmp(argv[i], "--show_summary") == 0) {
|
||||
show_summary = true;
|
||||
} else if (strncmp(argv[i], "--set_block_size=", 17) == 0) {
|
||||
set_block_size = true;
|
||||
block_size_str = argv[i] + 17;
|
||||
std::istringstream iss(block_size_str);
|
||||
iss >> block_size;
|
||||
if (iss.fail()) {
|
||||
fprintf(stderr, "block size must be numeric");
|
||||
fprintf(stderr, "block size must be numeric\n");
|
||||
exit(1);
|
||||
}
|
||||
iss >> block_size;
|
||||
} else if (strncmp(argv[i], "--compression_types=", 20) == 0) {
|
||||
std::string compression_types_csv = argv[i] + 20;
|
||||
std::istringstream iss(compression_types_csv);
|
||||
std::string compression_type;
|
||||
while (std::getline(iss, compression_type, ',')) {
|
||||
auto iter = std::find_if(
|
||||
kCompressions.begin(), kCompressions.end(),
|
||||
[&compression_type](std::pair<CompressionType, const char*> curr) {
|
||||
return curr.second == compression_type;
|
||||
});
|
||||
if (iter == kCompressions.end()) {
|
||||
fprintf(stderr, "%s is not a valid CompressionType\n",
|
||||
compression_type.c_str());
|
||||
exit(1);
|
||||
}
|
||||
compression_types.emplace_back(*iter);
|
||||
}
|
||||
} else if (strncmp(argv[i], "--parse_internal_key=", 21) == 0) {
|
||||
std::string in_key(argv[i] + 21);
|
||||
try {
|
||||
@ -547,12 +570,10 @@ int SSTDumpTool::Run(int argc, char** argv) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (show_compression_sizes) {
|
||||
if (set_block_size) {
|
||||
reader.ShowAllCompressionSizes(block_size);
|
||||
} else {
|
||||
reader.ShowAllCompressionSizes(16384);
|
||||
}
|
||||
if (command == "recompress") {
|
||||
reader.ShowAllCompressionSizes(
|
||||
set_block_size ? block_size : 16384,
|
||||
compression_types.empty() ? kCompressions : compression_types);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,10 @@ class SstFileReader {
|
||||
Status DumpTable(const std::string& out_filename);
|
||||
Status getStatus() { return init_result_; }
|
||||
|
||||
int ShowAllCompressionSizes(size_t block_size);
|
||||
int ShowAllCompressionSizes(
|
||||
size_t block_size,
|
||||
const std::vector<std::pair<CompressionType, const char*>>&
|
||||
compression_types);
|
||||
|
||||
private:
|
||||
// Get the TableReader implementation for the sst file
|
||||
|
Loading…
x
Reference in New Issue
Block a user