Add --decode_blob_index option to idump and dump commands (#9870)

Summary:
This patch completes the first part of the task: "Extend all three commands so they can decode and print blob references if a new option --decode_blob_index is specified"

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9870

Reviewed By: ltamasi

Differential Revision: D35753932

Pulled By: jowlyzhang

fbshipit-source-id: 9d2bbba0eef2ed86b982767eba9de1b4881f35c9
This commit is contained in:
yuzhangyu 2022-04-20 11:10:20 -07:00 committed by Facebook GitHub Bot
parent a5063c8931
commit 9b5790f018
3 changed files with 52 additions and 16 deletions

View File

@ -18,6 +18,7 @@
#include <stdexcept>
#include <string>
#include "db/blob/blob_index.h"
#include "db/db_impl/db_impl.h"
#include "db/dbformat.h"
#include "db/log_reader.h"
@ -1696,11 +1697,11 @@ InternalDumpCommand::InternalDumpCommand(
const std::vector<std::string>& /*params*/,
const std::map<std::string, std::string>& options,
const std::vector<std::string>& flags)
: LDBCommand(
options, flags, true,
BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM,
ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY,
ARG_COUNT_DELIM, ARG_STATS, ARG_INPUT_KEY_HEX})),
: LDBCommand(options, flags, true,
BuildCmdLineOptions(
{ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM, ARG_TO,
ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM, ARG_STATS,
ARG_INPUT_KEY_HEX, ARG_DECODE_BLOB_INDEX})),
has_from_(false),
has_to_(false),
max_keys_(-1),
@ -1708,7 +1709,8 @@ InternalDumpCommand::InternalDumpCommand(
count_only_(false),
count_delim_(false),
print_stats_(false),
is_input_key_hex_(false) {
is_input_key_hex_(false),
decode_blob_index_(false) {
has_from_ = ParseStringOption(options, ARG_FROM, &from_);
has_to_ = ParseStringOption(options, ARG_TO, &to_);
@ -1726,6 +1728,7 @@ InternalDumpCommand::InternalDumpCommand(
print_stats_ = IsFlagPresent(flags, ARG_STATS);
count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY);
is_input_key_hex_ = IsFlagPresent(flags, ARG_INPUT_KEY_HEX);
decode_blob_index_ = IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX);
if (is_input_key_hex_) {
if (has_from_) {
@ -1746,6 +1749,7 @@ void InternalDumpCommand::Help(std::string& ret) {
ret.append(" [--" + ARG_COUNT_ONLY + "]");
ret.append(" [--" + ARG_COUNT_DELIM + "=<char>]");
ret.append(" [--" + ARG_STATS + "]");
ret.append(" [--" + ARG_DECODE_BLOB_INDEX + "]");
ret.append("\n");
}
@ -1777,8 +1781,8 @@ void InternalDumpCommand::DoCommand() {
long long count = 0;
for (auto& key_version : key_versions) {
InternalKey ikey(key_version.user_key, key_version.sequence,
static_cast<ValueType>(key_version.type));
ValueType value_type = static_cast<ValueType>(key_version.type);
InternalKey ikey(key_version.user_key, key_version.sequence, value_type);
if (has_to_ && ikey.user_key() == to_) {
// GetAllKeyVersions() includes keys with user key `to_`, but idump has
// traditionally excluded such keys.
@ -1812,8 +1816,21 @@ void InternalDumpCommand::DoCommand() {
if (!count_only_ && !count_delim_) {
std::string key = ikey.DebugString(is_key_hex_);
std::string value = Slice(key_version.value).ToString(is_value_hex_);
std::cout << key << " => " << value << "\n";
Slice value(key_version.value);
if (!decode_blob_index_ || value_type != kTypeBlobIndex) {
fprintf(stdout, "%s => %s\n", key.c_str(),
value.ToString(is_value_hex_).c_str());
} else {
BlobIndex blob_index;
const Status s = blob_index.DecodeFrom(value);
if (!s.ok()) {
fprintf(stderr, "%s => error decoding blob index =>\n", key.c_str());
} else {
fprintf(stdout, "%s => %s\n", key.c_str(),
blob_index.DebugString(is_value_hex_).c_str());
}
}
}
// Terminate if maximum number of keys have been dumped
@ -1841,13 +1858,14 @@ DBDumperCommand::DBDumperCommand(
{ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM,
ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM,
ARG_STATS, ARG_TTL_START, ARG_TTL_END, ARG_TTL_BUCKET,
ARG_TIMESTAMP, ARG_PATH})),
ARG_TIMESTAMP, ARG_PATH, ARG_DECODE_BLOB_INDEX})),
null_from_(true),
null_to_(true),
max_keys_(-1),
count_only_(false),
count_delim_(false),
print_stats_(false) {
print_stats_(false),
decode_blob_index_(false) {
auto itr = options.find(ARG_FROM);
if (itr != options.end()) {
null_from_ = false;
@ -1887,6 +1905,7 @@ DBDumperCommand::DBDumperCommand(
print_stats_ = IsFlagPresent(flags, ARG_STATS);
count_only_ = IsFlagPresent(flags, ARG_COUNT_ONLY);
decode_blob_index_ = IsFlagPresent(flags, ARG_DECODE_BLOB_INDEX);
if (is_key_hex_) {
if (!null_from_) {
@ -1920,6 +1939,7 @@ void DBDumperCommand::Help(std::string& ret) {
ret.append(" [--" + ARG_TTL_START + "=<N>:- is inclusive]");
ret.append(" [--" + ARG_TTL_END + "=<N>:- is exclusive]");
ret.append(" [--" + ARG_PATH + "=<path_to_a_file>]");
ret.append(" [--" + ARG_DECODE_BLOB_INDEX + "]");
ret.append("\n");
}
@ -1958,7 +1978,7 @@ void DBDumperCommand::DoCommand() {
break;
case kTableFile:
DumpSstFile(options_, path_, is_key_hex_, /* show_properties */ true,
/* decode_blob_index */ false);
decode_blob_index_);
break;
case kDescriptorFile:
DumpManifestFile(options_, path_, /* verbose_ */ false, is_key_hex_,

View File

@ -107,6 +107,7 @@ class DBDumperCommand : public LDBCommand {
bool count_delim_;
bool print_stats_;
std::string path_;
bool decode_blob_index_;
static const std::string ARG_COUNT_ONLY;
static const std::string ARG_COUNT_DELIM;
@ -137,6 +138,7 @@ class InternalDumpCommand : public LDBCommand {
bool count_delim_;
bool print_stats_;
bool is_input_key_hex_;
bool decode_blob_index_;
static const std::string ARG_DELIM;
static const std::string ARG_COUNT_ONLY;

View File

@ -393,6 +393,20 @@ class LDBTestCase(unittest.TestCase):
hex(ord('b'))),
"'a' seq:1, type:1 => val\nInternal keys in range: 1")
def testIDumpDecodeBlobIndex(self):
print("Running testIDumpDecodeBlobIndex...")
self.assertRunOK("put a val --create_if_missing", "OK")
self.assertRunOK("put b val --enable_blob_files", "OK")
# Pattern to expect from dump with decode_blob_index flag enabled.
regex = ".*\[blob ref\].*"
expected_pattern = re.compile(regex)
cmd = "idump %s --decode_blob_index"
self.assertRunOKFull((cmd)
% (self.dbParam(self.DB_NAME)),
expected_pattern, unexpected=False,
isPattern=True)
def testMiscAdminTask(self):
print("Running testMiscAdminTask...")
# These tests need to be improved; for example with asserts about
@ -687,16 +701,16 @@ class LDBTestCase(unittest.TestCase):
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put sst1 sst1_val --create_if_missing", "OK")
self.assertRunOK("put sst2 sst2_val", "OK")
self.assertRunOK("put sst2 sst2_val --enable_blob_files", "OK")
self.assertRunOK("get sst1", "sst1_val")
# Pattern to expect from SST dump.
regex = ".*Sst file format:.*"
regex = ".*Sst file format:.*\n.*\[blob ref\].*"
expected_pattern = re.compile(regex)
sst_files = self.getSSTFiles(dbPath)
self.assertTrue(len(sst_files) >= 1)
cmd = "dump --path=%s"
cmd = "dump --path=%s --decode_blob_index"
self.assertRunOKFull((cmd)
% (sst_files[0]),
expected_pattern, unexpected=False,