Make ldb automagically determine the file type and use the correct dumping function

Summary:
This set of changes implements the following design: `ldb` accepts a `--path` parameter that can be used to specify a file name. The tool then applies a heuristic to determine how to output the data properly. The design decision is not to probe the file content, but to use the file name to determine which dumping function to call.

Usage examples:

Understands that path points to a manifest file and dumps it.
`./ldb --path=/tmp/test_db/MANIFEST-000023 dump`

Understands that path points to a WAL file and dumps it.
`./ldb --path=/tmp/test_db/000024.log dump --header`

Understands that path points to a SST file and dumps it.
`./ldb --path=/tmp/test_db/000007.sst dump`

Figures out that none of the supported file types are applicable and outputs
an appropriate error message.
`./ldb --path=/tmp/cron.log dump`

Test Plan:
Basics:

git diff
make clean
make -j 32 commit-prereq
arc lint

More specific testing (done as part of commit-prereq, but can be iterated separately when making isolated changes):

make clean
make ldb
python tools/ldb_test.py
make rocksdb_dump
make rocksdb_undump
sh tools/rocksdb_dump_test.sh

Reviewers: rven, IslamAbdelRahman, yhchiang, kradhakrishnan, anthony, igor, sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D52269
This commit is contained in:
Gunnar Kudrjavets 2016-01-06 14:19:08 -08:00
parent ba83447363
commit b1a3b4c0d0
4 changed files with 176 additions and 42 deletions

View File

@ -8,6 +8,7 @@
* Introduce CompactionJobInfo::compaction_reason; this field includes the reason the compaction was triggered.
* After a slowdown is triggered, if the estimated pending compaction bytes keep increasing, slow down further.
* Increase default options.delayed_write_rate to 2MB/s.
* Added a new parameter --path to the ldb tool. --path accepts the name of a MANIFEST, SST, or WAL file. Either --db or --path can be used when calling ldb.
## 4.3.0 (12/8/2015)
### New Features
@ -21,8 +22,8 @@
## 4.2.0 (11/9/2015)
### New Features
* Introduce CreateLoggerFromOptions(); this function creates a Logger for the provided DBOptions.
* Add GetAggregatedIntProperty(), which returns the sum of the GetIntProperty of all the column families.
* Introduce CreateLoggerFromOptions(); this function creates a Logger for the provided DBOptions.
* Add GetAggregatedIntProperty(), which returns the sum of the GetIntProperty of all the column families.
* Add MemoryUtil in rocksdb/utilities/memory.h. It currently offers a way to get the memory usage by type from a list of RocksDB instances.
### Public API Changes

View File

@ -40,6 +40,7 @@ namespace rocksdb {
using namespace std;
// Names of ldb command-line options, stored without the leading "--"
// (callers prepend it when building help text).
// For commands that need an open DB, at least one of ARG_DB / ARG_PATH must
// be present in the parsed options.
const string LDBCommand::ARG_DB = "db";
const string LDBCommand::ARG_PATH = "path";
// Hex-formatting switches. NOTE(review): exact key/value semantics are
// defined where these options are parsed, which is not visible here.
const string LDBCommand::ARG_HEX = "hex";
const string LDBCommand::ARG_KEY_HEX = "key_hex";
const string LDBCommand::ARG_VALUE_HEX = "value_hex";
@ -62,6 +63,14 @@ const string LDBCommand::ARG_CREATE_IF_MISSING = "create_if_missing";
const char* LDBCommand::DELIM = " ==> ";
// File-local forward declarations of the dump helpers, so that they can be
// called (e.g. from DBDumperCommand::DoCommand) before their definitions.
// NOTE(review): the definitions are not visible in this chunk — presumably
// they appear later in this translation unit.
namespace {
// Dumps the WAL file named by `wal_file`. `print_header` / `print_values`
// select what is printed; `exec_state` is an out-parameter through which the
// helper can report its result (assumed from the pointer type — confirm).
void DumpWalFile(std::string wal_file, bool print_header, bool print_values,
LDBCommandExecuteResult* exec_state);
// Dumps the SST file named by `filename`. `output_hex` and `show_properties`
// control the output format; no status is returned to the caller.
void DumpSstFile(std::string filename, bool output_hex, bool show_properties);
};
LDBCommand* LDBCommand::InitFromCmdLineArgs(
int argc,
char** argv,
@ -394,8 +403,10 @@ bool LDBCommand::ValidateCmdLineOptions() {
}
}
if (!NoDBOpen() && option_map_.find(ARG_DB) == option_map_.end()) {
fprintf(stderr, "%s must be specified\n", ARG_DB.c_str());
if (!NoDBOpen() && option_map_.find(ARG_DB) == option_map_.end() &&
option_map_.find(ARG_PATH) == option_map_.end()) {
fprintf(stderr, "Either %s or %s must be specified.\n", ARG_DB.c_str(),
ARG_PATH.c_str());
return false;
}
@ -733,21 +744,20 @@ const string InternalDumpCommand::ARG_INPUT_KEY_HEX = "input_key_hex";
InternalDumpCommand::InternalDumpCommand(const vector<string>& params,
const map<string, string>& options,
const vector<string>& flags) :
LDBCommand(options, flags, true,
BuildCmdLineOptions({ ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX,
ARG_FROM, ARG_TO, ARG_MAX_KEYS,
ARG_COUNT_ONLY, ARG_COUNT_DELIM, ARG_STATS,
ARG_INPUT_KEY_HEX})),
has_from_(false),
has_to_(false),
max_keys_(-1),
delim_("."),
count_only_(false),
count_delim_(false),
print_stats_(false),
is_input_key_hex_(false) {
const vector<string>& flags)
: LDBCommand(
options, flags, true,
BuildCmdLineOptions({ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM,
ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY,
ARG_COUNT_DELIM, ARG_STATS, ARG_INPUT_KEY_HEX})),
has_from_(false),
has_to_(false),
max_keys_(-1),
delim_("."),
count_only_(false),
count_delim_(false),
print_stats_(false),
is_input_key_hex_(false) {
has_from_ = ParseStringOption(options, ARG_FROM, &from_);
has_to_ = ParseStringOption(options, ARG_TO, &to_);
@ -891,21 +901,20 @@ const string DBDumperCommand::ARG_STATS = "stats";
const string DBDumperCommand::ARG_TTL_BUCKET = "bucket";
DBDumperCommand::DBDumperCommand(const vector<string>& params,
const map<string, string>& options, const vector<string>& flags) :
LDBCommand(options, flags, true,
BuildCmdLineOptions({ARG_TTL, ARG_HEX, ARG_KEY_HEX,
ARG_VALUE_HEX, ARG_FROM, ARG_TO,
ARG_MAX_KEYS, ARG_COUNT_ONLY,
ARG_COUNT_DELIM, ARG_STATS, ARG_TTL_START,
ARG_TTL_END, ARG_TTL_BUCKET,
ARG_TIMESTAMP})),
null_from_(true),
null_to_(true),
max_keys_(-1),
count_only_(false),
count_delim_(false),
print_stats_(false) {
const map<string, string>& options,
const vector<string>& flags)
: LDBCommand(options, flags, true,
BuildCmdLineOptions(
{ARG_TTL, ARG_HEX, ARG_KEY_HEX, ARG_VALUE_HEX, ARG_FROM,
ARG_TO, ARG_MAX_KEYS, ARG_COUNT_ONLY, ARG_COUNT_DELIM,
ARG_STATS, ARG_TTL_START, ARG_TTL_END, ARG_TTL_BUCKET,
ARG_TIMESTAMP, ARG_PATH})),
null_from_(true),
null_to_(true),
max_keys_(-1),
count_only_(false),
count_delim_(false),
print_stats_(false) {
map<string, string>::const_iterator itr = options.find(ARG_FROM);
if (itr != options.end()) {
null_from_ = false;
@ -954,6 +963,11 @@ DBDumperCommand::DBDumperCommand(const vector<string>& params,
to_ = HexToString(to_);
}
}
itr = options.find(ARG_PATH);
if (itr != options.end()) {
path_ = itr->second;
}
}
void DBDumperCommand::Help(string& ret) {
@ -969,13 +983,63 @@ void DBDumperCommand::Help(string& ret) {
ret.append(" [--" + ARG_TTL_BUCKET + "=<N>]");
ret.append(" [--" + ARG_TTL_START + "=<N>:- is inclusive]");
ret.append(" [--" + ARG_TTL_END + "=<N>:- is exclusive]");
ret.append(" [--" + ARG_PATH + "=<path_to_a_file>]");
ret.append("\n");
}
/**
 * Handles two separate cases:
 *
 * 1) --db is specified (db_ is non-null) - just dump the database.
 *
 * 2) --path is specified - determine from the file name which dumping
 *    function to call. Please note that we intentionally rely on the file
 *    name (as classified by ParseFileName) and avoid probing the file
 *    contents, under the assumption that renaming the files is not a
 *    supported scenario.
 *
 * If the file name cannot be parsed, or names a file type that has no dump
 * function, exec_state_ is set to Failed with a message containing the path.
 */
void DBDumperCommand::DoCommand() {
  if (!db_) {
    // No open DB: the caller is expected to have supplied a file via --path.
    assert(!path_.empty());
    string fileName = GetFileNameFromPath(path_);
    uint64_t number;
    FileType type;

    // Start from success; the failure branches below overwrite it, and
    // DumpWalFile receives exec_state_ so it can report its own result.
    exec_state_ = LDBCommandExecuteResult::Succeed("");

    if (!ParseFileName(fileName, &number, &type)) {
      exec_state_ =
          LDBCommandExecuteResult::Failed("Can't parse file type: " + path_);
      return;
    }

    // Dispatch on the file type derived from the name alone.
    switch (type) {
      case kLogFile:
        DumpWalFile(path_, /* print_header_ */ true, /* print_values_ */ true,
                    &exec_state_);
        break;
      case kTableFile:
        DumpSstFile(path_, is_key_hex_, /* show_properties */ true);
        break;
      case kDescriptorFile:
        DumpManifestFile(path_, /* verbose_ */ false, is_key_hex_,
                         /* json_ */ false);
        break;
      default:
        exec_state_ = LDBCommandExecuteResult::Failed(
            "File type not supported: " + path_);
        break;
    }
  } else {
    DoDumpCommand();
  }
}
void DBDumperCommand::DoDumpCommand() {
assert(nullptr != db_);
assert(path_.empty());
// Parse command line args
uint64_t count = 0;
if (print_stats_) {

View File

@ -40,6 +40,7 @@ public:
// Command-line arguments
static const string ARG_DB;
static const string ARG_PATH;
static const string ARG_HEX;
static const string ARG_KEY_HEX;
static const string ARG_VALUE_HEX;
@ -90,10 +91,8 @@ public:
}
// Releases the DB handle owned by this command, if any.
virtual ~LDBCommand() {
  // `delete` on a null pointer is a no-op, so no explicit null check is
  // needed before releasing db_.
  delete db_;
  db_ = nullptr;
}
/* Run the command, and return the execute result. */
@ -104,12 +103,12 @@ public:
if (db_ == nullptr && !NoDBOpen()) {
OpenDB();
if (!exec_state_.IsNotStarted()) {
return;
}
}
// We'll intentionally proceed even if the DB can't be opened because users
// can also specify a filename, not just a directory.
DoCommand();
if (exec_state_.IsNotStarted()) {
exec_state_ = LDBCommandExecuteResult::Succeed("");
}
@ -441,6 +440,22 @@ public:
virtual void DoCommand() override;
private:
/**
 * Returns the last component of the path `s`, i.e. everything after the final
 * path separator. Both the forward slash (/) and the backslash (\) count as
 * separators so that paths from different OS-s are handled. A path without
 * any separator is returned unchanged; a path ending in a separator yields an
 * empty string.
 */
static string GetFileNameFromPath(const string& s) {
  const std::size_t last_separator = s.find_last_of("/\\");
  return (last_separator == std::string::npos) ? s
                                               : s.substr(last_separator + 1);
}
void DoDumpCommand();
bool null_from_;
string from_;
bool null_to_;
@ -450,6 +465,7 @@ private:
bool count_only_;
bool count_delim_;
bool print_stats_;
string path_;
static const string ARG_COUNT_ONLY;
static const string ARG_COUNT_DELIM;

View File

@ -408,6 +408,12 @@ class LDBTestCase(unittest.TestCase):
# Return the list of paths directly under `directory` whose names match the
# glob pattern "MANIFEST-*".
def getManifests(self, directory):
return glob.glob(directory + "/MANIFEST-*")
# Return the list of paths directly under `directory` whose names match the
# glob pattern "*.sst". The order of the result is whatever glob returns.
def getSSTFiles(self, directory):
return glob.glob(directory + "/*.sst")
# Return the list of paths directly under `directory` whose names match the
# glob pattern "*.log". The order of the result is whatever glob returns.
def getWALFiles(self, directory):
return glob.glob(directory + "/*.log")
# Copy `src` to `dest` by running "cp <src> <dest>" and return True when the
# command's result equals 0.
# NOTE(review): run_err_null is defined elsewhere — presumably it runs the
# command in a shell and returns its exit status; the paths are concatenated
# unquoted, so they must not contain spaces or shell metacharacters.
def copyManifests(self, src, dest):
return 0 == run_err_null("cp " + src + " " + dest)
@ -439,6 +445,53 @@ class LDBTestCase(unittest.TestCase):
% (dbPath, manifest_files[1]),
expected_pattern, unexpected=False,
isPattern=True)
# Make sure that using the dump with --path will result in identical
# output as just using manifest_dump.
cmd = "dump --path=%s"
self.assertRunOKFull((cmd)
% (manifest_files[1]),
expected_pattern, unexpected=False,
isPattern=True)
# Checks that `dump --path=<sst file>` produces SST-dump output.
# Writes two keys, reads one back, then runs the dump command against the
# first *.sst file found in the DB directory and matches the output against
# a pattern.
def testSSTDump(self):
print "Running testSSTDump..."
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
# Populate the DB so that there is something to dump.
self.assertRunOK("put sst1 sst1_val --create_if_missing", "OK")
self.assertRunOK("put sst2 sst2_val", "OK")
self.assertRunOK("get sst1", "sst1_val")
# Pattern to expect from SST dump.
regex = ".*Sst file format:.*"
expected_pattern = re.compile(regex)
sst_files = self.getSSTFiles(dbPath)
# The test needs at least one SST file to point --path at.
self.assertTrue(len(sst_files) >= 1)
cmd = "dump --path=%s"
# NOTE(review): assertRunOKFull is defined elsewhere — presumably it runs
# the command without adding --db and checks the output against the pattern.
self.assertRunOKFull((cmd)
% (sst_files[0]),
expected_pattern, unexpected=False,
isPattern=True)
# Checks that `dump --path=<wal file>` produces WAL-dump output.
# Writes two keys, reads one back, then runs the dump command against the
# first *.log file found in the DB directory and matches the output against
# the expected header pattern.
def testWALDump(self):
print "Running testWALDump..."
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
# Populate the DB so that there is something to dump.
self.assertRunOK("put wal1 wal1_val --create_if_missing", "OK")
self.assertRunOK("put wal2 wal2_val", "OK")
self.assertRunOK("get wal1", "wal1_val")
# Pattern to expect from WAL dump.
regex = "^Sequence,Count,ByteSize,Physical Offset,Key\(s\).*"
expected_pattern = re.compile(regex)
wal_files = self.getWALFiles(dbPath)
# The test needs at least one WAL file to point --path at.
self.assertTrue(len(wal_files) >= 1)
cmd = "dump --path=%s"
# NOTE(review): assertRunOKFull is defined elsewhere — presumably it runs
# the command without adding --db and checks the output against the pattern.
self.assertRunOKFull((cmd)
% (wal_files[0]),
expected_pattern, unexpected=False,
isPattern=True)
def testListColumnFamilies(self):
print "Running testListColumnFamilies..."