Added 'dump_live_files' command to ldb tool.

Summary:
Preliminary diff to solicit comments.
Given DB path, dump all SST files (key/value and properties), WAL file and manifest
files. What command options do we need to support for this command? Maybe
output_hex for keys?

Test Plan: Create additional ldb unit tests.

Reviewers: sdong, rven

Reviewed By: rven

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D29547
This commit is contained in:
Qiao Yang 2014-11-24 10:04:16 -08:00
parent 7ab1526c0e
commit cef6f84393
6 changed files with 326 additions and 134 deletions

View File

@ -378,6 +378,22 @@ class LDBTestCase(unittest.TestCase):
my_check_output("rm -f %s" % sstFilePath, shell=True)
self.assertRunFAIL("checkconsistency")
# Runs "./ldb dump_live_files <params>" with stdout redirected to dumpFile.
# Returns True when run_err_null() yields 0 for that command line
# (presumably the command's exit status -- confirm against run_err_null).
def dumpLiveFiles(self, params, dumpFile):
return 0 == run_err_null("./ldb dump_live_files %s > %s" % (
params, dumpFile))
# Smoke test for the dump_live_files command: it only checks that the
# command succeeds at two points in the DB's life; the contents written to
# the dump files are not inspected.
def testDumpLiveFiles(self):
print "Running testDumpLiveFiles..."
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
# Create the DB and add two keys, then take the first dump.
self.assertRunOK("put x1 y1 --create_if_missing", "OK")
self.assertRunOK("put x2 y2", "OK")
dumpFilePath = os.path.join(self.TMP_DIR, "dump1")
self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath))
# Apply a delete and another put, then dump again into a second file.
self.assertRunOK("delete x1", "OK")
self.assertRunOK("put x3 y3", "OK")
dumpFilePath = os.path.join(self.TMP_DIR, "dump2")
self.assertTrue(self.dumpLiveFiles("--db=%s" % dbPath, dumpFilePath))
if __name__ == "__main__":
unittest.main()

View File

@ -14,7 +14,9 @@
#include "db/write_batch_internal.h"
#include "rocksdb/write_batch.h"
#include "rocksdb/cache.h"
#include "rocksdb/table_properties.h"
#include "util/coding.h"
#include "util/sst_dump_tool_imp.h"
#include "util/scoped_arena_iterator.h"
#include "utilities/ttl/db_ttl_impl.h"
@ -165,6 +167,8 @@ LDBCommand* LDBCommand::SelectCommand(
return new ManifestDumpCommand(cmdParams, option_map, flags);
} else if (cmd == ListColumnFamiliesCommand::Name()) {
return new ListColumnFamiliesCommand(cmdParams, option_map, flags);
} else if (cmd == DBFileDumperCommand::Name()) {
return new DBFileDumperCommand(cmdParams, option_map, flags);
} else if (cmd == InternalDumpCommand::Name()) {
return new InternalDumpCommand(cmdParams, option_map, flags);
} else if (cmd == CheckConsistencyCommand::Name()) {
@ -438,6 +442,8 @@ void CompactorCommand::DoCommand() {
delete end;
}
// ----------------------------------------------------------------------------
const string DBLoaderCommand::ARG_DISABLE_WAL = "disable_wal";
const string DBLoaderCommand::ARG_BULK_LOAD = "bulk_load";
const string DBLoaderCommand::ARG_COMPACT = "compact";
@ -513,6 +519,31 @@ void DBLoaderCommand::DoCommand() {
// ----------------------------------------------------------------------------
namespace {
void DumpManifestFile(std::string file, bool verbose, bool hex) {
Options options;
EnvOptions sopt;
std::string dbname("dummy");
std::shared_ptr<Cache> tc(
NewLRUCache(options.max_open_files - 10, options.table_cache_numshardbits,
options.table_cache_remove_scan_count_limit));
// Notice we are using the default options not through SanitizeOptions(),
// if VersionSet::DumpManifest() depends on any option done by
// SanitizeOptions(), we need to initialize it manually.
options.db_paths.emplace_back("dummy", 0);
WriteController wc;
WriteBuffer wb(options.db_write_buffer_size);
VersionSet versions(dbname, &options, sopt, tc.get(), &wb, &wc);
Status s = versions.DumpManifest(options, file, verbose, hex);
if (!s.ok()) {
printf("Error in processing file %s %s\n", file.c_str(),
s.ToString().c_str());
}
}
} // namespace
const string ManifestDumpCommand::ARG_VERBOSE = "verbose";
const string ManifestDumpCommand::ARG_PATH = "path";
@ -585,25 +616,7 @@ void ManifestDumpCommand::DoCommand() {
printf("Processing Manifest file %s\n", manifestfile.c_str());
}
Options options;
EnvOptions sopt;
std::string file(manifestfile);
std::string dbname("dummy");
std::shared_ptr<Cache> tc(NewLRUCache(
options.max_open_files - 10, options.table_cache_numshardbits,
options.table_cache_remove_scan_count_limit));
// Notice we are using the default options not through SanitizeOptions(),
// if VersionSet::DumpManifest() depends on any option done by
// SanitizeOptions(), we need to initialize it manually.
options.db_paths.emplace_back("dummy", 0);
WriteController wc;
WriteBuffer wb(options.db_write_buffer_size);
VersionSet versions(dbname, &options, sopt, tc.get(), &wb, &wc);
Status s = versions.DumpManifest(options, file, verbose_, is_key_hex_);
if (!s.ok()) {
printf("Error in processing file %s %s\n", manifestfile.c_str(),
s.ToString().c_str());
}
DumpManifestFile(manifestfile, verbose_, is_key_hex_);
if (verbose_) {
printf("Processing Manifest file %s done\n", manifestfile.c_str());
}
@ -1325,9 +1338,19 @@ void ChangeCompactionStyleCommand::DoCommand() {
files_per_level.c_str());
}
// ----------------------------------------------------------------------------
namespace {
// log::Reader reporter that writes each reported corruption to stderr.
// The byte count passed by the caller is ignored; only the status text is
// printed.
struct StdErrReporter : public log::Reader::Reporter {
  virtual void Corruption(size_t /*bytes*/, const Status& status) {
    cerr << "Corruption detected in log file " << status.ToString() << "\n";
  }
};
class InMemoryHandler : public WriteBatch::Handler {
public:
InMemoryHandler(stringstream& row, bool print_values) : Handler(),row_(row) {
InMemoryHandler(stringstream& row, bool print_values) : Handler(), row_(row) {
print_values_ = print_values;
}
@ -1357,13 +1380,63 @@ class InMemoryHandler : public WriteBatch::Handler {
row_ << LDBCommand::StringToHex(key.ToString()) << " ";
}
virtual ~InMemoryHandler() { };
virtual ~InMemoryHandler() {}
private:
stringstream & row_;
bool print_values_;
};
void DumpWalFile(std::string wal_file, bool print_header, bool print_values,
LDBCommandExecuteResult* exec_state) {
unique_ptr<SequentialFile> file;
Env* env_ = Env::Default();
EnvOptions soptions;
Status status = env_->NewSequentialFile(wal_file, &file, soptions);
if (!status.ok()) {
if (exec_state) {
*exec_state = LDBCommandExecuteResult::FAILED("Failed to open WAL file " +
status.ToString());
} else {
cerr << "Error: Failed to open WAL file " << status.ToString()
<< std::endl;
}
} else {
StdErrReporter reporter;
log::Reader reader(move(file), &reporter, true, 0);
string scratch;
WriteBatch batch;
Slice record;
stringstream row;
if (print_header) {
cout << "Sequence,Count,ByteSize,Physical Offset,Key(s)";
if (print_values) {
cout << " : value ";
}
cout << "\n";
}
while (reader.ReadRecord(&record, &scratch)) {
row.str("");
if (record.size() < 12) {
reporter.Corruption(record.size(),
Status::Corruption("log record too small"));
} else {
WriteBatchInternal::SetContents(&batch, record);
row << WriteBatchInternal::Sequence(&batch) << ",";
row << WriteBatchInternal::Count(&batch) << ",";
row << WriteBatchInternal::ByteSize(&batch) << ",";
row << reader.LastRecordOffset() << ",";
InMemoryHandler handler(row, print_values);
batch.Iterate(&handler);
row << "\n";
}
cout << row.str();
}
}
}
} // namespace
const string WALDumperCommand::ARG_WAL_FILE = "walfile";
const string WALDumperCommand::ARG_PRINT_VALUE = "print_value";
const string WALDumperCommand::ARG_PRINT_HEADER = "header";
@ -1401,53 +1474,10 @@ void WALDumperCommand::Help(string& ret) {
}
void WALDumperCommand::DoCommand() {
struct StdErrReporter : public log::Reader::Reporter {
virtual void Corruption(size_t bytes, const Status& s) {
cerr<<"Corruption detected in log file "<<s.ToString()<<"\n";
}
};
unique_ptr<SequentialFile> file;
Env* env_ = Env::Default();
EnvOptions soptions;
Status status = env_->NewSequentialFile(wal_file_, &file, soptions);
if (!status.ok()) {
exec_state_ = LDBCommandExecuteResult::FAILED("Failed to open WAL file " +
status.ToString());
} else {
StdErrReporter reporter;
log::Reader reader(move(file), &reporter, true, 0);
string scratch;
WriteBatch batch;
Slice record;
stringstream row;
if (print_header_) {
cout<<"Sequence,Count,ByteSize,Physical Offset,Key(s)";
if (print_values_) {
cout << " : value ";
}
cout << "\n";
}
while(reader.ReadRecord(&record, &scratch)) {
row.str("");
if (record.size() < 12) {
reporter.Corruption(
record.size(), Status::Corruption("log record too small"));
} else {
WriteBatchInternal::SetContents(&batch, record);
row<<WriteBatchInternal::Sequence(&batch)<<",";
row<<WriteBatchInternal::Count(&batch)<<",";
row<<WriteBatchInternal::ByteSize(&batch)<<",";
row<<reader.LastRecordOffset()<<",";
InMemoryHandler handler(row, print_values_);
batch.Iterate(&handler);
row<<"\n";
}
cout<<row.str();
}
}
DumpWalFile(wal_file_, print_header_, print_values_, &exec_state_);
}
// ----------------------------------------------------------------------------
GetCommand::GetCommand(const vector<string>& params,
const map<string, string>& options, const vector<string>& flags) :
@ -1486,6 +1516,7 @@ void GetCommand::DoCommand() {
}
}
// ----------------------------------------------------------------------------
ApproxSizeCommand::ApproxSizeCommand(const vector<string>& params,
const map<string, string>& options, const vector<string>& flags) :
@ -1537,6 +1568,7 @@ void ApproxSizeCommand::DoCommand() {
*/
}
// ----------------------------------------------------------------------------
BatchPutCommand::BatchPutCommand(const vector<string>& params,
const map<string, string>& options, const vector<string>& flags) :
@ -1590,6 +1622,7 @@ Options BatchPutCommand::PrepareOptionsForOpenDB() {
return opt;
}
// ----------------------------------------------------------------------------
ScanCommand::ScanCommand(const vector<string>& params,
const map<string, string>& options, const vector<string>& flags) :
@ -1701,6 +1734,7 @@ void ScanCommand::DoCommand() {
delete it;
}
// ----------------------------------------------------------------------------
DeleteCommand::DeleteCommand(const vector<string>& params,
const map<string, string>& options, const vector<string>& flags) :
@ -1780,6 +1814,7 @@ Options PutCommand::PrepareOptionsForOpenDB() {
return opt;
}
// ----------------------------------------------------------------------------
const char* DBQuerierCommand::HELP_CMD = "help";
const char* DBQuerierCommand::GET_CMD = "get";
@ -1861,6 +1896,8 @@ void DBQuerierCommand::DoCommand() {
}
}
// ----------------------------------------------------------------------------
CheckConsistencyCommand::CheckConsistencyCommand(const vector<string>& params,
const map<string, string>& options, const vector<string>& flags) :
LDBCommand(options, flags, false,
@ -1889,5 +1926,117 @@ void CheckConsistencyCommand::DoCommand() {
}
}
// ----------------------------------------------------------------------------
namespace {
// Dumps a single SST file to stdout.
//
//   filename        - path of the file; must be longer than four characters
//                     and end in ".sst", otherwise a message is printed and
//                     nothing is dumped.
//   output_hex      - forwarded to SstFileReader.
//   show_properties - when true, the table properties and the deleted-key
//                     count are printed after the entries.
//
// The reader is created with checksum verification off and the whole file
// is read with no from/to bounds. Read errors are reported on stderr.
void DumpSstFile(std::string filename, bool output_hex, bool show_properties) {
  // Left empty on purpose: both bounds are disabled in ReadSequential below.
  std::string from_key;
  std::string to_key;
  if (filename.length() <= 4 ||
      filename.rfind(".sst") != filename.length() - 4) {
    std::cout << "Invalid sst file name." << std::endl;
    return;
  }
  // no verification
  rocksdb::SstFileReader reader(filename, false, output_hex);
  // read_num is unsigned, so -1 asks for the largest possible count.
  Status st = reader.ReadSequential(true, -1, false,  // has_from
                                    from_key, false,  // has_to
                                    to_key);
  if (!st.ok()) {
    // Keep the path and the status text apart so the message stays readable.
    std::cerr << "Error in reading SST file " << filename << ": "
              << st.ToString() << std::endl;
    return;
  }

  if (show_properties) {
    const rocksdb::TableProperties* table_properties;
    std::shared_ptr<const rocksdb::TableProperties>
        table_properties_from_reader;
    st = reader.ReadTableProperties(&table_properties_from_reader);
    if (!st.ok()) {
      // Fall back to the properties the reader already holds.
      std::cerr << filename << ": " << st.ToString()
                << ". Try to use initial table properties" << std::endl;
      table_properties = reader.GetInitTableProperties();
    } else {
      table_properties = table_properties_from_reader.get();
    }
    if (table_properties != nullptr) {
      std::cout << std::endl << "Table Properties:" << std::endl;
      std::cout << table_properties->ToString("\n") << std::endl;
      std::cout << "# deleted keys: "
                << rocksdb::GetDeletedKeys(
                       table_properties->user_collected_properties)
                << std::endl;
    }
  }
}
} // namespace
// Builds the dump_live_files command. It accepts no command-specific
// options (an empty list goes to BuildCmdLineOptions) and `params` is not
// used. The literal `true` is presumably LDBCommand's read-only flag --
// confirm against the LDBCommand constructor.
DBFileDumperCommand::DBFileDumperCommand(
    const vector<string>& params, const map<string, string>& options,
    const vector<string>& flags)
    : LDBCommand(options, flags, true, BuildCmdLineOptions({})) {
}
// Appends this command's usage line (the command name on its own line) to
// `ret`.
void DBFileDumperCommand::Help(string& ret) {
  ret += " ";
  ret += DBFileDumperCommand::Name();
  ret += "\n";
}
// Dumps the live files of the opened DB to stdout in three sections: the
// manifest named by the CURRENT file, every live SST file (entries plus
// table properties), and every WAL file returned by GetSortedWalFiles().
// Does nothing when no DB is open. Problems in one section are reported on
// stderr and the remaining sections are still attempted.
void DBFileDumperCommand::DoCommand() {
  if (!db_) {
    return;
  }
  Status s;

  std::cout << "Manifest File" << std::endl;
  std::cout << "==============================" << std::endl;
  std::string manifest_filename;
  s = ReadFileToString(db_->GetEnv(), CurrentFileName(db_->GetName()),
                       &manifest_filename);
  if (!s.ok() || manifest_filename.empty() ||
      manifest_filename.back() != '\n') {
    // CURRENT could not be read or is malformed. Skip the manifest dump:
    // stripping the last character of an empty string would make
    // size() - 1 wrap around, and a name without the trailing newline
    // cannot be trusted to point at a manifest.
    std::cerr << "Error when reading CURRENT file "
              << CurrentFileName(db_->GetName()) << std::endl;
  } else {
    // remove the trailing '\n'
    manifest_filename.resize(manifest_filename.size() - 1);
    string manifest_filepath = db_->GetName() + "/" + manifest_filename;
    std::cout << manifest_filepath << std::endl;
    DumpManifestFile(manifest_filepath, false, false);
  }
  std::cout << std::endl;

  std::cout << "SST Files" << std::endl;
  std::cout << "==============================" << std::endl;
  std::vector<LiveFileMetaData> metadata;
  db_->GetLiveFilesMetaData(&metadata);
  for (const auto& fileMetadata : metadata) {
    std::string filename = fileMetadata.db_path + fileMetadata.name;
    std::cout << filename << " level:" << fileMetadata.level << std::endl;
    std::cout << "------------------------------" << std::endl;
    DumpSstFile(filename, false, true);
    std::cout << std::endl;
  }
  std::cout << std::endl;

  std::cout << "Write Ahead Log Files" << std::endl;
  std::cout << "==============================" << std::endl;
  rocksdb::VectorLogPtr wal_files;
  s = db_->GetSortedWalFiles(wal_files);
  if (!s.ok()) {
    std::cerr << "Error when getting WAL files" << std::endl;
  } else {
    for (const auto& wal : wal_files) {
      // TODO(qyang): option.wal_dir should be passed into ldb command
      std::string filename = db_->GetOptions().wal_dir + wal->PathName();
      std::cout << filename << std::endl;
      // Header and values are always printed; open failures land in
      // exec_state_.
      DumpWalFile(filename, true, true, &exec_state_);
    }
  }
}
} // namespace rocksdb
#endif // ROCKSDB_LITE

View File

@ -13,6 +13,8 @@
#include <stdlib.h>
#include <algorithm>
#include <stdio.h>
#include <vector>
#include <map>
#include "db/version_set.h"
#include "rocksdb/env.h"
@ -392,6 +394,19 @@ private:
string to_;
};
// ldb command "dump_live_files".
class DBFileDumperCommand : public LDBCommand {
 public:
  DBFileDumperCommand(const vector<string>& params,
                      const map<string, string>& options,
                      const vector<string>& flags);

  // Name used to select this command on the ldb command line.
  static string Name() { return "dump_live_files"; }

  // Appends this command's usage text to `ret`.
  static void Help(string& ret);

  // Runs the command.
  virtual void DoCommand();
};
class DBDumperCommand: public LDBCommand {
public:
static string Name() { return "dump"; }

View File

@ -80,6 +80,7 @@ public:
DBLoaderCommand::Help(ret);
ManifestDumpCommand::Help(ret);
ListColumnFamiliesCommand::Help(ret);
DBFileDumperCommand::Help(ret);
InternalDumpCommand::Help(ret);
fprintf(stderr, "%s\n", ret.c_str());

View File

@ -5,83 +5,16 @@
//
#ifndef ROCKSDB_LITE
#include "rocksdb/sst_dump_tool.h"
#include "util/sst_dump_tool_imp.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <map>
#include <string>
#include <vector>
#include <inttypes.h>
#include "db/dbformat.h"
#include "db/memtable.h"
#include "db/write_batch_internal.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/immutable_options.h"
#include "rocksdb/iterator.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/status.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "table/block.h"
#include "table/block_based_table_factory.h"
#include "table/block_builder.h"
#include "table/format.h"
#include "table/meta_blocks.h"
#include "table/plain_table_factory.h"
#include "util/ldb_cmd.h"
#include "util/random.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace rocksdb {
// Reader for a single SST file, used by the sst_dump tooling.
class SstFileReader {
public:
// `verify_checksum` and `output_hex` presumably end up in the members of
// the same name -- the constructor body is not part of this declaration.
explicit SstFileReader(const std::string& file_name,
bool verify_checksum,
bool output_hex);
// Reads entries in order. `from_key` / `to_key` presumably only apply when
// the matching has_from / has_to flag is set -- confirm in the definition.
Status ReadSequential(bool print_kv,
uint64_t read_num,
bool has_from,
const std::string& from_key,
bool has_to,
const std::string& to_key);
// Hands the table properties back through `table_properties`.
Status ReadTableProperties(
std::shared_ptr<const TableProperties>* table_properties);
// Returns the current value of read_num_.
uint64_t GetReadNumber() { return read_num_; }
// Returns the raw pointer held by table_properties_; the reader keeps
// ownership, and the pointer is null when nothing is held.
TableProperties* GetInitTableProperties() { return table_properties_.get(); }
private:
Status NewTableReader(const std::string& file_path);
Status ReadTableProperties(uint64_t table_magic_number,
RandomAccessFile* file, uint64_t file_size);
Status SetTableOptionsByMagicNumber(uint64_t table_magic_number);
Status SetOldTableOptions();
std::string file_name_;
uint64_t read_num_;
bool verify_checksum_;
bool output_hex_;
EnvOptions soptions_;
Status init_result_;
unique_ptr<TableReader> table_reader_;
unique_ptr<RandomAccessFile> file_;
// options_ and internal_comparator_ will also be used in
// ReadSequential internally (specifically, seek-related operations)
// NOTE(review): members are initialized in declaration order; keep this
// order unless the constructor is checked for dependencies between them.
Options options_;
const ImmutableCFOptions ioptions_;
InternalKeyComparator internal_comparator_;
unique_ptr<TableProperties> table_properties_;
};
SstFileReader::SstFileReader(const std::string& file_path,
bool verify_checksum,
bool output_hex)

78
util/sst_dump_tool_imp.h Normal file
View File

@ -0,0 +1,78 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef ROCKSDB_LITE
#pragma once
#include "rocksdb/sst_dump_tool.h"
#include <map>
#include <string>
#include <vector>
#include "db/dbformat.h"
#include "db/memtable.h"
#include "db/write_batch_internal.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/immutable_options.h"
#include "rocksdb/iterator.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/status.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "table/block.h"
#include "table/block_based_table_factory.h"
#include "table/block_builder.h"
#include "table/format.h"
#include "table/meta_blocks.h"
#include "table/plain_table_factory.h"
#include "util/ldb_cmd.h"
#include "util/random.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace rocksdb {
// Reader for a single SST file, shared by the sst_dump tool and other
// tools that include this header.
class SstFileReader {
public:
// `verify_checksum` and `output_hex` presumably end up in the members of
// the same name -- the constructor body is not part of this header.
explicit SstFileReader(const std::string& file_name, bool verify_checksum,
bool output_hex);
// Reads entries in order. `from_key` / `to_key` presumably only apply when
// the matching has_from / has_to flag is set -- confirm in the definition.
Status ReadSequential(bool print_kv, uint64_t read_num, bool has_from,
const std::string& from_key, bool has_to,
const std::string& to_key);
// Hands the table properties back through `table_properties`.
Status ReadTableProperties(
std::shared_ptr<const TableProperties>* table_properties);
// Returns the current value of read_num_.
uint64_t GetReadNumber() { return read_num_; }
// Returns the raw pointer held by table_properties_; the reader keeps
// ownership, and the pointer is null when nothing is held.
TableProperties* GetInitTableProperties() { return table_properties_.get(); }
private:
Status NewTableReader(const std::string& file_path);
Status ReadTableProperties(uint64_t table_magic_number,
RandomAccessFile* file, uint64_t file_size);
Status SetTableOptionsByMagicNumber(uint64_t table_magic_number);
Status SetOldTableOptions();
std::string file_name_;
uint64_t read_num_;
bool verify_checksum_;
bool output_hex_;
EnvOptions soptions_;
Status init_result_;
unique_ptr<TableReader> table_reader_;
unique_ptr<RandomAccessFile> file_;
// options_ and internal_comparator_ will also be used in
// ReadSequential internally (specifically, seek-related operations)
// NOTE(review): members are initialized in declaration order; keep this
// order unless the constructor is checked for dependencies between them.
Options options_;
const ImmutableCFOptions ioptions_;
InternalKeyComparator internal_comparator_;
unique_ptr<TableProperties> table_properties_;
};
} // namespace rocksdb
#endif // ROCKSDB_LITE