First version of rocksdb_dump and rocksdb_undump.

Summary: Hack up rocksdb_dump and rocksdb_undump utilities to get this task rolling/promote discussion.

Test Plan: Dump/undump databases recursively to see if nothing is lost.

Reviewers: sdong, yhchiang, rven, anthony, kradhakrishnan, igor

Reviewed By: igor

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D37269
This commit is contained in:
Michael Callahan 2015-06-19 16:24:36 -07:00
parent 04251e1e3a
commit 15325bf55b
7 changed files with 321 additions and 1 deletions

2
.gitignore vendored
View File

@ -33,6 +33,8 @@ package/
.phutil_module_cache .phutil_module_cache
unity unity
tags tags
rocksdb_dump
rocksdb_undump
java/out java/out
java/target java/target

16
DUMP_FORMAT.md Normal file
View File

@ -0,0 +1,16 @@
## RocksDB dump format
The version 1 RocksDB dump format is fairly simple:
1) The dump starts with the magic 8 byte identifier "ROCKDUMP"
2) The magic is followed by an 8 byte big-endian version which is 0x00000001.
3) Next are arbitrarily sized chunks of bytes prepended by 4 byte little endian number indicating how large each chunk is.
4) The first chunk is special and is a json string indicating some things about the creation of this dump. It contains the following keys:
* database-path: The path of the database this dump was created from.
* hostname: The hostname of the machine where the dump was created.
* creation-time: Unix seconds since epoc when this dump was created.
5) Following the info dump the slices paired into are key/value pairs.

View File

@ -300,7 +300,9 @@ TOOLS = \
db_sanity_test \ db_sanity_test \
db_stress \ db_stress \
ldb \ ldb \
db_repl_stress db_repl_stress \
rocksdb_dump \
rocksdb_undump
BENCHMARKS = db_bench table_reader_bench cache_bench memtablerep_bench BENCHMARKS = db_bench table_reader_bench cache_bench memtablerep_bench
@ -516,6 +518,8 @@ check: all
echo "===== Running $$t"; ./$$t || exit 1; done; \ echo "===== Running $$t"; ./$$t || exit 1; done; \
fi fi
rm -rf $(TMPD) rm -rf $(TMPD)
python tools/ldb_test.py
sh tools/rocksdb_dump_test.sh
check_some: $(SUBSET) ldb_tests check_some: $(SUBSET) ldb_tests
for t in $(SUBSET); do echo "===== Running $$t"; ./$$t || exit 1; done for t in $(SUBSET); do echo "===== Running $$t"; ./$$t || exit 1; done
@ -795,6 +799,12 @@ deletefile_test: db/deletefile_test.o $(LIBOBJECTS) $(TESTHARNESS)
geodb_test: utilities/geodb/geodb_test.o $(LIBOBJECTS) $(TESTHARNESS) geodb_test: utilities/geodb/geodb_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK) $(AM_LINK)
rocksdb_dump: tools/dump/rocksdb_dump.o $(LIBOBJECTS)
$(AM_LINK)
rocksdb_undump: tools/dump/rocksdb_undump.o $(LIBOBJECTS)
$(AM_LINK)
cuckoo_table_builder_test: table/cuckoo_table_builder_test.o $(LIBOBJECTS) $(TESTHARNESS) cuckoo_table_builder_test: table/cuckoo_table_builder_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK) $(AM_LINK)

149
tools/dump/rocksdb_dump.cc Normal file
View File

@ -0,0 +1,149 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#ifndef GFLAGS
#include <cstdio>
int main() {
fprintf(stderr, "Please install gflags to run rocksdb tools\n");
return 1;
}
#else
#include <gflags/gflags.h>
#include <iostream>
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "util/coding.h"
DEFINE_bool(anonymous, false, "Output an empty information blob.");
void usage(const char* name) {
std::cout << "usage: " << name << " [--anonymous] <db> <dumpfile>"
<< std::endl;
}
int main(int argc, char** argv) {
rocksdb::DB* dbptr;
rocksdb::Options options;
rocksdb::Status status;
std::unique_ptr<rocksdb::WritableFile> dumpfile;
char hostname[1024];
int64_t timesec;
std::string abspath;
char json[4096];
GFLAGS::ParseCommandLineFlags(&argc, &argv, true);
static const char* magicstr = "ROCKDUMP";
static const char versionstr[8] = {0, 0, 0, 0, 0, 0, 0, 1};
if (argc != 3) {
usage(argv[0]);
exit(1);
}
rocksdb::Env* env = rocksdb::Env::Default();
// Open the database
options.create_if_missing = false;
status = rocksdb::DB::OpenForReadOnly(options, argv[1], &dbptr);
if (!status.ok()) {
std::cerr << "Unable to open database '" << argv[1]
<< "' for reading: " << status.ToString() << std::endl;
exit(1);
}
const std::unique_ptr<rocksdb::DB> db(dbptr);
status = env->NewWritableFile(argv[2], &dumpfile, rocksdb::EnvOptions());
if (!status.ok()) {
std::cerr << "Unable to open dump file '" << argv[2]
<< "' for writing: " << status.ToString() << std::endl;
exit(1);
}
rocksdb::Slice magicslice(magicstr, 8);
status = dumpfile->Append(magicslice);
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}
rocksdb::Slice versionslice(versionstr, 8);
status = dumpfile->Append(versionslice);
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}
if (FLAGS_anonymous) {
snprintf(json, sizeof(json), "{}");
} else {
status = env->GetHostName(hostname, sizeof(hostname));
status = env->GetCurrentTime(&timesec);
status = env->GetAbsolutePath(argv[1], &abspath);
snprintf(json, sizeof(json),
"{ \"database-path\": \"%s\", \"hostname\": \"%s\", "
"\"creation-time\": %ld }",
abspath.c_str(), hostname, timesec);
}
rocksdb::Slice infoslice(json, strlen(json));
char infosize[4];
rocksdb::EncodeFixed32(infosize, (uint32_t)infoslice.size());
rocksdb::Slice infosizeslice(infosize, 4);
status = dumpfile->Append(infosizeslice);
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}
status = dumpfile->Append(infoslice);
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}
const std::unique_ptr<rocksdb::Iterator> it(
db->NewIterator(rocksdb::ReadOptions()));
for (it->SeekToFirst(); it->Valid(); it->Next()) {
char keysize[4];
rocksdb::EncodeFixed32(keysize, (uint32_t)it->key().size());
rocksdb::Slice keysizeslice(keysize, 4);
status = dumpfile->Append(keysizeslice);
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}
status = dumpfile->Append(it->key());
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}
char valsize[4];
rocksdb::EncodeFixed32(valsize, (uint32_t)it->value().size());
rocksdb::Slice valsizeslice(valsize, 4);
status = dumpfile->Append(valsizeslice);
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}
status = dumpfile->Append(it->value());
if (!status.ok()) {
std::cerr << "Append failed: " << status.ToString() << std::endl;
exit(1);
}
}
if (!it->status().ok()) {
std::cerr << "Database iteration failed: " << status.ToString()
<< std::endl;
exit(1);
}
return 0;
}
#endif // GFLAGS

View File

@ -0,0 +1,136 @@
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#include <cstring>
#include <iostream>
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "util/coding.h"
void usage(const char *name) {
std::cout << "usage: " << name << " <dumpfile> <rocksdb>" << std::endl;
}
int main(int argc, char **argv) {
rocksdb::DB *dbptr;
rocksdb::Options options;
rocksdb::Status status;
rocksdb::Env *env;
std::unique_ptr<rocksdb::SequentialFile> dumpfile;
rocksdb::Slice slice;
char scratch8[8];
static const char *magicstr = "ROCKDUMP";
static const char versionstr[8] = {0, 0, 0, 0, 0, 0, 0, 1};
if (argc != 3) {
usage(argv[0]);
exit(1);
}
env = rocksdb::Env::Default();
status = env->NewSequentialFile(argv[1], &dumpfile, rocksdb::EnvOptions());
if (!status.ok()) {
std::cerr << "Unable to open dump file '" << argv[1]
<< "' for reading: " << status.ToString() << std::endl;
exit(1);
}
status = dumpfile->Read(8, &slice, scratch8);
if (!status.ok() || slice.size() != 8 ||
memcmp(slice.data(), magicstr, 8) != 0) {
std::cerr << "File '" << argv[1] << "' is not a recognizable dump file."
<< std::endl;
exit(1);
}
status = dumpfile->Read(8, &slice, scratch8);
if (!status.ok() || slice.size() != 8 ||
memcmp(slice.data(), versionstr, 8) != 0) {
std::cerr << "File '" << argv[1] << "' version not recognized."
<< std::endl;
exit(1);
}
status = dumpfile->Read(4, &slice, scratch8);
if (!status.ok() || slice.size() != 4) {
std::cerr << "Unable to read info blob size." << std::endl;
exit(1);
}
uint32_t infosize = rocksdb::DecodeFixed32(slice.data());
status = dumpfile->Skip(infosize);
if (!status.ok()) {
std::cerr << "Unable to skip info blob: " << status.ToString() << std::endl;
exit(1);
}
options.create_if_missing = true;
status = rocksdb::DB::Open(options, argv[2], &dbptr);
if (!status.ok()) {
std::cerr << "Unable to open database '" << argv[2]
<< "' for writing: " << status.ToString() << std::endl;
exit(1);
}
const std::unique_ptr<rocksdb::DB> db(dbptr);
uint32_t last_keysize = 64;
size_t last_valsize = 1 << 20;
std::unique_ptr<char[]> keyscratch(new char[last_keysize]);
std::unique_ptr<char[]> valscratch(new char[last_valsize]);
while (1) {
uint32_t keysize, valsize;
rocksdb::Slice keyslice;
rocksdb::Slice valslice;
status = dumpfile->Read(4, &slice, scratch8);
if (!status.ok() || slice.size() != 4) break;
keysize = rocksdb::DecodeFixed32(slice.data());
if (keysize > last_keysize) {
while (keysize > last_keysize) last_keysize *= 2;
keyscratch = std::unique_ptr<char[]>(new char[last_keysize]);
}
status = dumpfile->Read(keysize, &keyslice, keyscratch.get());
if (!status.ok() || keyslice.size() != keysize) {
std::cerr << "Key read failure: "
<< (status.ok() ? "insufficient data" : status.ToString())
<< std::endl;
exit(1);
}
status = dumpfile->Read(4, &slice, scratch8);
if (!status.ok() || slice.size() != 4) {
std::cerr << "Unable to read value size: "
<< (status.ok() ? "insufficient data" : status.ToString())
<< std::endl;
exit(1);
}
valsize = rocksdb::DecodeFixed32(slice.data());
if (valsize > last_valsize) {
while (valsize > last_valsize) last_valsize *= 2;
valscratch = std::unique_ptr<char[]>(new char[last_valsize]);
}
status = dumpfile->Read(valsize, &valslice, valscratch.get());
if (!status.ok() || valslice.size() != valsize) {
std::cerr << "Unable to read value: "
<< (status.ok() ? "insufficient data" : status.ToString())
<< std::endl;
exit(1);
}
status = db->Put(rocksdb::WriteOptions(), keyslice, valslice);
if (!status.ok()) {
fprintf(stderr, "Unable to write database entry\n");
exit(1);
}
}
return 0;
}

7
tools/rocksdb_dump_test.sh Executable file
View File

@ -0,0 +1,7 @@
TESTDIR=`mktemp -d /tmp/rocksdb-dump-test.XXXXX`
DUMPFILE="tools/sample-dump.dmp"
# Verify that the sample dump file is undumpable and then redumpable.
./rocksdb_undump $DUMPFILE $TESTDIR/db
./rocksdb_dump --anonymous $TESTDIR/db $TESTDIR/dump
cmp $DUMPFILE $TESTDIR/dump

BIN
tools/sample-dump.dmp Normal file

Binary file not shown.