2013-10-16 14:59:46 -07:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
2011-03-18 22:37:00 +00:00
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#include "util/coding.h"
|
|
|
|
|
2012-12-07 10:42:19 -08:00
|
|
|
#include <algorithm>
|
|
|
|
|
2013-10-03 21:49:15 -07:00
|
|
|
namespace rocksdb {
|
2011-03-18 22:37:00 +00:00
|
|
|
|
|
|
|
// Stores "value" into buf[0..3] in little-endian order (least significant
// byte first). "buf" must have room for at least sizeof(uint32_t) bytes.
void EncodeFixed32(char* buf, uint32_t value) {
  // Only take the memcpy fast path when the toolchain positively reports a
  // little-endian target. A bare "__BYTE_ORDER == __LITTLE_ENDIAN" test is
  // not sufficient: when neither macro is defined the preprocessor evaluates
  // "0 == 0" and would pick memcpy on big-endian targets as well.
#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) ||                  \
    (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) &&           \
     __BYTE_ORDER == __LITTLE_ENDIAN)
  memcpy(buf, &value, sizeof(value));
#else
  // Portable path: emit one byte at a time, lowest byte first.
  buf[0] = static_cast<char>(value & 0xff);
  buf[1] = static_cast<char>((value >> 8) & 0xff);
  buf[2] = static_cast<char>((value >> 16) & 0xff);
  buf[3] = static_cast<char>((value >> 24) & 0xff);
#endif
}
|
|
|
|
|
|
|
|
// Stores "value" into buf[0..7] in little-endian order (least significant
// byte first). "buf" must have room for at least sizeof(uint64_t) bytes.
void EncodeFixed64(char* buf, uint64_t value) {
  // Only take the memcpy fast path when the toolchain positively reports a
  // little-endian target. A bare "__BYTE_ORDER == __LITTLE_ENDIAN" test is
  // not sufficient: when neither macro is defined the preprocessor evaluates
  // "0 == 0" and would pick memcpy on big-endian targets as well.
#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) ||                  \
    (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) &&           \
     __BYTE_ORDER == __LITTLE_ENDIAN)
  memcpy(buf, &value, sizeof(value));
#else
  // Portable path: emit one byte at a time, lowest byte first.
  buf[0] = static_cast<char>(value & 0xff);
  buf[1] = static_cast<char>((value >> 8) & 0xff);
  buf[2] = static_cast<char>((value >> 16) & 0xff);
  buf[3] = static_cast<char>((value >> 24) & 0xff);
  buf[4] = static_cast<char>((value >> 32) & 0xff);
  buf[5] = static_cast<char>((value >> 40) & 0xff);
  buf[6] = static_cast<char>((value >> 48) & 0xff);
  buf[7] = static_cast<char>((value >> 56) & 0xff);
#endif
}
|
|
|
|
|
|
|
|
// Appends the fixed-width encoding of "value", as written by EncodeFixed32,
// to the end of "*dst".
void PutFixed32(std::string* dst, uint32_t value) {
  char encoded[sizeof(uint32_t)];
  EncodeFixed32(encoded, value);
  dst->append(encoded, sizeof(encoded));
}
|
|
|
|
|
|
|
|
// Appends the fixed-width encoding of "value", as written by EncodeFixed64,
// to the end of "*dst".
void PutFixed64(std::string* dst, uint64_t value) {
  char encoded[sizeof(uint64_t)];
  EncodeFixed64(encoded, value);
  dst->append(encoded, sizeof(encoded));
}
|
|
|
|
|
|
|
|
// Writes "v" to "dst" as a varint: seven payload bits per byte, lowest group
// first, with the high bit of a byte set when another byte follows. At most
// five bytes are written. Returns a pointer just past the last byte written.
char* EncodeVarint32(char* dst, uint32_t v) {
  // Work on unsigned bytes so the stores below are plain truncations.
  unsigned char* out = reinterpret_cast<unsigned char*>(dst);
  const uint32_t kPayloadMask = 0x7f;
  const uint32_t kMoreBit = 0x80;
  while (v > kPayloadMask) {
    *out++ = static_cast<unsigned char>((v & kPayloadMask) | kMoreBit);
    v >>= 7;
  }
  // Final group: fits in seven bits, so the continuation bit stays clear.
  *out++ = static_cast<unsigned char>(v);
  return reinterpret_cast<char*>(out);
}
|
|
|
|
|
|
|
|
void PutVarint32(std::string* dst, uint32_t v) {
|
|
|
|
char buf[5];
|
|
|
|
char* ptr = EncodeVarint32(buf, v);
|
|
|
|
dst->append(buf, ptr - buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Writes "v" to "dst" as a varint: seven payload bits per byte, lowest group
// first, with the high bit of a byte set when another byte follows. Returns a
// pointer just past the last byte written.
char* EncodeVarint64(char* dst, uint64_t v) {
  unsigned char* out = reinterpret_cast<unsigned char*>(dst);
  for (; v > 0x7f; v >>= 7) {
    // Truncation to one byte keeps the low seven bits; OR-ing 0x80 marks the
    // byte as "more to come".
    *out++ = static_cast<unsigned char>(v | 0x80);
  }
  *out++ = static_cast<unsigned char>(v);
  return reinterpret_cast<char*>(out);
}
|
|
|
|
|
|
|
|
void PutVarint64(std::string* dst, uint64_t v) {
|
|
|
|
char buf[10];
|
|
|
|
char* ptr = EncodeVarint64(buf, v);
|
|
|
|
dst->append(buf, ptr - buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Appends "value" to "*dst" as a varint32 length followed by the raw bytes.
void PutLengthPrefixedSlice(std::string* dst, const Slice& value) {
  const auto payload_size = value.size();
  PutVarint32(dst, payload_size);
  dst->append(value.data(), payload_size);
}
|
|
|
|
|
2013-11-07 12:37:58 -08:00
|
|
|
void PutLengthPrefixedSliceParts(std::string* dst,
|
|
|
|
const SliceParts& slice_parts) {
|
|
|
|
uint32_t total_bytes = 0;
|
|
|
|
for (int i = 0; i < slice_parts.num_parts; ++i) {
|
|
|
|
total_bytes += slice_parts.parts[i].size();
|
|
|
|
}
|
|
|
|
PutVarint32(dst, total_bytes);
|
|
|
|
for (int i = 0; i < slice_parts.num_parts; ++i) {
|
|
|
|
dst->append(slice_parts.parts[i].data(), slice_parts.parts[i].size());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-03-18 22:37:00 +00:00
|
|
|
// Returns how many bytes the varint encoding of "v" occupies: one byte plus
// one more for every further group of seven bits that is non-empty.
int VarintLength(uint64_t v) {
  int bytes = 1;
  for (uint64_t rest = v >> 7; rest != 0; rest >>= 7) {
    ++bytes;
  }
  return bytes;
}
|
|
|
|
|
|
|
|
// Decodes a varint32 from the bytes in [p, limit). On success stores the
// result in "*value" and returns a pointer just past the last byte consumed.
// Returns nullptr if the input ends before a terminating byte is seen, or if
// no terminating byte appears within the first five bytes.
const char* GetVarint32PtrFallback(const char* p,
                                   const char* limit,
                                   uint32_t* value) {
  uint32_t decoded = 0;
  uint32_t shift = 0;
  while (shift <= 28 && p < limit) {
    const uint32_t byte = *(reinterpret_cast<const unsigned char*>(p));
    ++p;
    // The low seven bits are payload; the high bit says whether more follow.
    decoded |= ((byte & 127) << shift);
    if ((byte & 128) == 0) {
      *value = decoded;
      return reinterpret_cast<const char*>(p);
    }
    shift += 7;
  }
  return nullptr;
}
|
|
|
|
|
|
|
|
// Decodes a varint32 from the front of "*input" via GetVarint32Ptr. On
// success stores it in "*value", advances "*input" past the consumed bytes
// and returns true. On failure returns false and leaves "*input" unchanged.
bool GetVarint32(Slice* input, uint32_t* value) {
  const char* const begin = input->data();
  const char* const end = begin + input->size();
  const char* const next = GetVarint32Ptr(begin, end, value);
  if (next == nullptr) {
    return false;
  }
  *input = Slice(next, end - next);
  return true;
}
|
|
|
|
|
|
|
|
// Decodes a varint64 from the bytes in [p, limit). On success stores the
// result in "*value" and returns a pointer just past the last byte consumed.
// Returns nullptr if the input ends before a terminating byte is seen, or if
// no terminating byte appears within the first ten bytes.
const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* value) {
  uint64_t decoded = 0;
  uint32_t shift = 0;
  while (shift <= 63 && p < limit) {
    const uint64_t byte = *(reinterpret_cast<const unsigned char*>(p));
    ++p;
    // The low seven bits are payload; the high bit says whether more follow.
    decoded |= ((byte & 127) << shift);
    if ((byte & 128) == 0) {
      *value = decoded;
      return reinterpret_cast<const char*>(p);
    }
    shift += 7;
  }
  return nullptr;
}
|
|
|
|
|
|
|
|
// Decodes a varint64 from the front of "*input" via GetVarint64Ptr. On
// success stores it in "*value", advances "*input" past the consumed bytes
// and returns true. On failure returns false and leaves "*input" unchanged.
bool GetVarint64(Slice* input, uint64_t* value) {
  const char* const begin = input->data();
  const char* const end = begin + input->size();
  const char* const next = GetVarint64Ptr(begin, end, value);
  if (next == nullptr) {
    return false;
  }
  *input = Slice(next, end - next);
  return true;
}
|
|
|
|
|
|
|
|
const char* GetLengthPrefixedSlice(const char* p, const char* limit,
|
|
|
|
Slice* result) {
|
|
|
|
uint32_t len;
|
|
|
|
p = GetVarint32Ptr(p, limit, &len);
|
2013-02-28 18:04:58 -08:00
|
|
|
if (p == nullptr) return nullptr;
|
|
|
|
if (p + len > limit) return nullptr;
|
2011-03-18 22:37:00 +00:00
|
|
|
*result = Slice(p, len);
|
|
|
|
return p + len;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Parses a length-prefixed slice from the front of "*input". On success
// points "*result" at the payload, advances "*input" past it and returns
// true. Returns false if the varint32 length cannot be decoded or if fewer
// payload bytes remain than the length claims.
bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
  uint32_t payload_len;
  if (!GetVarint32(input, &payload_len)) {
    return false;
  }
  if (input->size() < payload_len) {
    return false;
  }
  *result = Slice(input->data(), payload_len);
  input->remove_prefix(payload_len);
  return true;
}
|
|
|
|
|
2013-08-22 23:10:02 -07:00
|
|
|
// Returns the payload of the length-prefixed slice that starts at "data".
// No bounds are known here, so the input is assumed not to be corrupted: the
// length decoder is simply allowed to look at up to five bytes.
Slice GetLengthPrefixedSlice(const char* data) {
  uint32_t payload_len;
  const char* const header_limit = data + 5;
  const char* const payload =
      GetVarint32Ptr(data, header_limit, &payload_len);
  return Slice(payload, payload_len);
}
|
|
|
|
|
[RocksDB] BackupableDB
Summary:
In this diff I present you BackupableDB v1. You can easily use it to backup your DB and it will do incremental snapshots for you.
Let's first describe how you would use BackupableDB. It's inheriting StackableDB interface so you can easily construct it with your DB object -- it will add a method RollTheSnapshot() to the DB object. When you call RollTheSnapshot(), current snapshot of the DB will be stored in the backup dir. To restore, you can just call RestoreDBFromBackup() on a BackupableDB (which is a static method) and it will restore all files from the backup dir. In the next version, it will even support automatic backuping every X minutes.
There are multiple things you can configure:
1. backup_env and db_env can be different, which is awesome because then you can easily backup to HDFS or wherever you feel like.
2. sync - if true, it *guarantees* backup consistency on machine reboot
3. number of snapshots to keep - this will keep last N snapshots around if you want, for some reason, be able to restore from an earlier snapshot. All the backuping is done in incremental fashion - if we already have 00010.sst, we will not copy it again. *IMPORTANT* -- This is based on assumption that 00010.sst never changes - two files named 00010.sst from the same DB will always be exactly the same. Is this true? I always copy manifest, current and log files.
4. You can decide if you want to flush the memtables before you backup, or you're fine with backing up the log files -- either way, you get a complete and consistent view of the database at a time of backup.
5. More things you can find in BackupableDBOptions
Here is the directory structure I use:
backup_dir/CURRENT_SNAPSHOT - just 4 bytes holding the latest snapshot
0, 1, 2, ... - files containing serialized version of each snapshot - containing a list of files
files/*.sst - sst files shared between snapshots - if one snapshot references 00010.sst and another one needs to back it up from the DB, it will just reference the same file
files/ 0/, 1/, 2/, ... - snapshot directories containing private snapshot files - current, manifest and log files
All the files are ref counted and deleted immediately when they go out of scope.
Some other stuff in this diff:
1. Added GetEnv() method to the DB. Discussed with @haobo and we agreed that it seems right thing to do.
2. Fixed StackableDB interface. The way it was set up before, I was not able to implement BackupableDB.
Test Plan:
I have a unittest, but please don't look at this yet. I just hacked it up to help me with debugging. I will write a lot of good tests and update the diff.
Also, `make asan_check`
Reviewers: dhruba, haobo, emayanke
Reviewed By: dhruba
CC: leveldb, haobo
Differential Revision: https://reviews.facebook.net/D14295
2013-12-09 14:06:52 -08:00
|
|
|
// Returns the leading part of "*slice" up to, but not including, the first
// occurrence of "delimiter" (or all of it when the delimiter is absent).
// "*slice" is advanced past the returned bytes and, if one was found, past
// the delimiter as well.
Slice GetSliceUntil(Slice* slice, char delimiter) {
  const char* const data = slice->data();
  const size_t size = slice->size();

  // Use size_t for the scan position: a 32-bit counter would wrap before
  // reaching "size" on very large slices and the scan would never terminate.
  size_t len = 0;
  while (len < size && data[len] != delimiter) {
    ++len;
  }

  Slice ret(data, len);
  // Skip the delimiter too, but only when the scan actually stopped on one.
  slice->remove_prefix(len + ((len < size) ? 1 : 0));
  return ret;
}
|
|
|
|
|
2012-12-07 10:42:19 -08:00
|
|
|
void BitStreamPutInt(char* dst, size_t dstlen, size_t offset,
|
|
|
|
uint32_t bits, uint64_t value) {
|
|
|
|
assert((offset + bits + 7)/8 <= dstlen);
|
|
|
|
assert(bits <= 64);
|
|
|
|
|
|
|
|
unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
|
|
|
|
|
|
|
|
size_t byteOffset = offset / 8;
|
|
|
|
size_t bitOffset = offset % 8;
|
|
|
|
|
|
|
|
// This prevents unused variable warnings when compiling.
|
|
|
|
#ifndef NDEBUG
|
|
|
|
// Store truncated value.
|
|
|
|
uint64_t origValue = (bits < 64)?(value & (((uint64_t)1 << bits) - 1)):value;
|
|
|
|
uint32_t origBits = bits;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
while (bits > 0) {
|
|
|
|
size_t bitsToGet = std::min<size_t>(bits, 8 - bitOffset);
|
|
|
|
unsigned char mask = ((1 << bitsToGet) - 1);
|
|
|
|
|
|
|
|
ptr[byteOffset] = (ptr[byteOffset] & ~(mask << bitOffset)) +
|
|
|
|
((value & mask) << bitOffset);
|
|
|
|
|
|
|
|
value >>= bitsToGet;
|
|
|
|
byteOffset += 1;
|
|
|
|
bitOffset = 0;
|
|
|
|
bits -= bitsToGet;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(origValue == BitStreamGetInt(dst, dstlen, offset, origBits));
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t BitStreamGetInt(const char* src, size_t srclen, size_t offset,
|
|
|
|
uint32_t bits) {
|
|
|
|
assert((offset + bits + 7)/8 <= srclen);
|
|
|
|
assert(bits <= 64);
|
|
|
|
|
|
|
|
const unsigned char* ptr = reinterpret_cast<const unsigned char*>(src);
|
|
|
|
|
|
|
|
uint64_t result = 0;
|
|
|
|
|
|
|
|
size_t byteOffset = offset / 8;
|
|
|
|
size_t bitOffset = offset % 8;
|
|
|
|
size_t shift = 0;
|
|
|
|
|
|
|
|
while (bits > 0) {
|
|
|
|
size_t bitsToGet = std::min<size_t>(bits, 8 - bitOffset);
|
|
|
|
unsigned char mask = ((1 << bitsToGet) - 1);
|
|
|
|
|
|
|
|
result += (uint64_t)((ptr[byteOffset] >> bitOffset) & mask) << shift;
|
|
|
|
|
|
|
|
shift += bitsToGet;
|
|
|
|
byteOffset += 1;
|
|
|
|
bitOffset = 0;
|
|
|
|
bits -= bitsToGet;
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
void BitStreamPutInt(std::string* dst, size_t offset, uint32_t bits,
|
|
|
|
uint64_t value) {
|
|
|
|
assert((offset + bits + 7)/8 <= dst->size());
|
|
|
|
|
|
|
|
const size_t kTmpBufLen = sizeof(value) + 1;
|
|
|
|
char tmpBuf[kTmpBufLen];
|
|
|
|
|
|
|
|
// Number of bytes of tmpBuf being used
|
|
|
|
const size_t kUsedBytes = (offset%8 + bits)/8;
|
|
|
|
|
|
|
|
// Copy relevant parts of dst to tmpBuf
|
|
|
|
for (size_t idx = 0; idx <= kUsedBytes; ++idx) {
|
|
|
|
tmpBuf[idx] = (*dst)[offset/8 + idx];
|
|
|
|
}
|
|
|
|
|
|
|
|
BitStreamPutInt(tmpBuf, kTmpBufLen, offset%8, bits, value);
|
|
|
|
|
|
|
|
// Copy tmpBuf back to dst
|
|
|
|
for (size_t idx = 0; idx <= kUsedBytes; ++idx) {
|
|
|
|
(*dst)[offset/8 + idx] = tmpBuf[idx];
|
|
|
|
}
|
|
|
|
|
|
|
|
// Do the check here too as we are working with a buffer.
|
|
|
|
assert(((bits < 64)?(value & (((uint64_t)1 << bits) - 1)):value) ==
|
|
|
|
BitStreamGetInt(dst, offset, bits));
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t BitStreamGetInt(const std::string* src, size_t offset,
|
|
|
|
uint32_t bits) {
|
|
|
|
return BitStreamGetInt(src->data(), src->size(), offset, bits);
|
|
|
|
}
|
|
|
|
|
|
|
|
uint64_t BitStreamGetInt(const Slice* src, size_t offset,
|
|
|
|
uint32_t bits) {
|
|
|
|
return BitStreamGetInt(src->data(), src->size(), offset, bits);
|
|
|
|
}
|
|
|
|
|
2013-10-03 21:49:15 -07:00
|
|
|
} // namespace rocksdb
|