Merge branch 'master' into performance
Conflicts: db/db_impl.cc db/db_test.cc db/memtable.cc db/version_set.cc include/rocksdb/statistics.h
This commit is contained in:
commit
f1cec73a76
17
Makefile
17
Makefile
@ -45,6 +45,8 @@ VALGRIND_VER := $(join $(VALGRIND_VER),valgrind)
|
||||
VALGRIND_OPTS = --error-exitcode=$(VALGRIND_ERROR) --leak-check=full
|
||||
|
||||
TESTS = \
|
||||
autovector_test \
|
||||
db_test \
|
||||
table_properties_collector_test \
|
||||
arena_test \
|
||||
auto_roll_logger_test \
|
||||
@ -74,12 +76,12 @@ TESTS = \
|
||||
skiplist_test \
|
||||
stringappend_test \
|
||||
ttl_test \
|
||||
backupable_db_test \
|
||||
version_edit_test \
|
||||
version_set_test \
|
||||
write_batch_test\
|
||||
deletefile_test \
|
||||
table_test \
|
||||
db_test
|
||||
table_test
|
||||
|
||||
TOOLS = \
|
||||
sst_dump \
|
||||
@ -125,7 +127,7 @@ $(SHARED2): $(SHARED3)
|
||||
endif
|
||||
|
||||
$(SHARED3):
|
||||
$(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(COVERAGEFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SOURCES) -o $(SHARED3)
|
||||
$(CXX) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(COVERAGEFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SOURCES) -o $@ $(LDFLAGS)
|
||||
|
||||
endif # PLATFORM_SHARED_EXT
|
||||
|
||||
@ -145,8 +147,9 @@ coverage:
|
||||
# Delete intermediate files
|
||||
find . -type f -regex ".*\.\(\(gcda\)\|\(gcno\)\)" -exec rm {} \;
|
||||
|
||||
check: all $(PROGRAMS) $(TESTS) $(TOOLS) ldb_tests
|
||||
check: all $(PROGRAMS) $(TESTS) $(TOOLS)
|
||||
for t in $(TESTS); do echo "***** Running $$t"; ./$$t || exit 1; done
|
||||
python tools/ldb_test.py
|
||||
|
||||
ldb_tests: all $(PROGRAMS) $(TOOLS)
|
||||
python tools/ldb_test.py
|
||||
@ -223,6 +226,9 @@ signal_test: util/signal_test.o $(LIBOBJECTS)
|
||||
arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||
|
||||
autovector_test: util/autovector_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) util/autovector_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||
|
||||
table_properties_collector_test: db/table_properties_collector_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) db/table_properties_collector_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||
|
||||
@ -277,6 +283,9 @@ perf_context_test: db/perf_context_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
prefix_test: db/prefix_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) db/prefix_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS)
|
||||
|
||||
backupable_db_test: utilities/backupable/backupable_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) utilities/backupable/backupable_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||
|
||||
ttl_test: utilities/ttl/ttl_test.o $(LIBOBJECTS) $(TESTHARNESS)
|
||||
$(CXX) utilities/ttl/ttl_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
|
||||
|
||||
|
2
README
2
README
@ -79,4 +79,4 @@ include/rocksdb/statistics.h
|
||||
include/rocksdb/transaction_log.h
|
||||
An API to retrieve transaction logs from a database.
|
||||
|
||||
|
||||
Design discussions are conducted in https://www.facebook.com/groups/rocksdb.dev/
|
||||
|
@ -189,6 +189,18 @@ EOF
|
||||
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_ATOMIC_PRESENT"
|
||||
fi
|
||||
|
||||
# Test whether fallocate is available.
# A tiny C++ program that calls fallocate() is compiled from stdin; the
# compiler's exit status tells us whether the call is usable on this platform.
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
  #include <fcntl.h>
  int main() {
    int fd = open("/dev/null", 0);
    fallocate(fd, 0, 0, 1024);
  }
EOF
if [ "$?" = 0 ]; then
    # Append to the existing COMMON_FLAGS (as the atomic check above does)
    # instead of overwriting it with PLATFORM_LDFLAGS, which would discard
    # previously detected defines and mix linker flags into compile flags.
    COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_FALLOCATE_PRESENT"
fi
|
||||
|
||||
# Test whether Snappy library is installed
|
||||
# http://code.google.com/p/snappy/
|
||||
$CXX $CFLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
|
||||
|
473
db/c.cc
473
db/c.cc
@ -20,6 +20,8 @@
|
||||
#include "rocksdb/options.h"
|
||||
#include "rocksdb/status.h"
|
||||
#include "rocksdb/write_batch.h"
|
||||
#include "rocksdb/memtablerep.h"
|
||||
#include "rocksdb/universal_compaction.h"
|
||||
|
||||
using rocksdb::Cache;
|
||||
using rocksdb::Comparator;
|
||||
@ -48,21 +50,21 @@ using std::shared_ptr;
|
||||
|
||||
extern "C" {
|
||||
|
||||
struct leveldb_t { DB* rep; };
|
||||
struct leveldb_iterator_t { Iterator* rep; };
|
||||
struct leveldb_writebatch_t { WriteBatch rep; };
|
||||
struct leveldb_snapshot_t { const Snapshot* rep; };
|
||||
struct leveldb_readoptions_t { ReadOptions rep; };
|
||||
struct leveldb_writeoptions_t { WriteOptions rep; };
|
||||
struct leveldb_options_t { Options rep; };
|
||||
struct leveldb_seqfile_t { SequentialFile* rep; };
|
||||
struct leveldb_randomfile_t { RandomAccessFile* rep; };
|
||||
struct leveldb_writablefile_t { WritableFile* rep; };
|
||||
struct leveldb_filelock_t { FileLock* rep; };
|
||||
struct leveldb_logger_t { shared_ptr<Logger> rep; };
|
||||
struct leveldb_cache_t { shared_ptr<Cache> rep; };
|
||||
struct rocksdb_t { DB* rep; };
|
||||
struct rocksdb_iterator_t { Iterator* rep; };
|
||||
struct rocksdb_writebatch_t { WriteBatch rep; };
|
||||
struct rocksdb_snapshot_t { const Snapshot* rep; };
|
||||
struct rocksdb_readoptions_t { ReadOptions rep; };
|
||||
struct rocksdb_writeoptions_t { WriteOptions rep; };
|
||||
struct rocksdb_options_t { Options rep; };
|
||||
struct rocksdb_seqfile_t { SequentialFile* rep; };
|
||||
struct rocksdb_randomfile_t { RandomAccessFile* rep; };
|
||||
struct rocksdb_writablefile_t { WritableFile* rep; };
|
||||
struct rocksdb_filelock_t { FileLock* rep; };
|
||||
struct rocksdb_logger_t { shared_ptr<Logger> rep; };
|
||||
struct rocksdb_cache_t { shared_ptr<Cache> rep; };
|
||||
|
||||
struct leveldb_comparator_t : public Comparator {
|
||||
struct rocksdb_comparator_t : public Comparator {
|
||||
void* state_;
|
||||
void (*destructor_)(void*);
|
||||
int (*compare_)(
|
||||
@ -71,7 +73,7 @@ struct leveldb_comparator_t : public Comparator {
|
||||
const char* b, size_t blen);
|
||||
const char* (*name_)(void*);
|
||||
|
||||
virtual ~leveldb_comparator_t() {
|
||||
virtual ~rocksdb_comparator_t() {
|
||||
(*destructor_)(state_);
|
||||
}
|
||||
|
||||
@ -88,7 +90,7 @@ struct leveldb_comparator_t : public Comparator {
|
||||
virtual void FindShortSuccessor(std::string* key) const { }
|
||||
};
|
||||
|
||||
struct leveldb_filterpolicy_t : public FilterPolicy {
|
||||
struct rocksdb_filterpolicy_t : public FilterPolicy {
|
||||
void* state_;
|
||||
void (*destructor_)(void*);
|
||||
const char* (*name_)(void*);
|
||||
@ -102,7 +104,7 @@ struct leveldb_filterpolicy_t : public FilterPolicy {
|
||||
const char* key, size_t length,
|
||||
const char* filter, size_t filter_length);
|
||||
|
||||
virtual ~leveldb_filterpolicy_t() {
|
||||
virtual ~rocksdb_filterpolicy_t() {
|
||||
(*destructor_)(state_);
|
||||
}
|
||||
|
||||
@ -129,11 +131,16 @@ struct leveldb_filterpolicy_t : public FilterPolicy {
|
||||
}
|
||||
};
|
||||
|
||||
struct leveldb_env_t {
|
||||
struct rocksdb_env_t {
|
||||
Env* rep;
|
||||
bool is_default;
|
||||
};
|
||||
|
||||
struct rocksdb_universal_compaction_options_t {
|
||||
rocksdb::CompactionOptionsUniversal *rep;
|
||||
};
|
||||
|
||||
|
||||
static bool SaveError(char** errptr, const Status& s) {
|
||||
assert(errptr != NULL);
|
||||
if (s.ok()) {
|
||||
@ -154,27 +161,27 @@ static char* CopyString(const std::string& str) {
|
||||
return result;
|
||||
}
|
||||
|
||||
leveldb_t* leveldb_open(
|
||||
const leveldb_options_t* options,
|
||||
rocksdb_t* rocksdb_open(
|
||||
const rocksdb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr) {
|
||||
DB* db;
|
||||
if (SaveError(errptr, DB::Open(options->rep, std::string(name), &db))) {
|
||||
return NULL;
|
||||
}
|
||||
leveldb_t* result = new leveldb_t;
|
||||
rocksdb_t* result = new rocksdb_t;
|
||||
result->rep = db;
|
||||
return result;
|
||||
}
|
||||
|
||||
void leveldb_close(leveldb_t* db) {
|
||||
void rocksdb_close(rocksdb_t* db) {
|
||||
delete db->rep;
|
||||
delete db;
|
||||
}
|
||||
|
||||
void leveldb_put(
|
||||
leveldb_t* db,
|
||||
const leveldb_writeoptions_t* options,
|
||||
void rocksdb_put(
|
||||
rocksdb_t* db,
|
||||
const rocksdb_writeoptions_t* options,
|
||||
const char* key, size_t keylen,
|
||||
const char* val, size_t vallen,
|
||||
char** errptr) {
|
||||
@ -182,26 +189,26 @@ void leveldb_put(
|
||||
db->rep->Put(options->rep, Slice(key, keylen), Slice(val, vallen)));
|
||||
}
|
||||
|
||||
void leveldb_delete(
|
||||
leveldb_t* db,
|
||||
const leveldb_writeoptions_t* options,
|
||||
void rocksdb_delete(
|
||||
rocksdb_t* db,
|
||||
const rocksdb_writeoptions_t* options,
|
||||
const char* key, size_t keylen,
|
||||
char** errptr) {
|
||||
SaveError(errptr, db->rep->Delete(options->rep, Slice(key, keylen)));
|
||||
}
|
||||
|
||||
|
||||
void leveldb_write(
|
||||
leveldb_t* db,
|
||||
const leveldb_writeoptions_t* options,
|
||||
leveldb_writebatch_t* batch,
|
||||
void rocksdb_write(
|
||||
rocksdb_t* db,
|
||||
const rocksdb_writeoptions_t* options,
|
||||
rocksdb_writebatch_t* batch,
|
||||
char** errptr) {
|
||||
SaveError(errptr, db->rep->Write(options->rep, &batch->rep));
|
||||
}
|
||||
|
||||
char* leveldb_get(
|
||||
leveldb_t* db,
|
||||
const leveldb_readoptions_t* options,
|
||||
char* rocksdb_get(
|
||||
rocksdb_t* db,
|
||||
const rocksdb_readoptions_t* options,
|
||||
const char* key, size_t keylen,
|
||||
size_t* vallen,
|
||||
char** errptr) {
|
||||
@ -220,30 +227,30 @@ char* leveldb_get(
|
||||
return result;
|
||||
}
|
||||
|
||||
leveldb_iterator_t* leveldb_create_iterator(
|
||||
leveldb_t* db,
|
||||
const leveldb_readoptions_t* options) {
|
||||
leveldb_iterator_t* result = new leveldb_iterator_t;
|
||||
rocksdb_iterator_t* rocksdb_create_iterator(
|
||||
rocksdb_t* db,
|
||||
const rocksdb_readoptions_t* options) {
|
||||
rocksdb_iterator_t* result = new rocksdb_iterator_t;
|
||||
result->rep = db->rep->NewIterator(options->rep);
|
||||
return result;
|
||||
}
|
||||
|
||||
const leveldb_snapshot_t* leveldb_create_snapshot(
|
||||
leveldb_t* db) {
|
||||
leveldb_snapshot_t* result = new leveldb_snapshot_t;
|
||||
const rocksdb_snapshot_t* rocksdb_create_snapshot(
|
||||
rocksdb_t* db) {
|
||||
rocksdb_snapshot_t* result = new rocksdb_snapshot_t;
|
||||
result->rep = db->rep->GetSnapshot();
|
||||
return result;
|
||||
}
|
||||
|
||||
void leveldb_release_snapshot(
|
||||
leveldb_t* db,
|
||||
const leveldb_snapshot_t* snapshot) {
|
||||
void rocksdb_release_snapshot(
|
||||
rocksdb_t* db,
|
||||
const rocksdb_snapshot_t* snapshot) {
|
||||
db->rep->ReleaseSnapshot(snapshot->rep);
|
||||
delete snapshot;
|
||||
}
|
||||
|
||||
char* leveldb_property_value(
|
||||
leveldb_t* db,
|
||||
char* rocksdb_property_value(
|
||||
rocksdb_t* db,
|
||||
const char* propname) {
|
||||
std::string tmp;
|
||||
if (db->rep->GetProperty(Slice(propname), &tmp)) {
|
||||
@ -254,8 +261,8 @@ char* leveldb_property_value(
|
||||
}
|
||||
}
|
||||
|
||||
void leveldb_approximate_sizes(
|
||||
leveldb_t* db,
|
||||
void rocksdb_approximate_sizes(
|
||||
rocksdb_t* db,
|
||||
int num_ranges,
|
||||
const char* const* range_start_key, const size_t* range_start_key_len,
|
||||
const char* const* range_limit_key, const size_t* range_limit_key_len,
|
||||
@ -269,8 +276,8 @@ void leveldb_approximate_sizes(
|
||||
delete[] ranges;
|
||||
}
|
||||
|
||||
void leveldb_compact_range(
|
||||
leveldb_t* db,
|
||||
void rocksdb_compact_range(
|
||||
rocksdb_t* db,
|
||||
const char* start_key, size_t start_key_len,
|
||||
const char* limit_key, size_t limit_key_len) {
|
||||
Slice a, b;
|
||||
@ -280,92 +287,92 @@ void leveldb_compact_range(
|
||||
(limit_key ? (b = Slice(limit_key, limit_key_len), &b) : NULL));
|
||||
}
|
||||
|
||||
void leveldb_destroy_db(
|
||||
const leveldb_options_t* options,
|
||||
void rocksdb_destroy_db(
|
||||
const rocksdb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr) {
|
||||
SaveError(errptr, DestroyDB(name, options->rep));
|
||||
}
|
||||
|
||||
void leveldb_repair_db(
|
||||
const leveldb_options_t* options,
|
||||
void rocksdb_repair_db(
|
||||
const rocksdb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr) {
|
||||
SaveError(errptr, RepairDB(name, options->rep));
|
||||
}
|
||||
|
||||
void leveldb_iter_destroy(leveldb_iterator_t* iter) {
|
||||
void rocksdb_iter_destroy(rocksdb_iterator_t* iter) {
|
||||
delete iter->rep;
|
||||
delete iter;
|
||||
}
|
||||
|
||||
unsigned char leveldb_iter_valid(const leveldb_iterator_t* iter) {
|
||||
unsigned char rocksdb_iter_valid(const rocksdb_iterator_t* iter) {
|
||||
return iter->rep->Valid();
|
||||
}
|
||||
|
||||
void leveldb_iter_seek_to_first(leveldb_iterator_t* iter) {
|
||||
void rocksdb_iter_seek_to_first(rocksdb_iterator_t* iter) {
|
||||
iter->rep->SeekToFirst();
|
||||
}
|
||||
|
||||
void leveldb_iter_seek_to_last(leveldb_iterator_t* iter) {
|
||||
void rocksdb_iter_seek_to_last(rocksdb_iterator_t* iter) {
|
||||
iter->rep->SeekToLast();
|
||||
}
|
||||
|
||||
void leveldb_iter_seek(leveldb_iterator_t* iter, const char* k, size_t klen) {
|
||||
void rocksdb_iter_seek(rocksdb_iterator_t* iter, const char* k, size_t klen) {
|
||||
iter->rep->Seek(Slice(k, klen));
|
||||
}
|
||||
|
||||
void leveldb_iter_next(leveldb_iterator_t* iter) {
|
||||
void rocksdb_iter_next(rocksdb_iterator_t* iter) {
|
||||
iter->rep->Next();
|
||||
}
|
||||
|
||||
void leveldb_iter_prev(leveldb_iterator_t* iter) {
|
||||
void rocksdb_iter_prev(rocksdb_iterator_t* iter) {
|
||||
iter->rep->Prev();
|
||||
}
|
||||
|
||||
const char* leveldb_iter_key(const leveldb_iterator_t* iter, size_t* klen) {
|
||||
const char* rocksdb_iter_key(const rocksdb_iterator_t* iter, size_t* klen) {
|
||||
Slice s = iter->rep->key();
|
||||
*klen = s.size();
|
||||
return s.data();
|
||||
}
|
||||
|
||||
const char* leveldb_iter_value(const leveldb_iterator_t* iter, size_t* vlen) {
|
||||
const char* rocksdb_iter_value(const rocksdb_iterator_t* iter, size_t* vlen) {
|
||||
Slice s = iter->rep->value();
|
||||
*vlen = s.size();
|
||||
return s.data();
|
||||
}
|
||||
|
||||
void leveldb_iter_get_error(const leveldb_iterator_t* iter, char** errptr) {
|
||||
void rocksdb_iter_get_error(const rocksdb_iterator_t* iter, char** errptr) {
|
||||
SaveError(errptr, iter->rep->status());
|
||||
}
|
||||
|
||||
leveldb_writebatch_t* leveldb_writebatch_create() {
|
||||
return new leveldb_writebatch_t;
|
||||
rocksdb_writebatch_t* rocksdb_writebatch_create() {
|
||||
return new rocksdb_writebatch_t;
|
||||
}
|
||||
|
||||
void leveldb_writebatch_destroy(leveldb_writebatch_t* b) {
|
||||
void rocksdb_writebatch_destroy(rocksdb_writebatch_t* b) {
|
||||
delete b;
|
||||
}
|
||||
|
||||
void leveldb_writebatch_clear(leveldb_writebatch_t* b) {
|
||||
void rocksdb_writebatch_clear(rocksdb_writebatch_t* b) {
|
||||
b->rep.Clear();
|
||||
}
|
||||
|
||||
void leveldb_writebatch_put(
|
||||
leveldb_writebatch_t* b,
|
||||
void rocksdb_writebatch_put(
|
||||
rocksdb_writebatch_t* b,
|
||||
const char* key, size_t klen,
|
||||
const char* val, size_t vlen) {
|
||||
b->rep.Put(Slice(key, klen), Slice(val, vlen));
|
||||
}
|
||||
|
||||
void leveldb_writebatch_delete(
|
||||
leveldb_writebatch_t* b,
|
||||
void rocksdb_writebatch_delete(
|
||||
rocksdb_writebatch_t* b,
|
||||
const char* key, size_t klen) {
|
||||
b->rep.Delete(Slice(key, klen));
|
||||
}
|
||||
|
||||
void leveldb_writebatch_iterate(
|
||||
leveldb_writebatch_t* b,
|
||||
void rocksdb_writebatch_iterate(
|
||||
rocksdb_writebatch_t* b,
|
||||
void* state,
|
||||
void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen),
|
||||
void (*deleted)(void*, const char* k, size_t klen)) {
|
||||
@ -388,132 +395,132 @@ void leveldb_writebatch_iterate(
|
||||
b->rep.Iterate(&handler);
|
||||
}
|
||||
|
||||
leveldb_options_t* leveldb_options_create() {
|
||||
return new leveldb_options_t;
|
||||
rocksdb_options_t* rocksdb_options_create() {
|
||||
return new rocksdb_options_t;
|
||||
}
|
||||
|
||||
void leveldb_options_destroy(leveldb_options_t* options) {
|
||||
void rocksdb_options_destroy(rocksdb_options_t* options) {
|
||||
delete options;
|
||||
}
|
||||
|
||||
void leveldb_options_set_comparator(
|
||||
leveldb_options_t* opt,
|
||||
leveldb_comparator_t* cmp) {
|
||||
void rocksdb_options_set_comparator(
|
||||
rocksdb_options_t* opt,
|
||||
rocksdb_comparator_t* cmp) {
|
||||
opt->rep.comparator = cmp;
|
||||
}
|
||||
|
||||
void leveldb_options_set_filter_policy(
|
||||
leveldb_options_t* opt,
|
||||
leveldb_filterpolicy_t* policy) {
|
||||
void rocksdb_options_set_filter_policy(
|
||||
rocksdb_options_t* opt,
|
||||
rocksdb_filterpolicy_t* policy) {
|
||||
opt->rep.filter_policy = policy;
|
||||
}
|
||||
|
||||
void leveldb_options_set_create_if_missing(
|
||||
leveldb_options_t* opt, unsigned char v) {
|
||||
void rocksdb_options_set_create_if_missing(
|
||||
rocksdb_options_t* opt, unsigned char v) {
|
||||
opt->rep.create_if_missing = v;
|
||||
}
|
||||
|
||||
void leveldb_options_set_error_if_exists(
|
||||
leveldb_options_t* opt, unsigned char v) {
|
||||
void rocksdb_options_set_error_if_exists(
|
||||
rocksdb_options_t* opt, unsigned char v) {
|
||||
opt->rep.error_if_exists = v;
|
||||
}
|
||||
|
||||
void leveldb_options_set_paranoid_checks(
|
||||
leveldb_options_t* opt, unsigned char v) {
|
||||
void rocksdb_options_set_paranoid_checks(
|
||||
rocksdb_options_t* opt, unsigned char v) {
|
||||
opt->rep.paranoid_checks = v;
|
||||
}
|
||||
|
||||
void leveldb_options_set_env(leveldb_options_t* opt, leveldb_env_t* env) {
|
||||
void rocksdb_options_set_env(rocksdb_options_t* opt, rocksdb_env_t* env) {
|
||||
opt->rep.env = (env ? env->rep : NULL);
|
||||
}
|
||||
|
||||
void leveldb_options_set_info_log(leveldb_options_t* opt, leveldb_logger_t* l) {
|
||||
void rocksdb_options_set_info_log(rocksdb_options_t* opt, rocksdb_logger_t* l) {
|
||||
if (l) {
|
||||
opt->rep.info_log = l->rep;
|
||||
}
|
||||
}
|
||||
|
||||
void leveldb_options_set_write_buffer_size(leveldb_options_t* opt, size_t s) {
|
||||
void rocksdb_options_set_write_buffer_size(rocksdb_options_t* opt, size_t s) {
|
||||
opt->rep.write_buffer_size = s;
|
||||
}
|
||||
|
||||
void leveldb_options_set_max_open_files(leveldb_options_t* opt, int n) {
|
||||
void rocksdb_options_set_max_open_files(rocksdb_options_t* opt, int n) {
|
||||
opt->rep.max_open_files = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_cache(leveldb_options_t* opt, leveldb_cache_t* c) {
|
||||
void rocksdb_options_set_cache(rocksdb_options_t* opt, rocksdb_cache_t* c) {
|
||||
if (c) {
|
||||
opt->rep.block_cache = c->rep;
|
||||
}
|
||||
}
|
||||
|
||||
void leveldb_options_set_block_size(leveldb_options_t* opt, size_t s) {
|
||||
void rocksdb_options_set_block_size(rocksdb_options_t* opt, size_t s) {
|
||||
opt->rep.block_size = s;
|
||||
}
|
||||
|
||||
void leveldb_options_set_block_restart_interval(leveldb_options_t* opt, int n) {
|
||||
void rocksdb_options_set_block_restart_interval(rocksdb_options_t* opt, int n) {
|
||||
opt->rep.block_restart_interval = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_target_file_size_base(
|
||||
leveldb_options_t* opt, uint64_t n) {
|
||||
void rocksdb_options_set_target_file_size_base(
|
||||
rocksdb_options_t* opt, uint64_t n) {
|
||||
opt->rep.target_file_size_base = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_target_file_size_multiplier(
|
||||
leveldb_options_t* opt, int n) {
|
||||
void rocksdb_options_set_target_file_size_multiplier(
|
||||
rocksdb_options_t* opt, int n) {
|
||||
opt->rep.target_file_size_multiplier = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_max_bytes_for_level_base(
|
||||
leveldb_options_t* opt, uint64_t n) {
|
||||
void rocksdb_options_set_max_bytes_for_level_base(
|
||||
rocksdb_options_t* opt, uint64_t n) {
|
||||
opt->rep.max_bytes_for_level_base = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_max_bytes_for_level_multiplier(
|
||||
leveldb_options_t* opt, int n) {
|
||||
void rocksdb_options_set_max_bytes_for_level_multiplier(
|
||||
rocksdb_options_t* opt, int n) {
|
||||
opt->rep.max_bytes_for_level_multiplier = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_expanded_compaction_factor(
|
||||
leveldb_options_t* opt, int n) {
|
||||
void rocksdb_options_set_expanded_compaction_factor(
|
||||
rocksdb_options_t* opt, int n) {
|
||||
opt->rep.expanded_compaction_factor = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_max_grandparent_overlap_factor(
|
||||
leveldb_options_t* opt, int n) {
|
||||
void rocksdb_options_set_max_grandparent_overlap_factor(
|
||||
rocksdb_options_t* opt, int n) {
|
||||
opt->rep.max_grandparent_overlap_factor = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_num_levels(leveldb_options_t* opt, int n) {
|
||||
void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) {
|
||||
opt->rep.num_levels = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_level0_file_num_compaction_trigger(
|
||||
leveldb_options_t* opt, int n) {
|
||||
void rocksdb_options_set_level0_file_num_compaction_trigger(
|
||||
rocksdb_options_t* opt, int n) {
|
||||
opt->rep.level0_file_num_compaction_trigger = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_level0_slowdown_writes_trigger(
|
||||
leveldb_options_t* opt, int n) {
|
||||
void rocksdb_options_set_level0_slowdown_writes_trigger(
|
||||
rocksdb_options_t* opt, int n) {
|
||||
opt->rep.level0_slowdown_writes_trigger = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_level0_stop_writes_trigger(
|
||||
leveldb_options_t* opt, int n) {
|
||||
void rocksdb_options_set_level0_stop_writes_trigger(
|
||||
rocksdb_options_t* opt, int n) {
|
||||
opt->rep.level0_stop_writes_trigger = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_max_mem_compaction_level(
|
||||
leveldb_options_t* opt, int n) {
|
||||
void rocksdb_options_set_max_mem_compaction_level(
|
||||
rocksdb_options_t* opt, int n) {
|
||||
opt->rep.max_mem_compaction_level = n;
|
||||
}
|
||||
|
||||
void leveldb_options_set_compression(leveldb_options_t* opt, int t) {
|
||||
void rocksdb_options_set_compression(rocksdb_options_t* opt, int t) {
|
||||
opt->rep.compression = static_cast<CompressionType>(t);
|
||||
}
|
||||
|
||||
void leveldb_options_set_compression_per_level(leveldb_options_t* opt,
|
||||
void rocksdb_options_set_compression_per_level(rocksdb_options_t* opt,
|
||||
int* level_values,
|
||||
size_t num_levels) {
|
||||
opt->rep.compression_per_level.resize(num_levels);
|
||||
@ -523,43 +530,132 @@ void leveldb_options_set_compression_per_level(leveldb_options_t* opt,
|
||||
}
|
||||
}
|
||||
|
||||
void leveldb_options_set_compression_options(
|
||||
leveldb_options_t* opt, int w_bits, int level, int strategy) {
|
||||
void rocksdb_options_set_compression_options(
|
||||
rocksdb_options_t* opt, int w_bits, int level, int strategy) {
|
||||
opt->rep.compression_opts.window_bits = w_bits;
|
||||
opt->rep.compression_opts.level = level;
|
||||
opt->rep.compression_opts.strategy = strategy;
|
||||
}
|
||||
|
||||
void leveldb_options_set_disable_data_sync(
|
||||
leveldb_options_t* opt, bool disable_data_sync) {
|
||||
void rocksdb_options_set_disable_data_sync(
|
||||
rocksdb_options_t* opt, int disable_data_sync) {
|
||||
opt->rep.disableDataSync = disable_data_sync;
|
||||
}
|
||||
|
||||
void leveldb_options_set_use_fsync(
|
||||
leveldb_options_t* opt, bool use_fsync) {
|
||||
void rocksdb_options_set_use_fsync(
|
||||
rocksdb_options_t* opt, int use_fsync) {
|
||||
opt->rep.use_fsync = use_fsync;
|
||||
}
|
||||
|
||||
void leveldb_options_set_db_stats_log_interval(
|
||||
leveldb_options_t* opt, int db_stats_log_interval) {
|
||||
void rocksdb_options_set_db_stats_log_interval(
|
||||
rocksdb_options_t* opt, int db_stats_log_interval) {
|
||||
opt->rep.db_stats_log_interval = db_stats_log_interval;
|
||||
}
|
||||
|
||||
void leveldb_options_set_db_log_dir(
|
||||
leveldb_options_t* opt, const char* db_log_dir) {
|
||||
void rocksdb_options_set_db_log_dir(
|
||||
rocksdb_options_t* opt, const char* db_log_dir) {
|
||||
opt->rep.db_log_dir = db_log_dir;
|
||||
}
|
||||
|
||||
void leveldb_options_set_WAL_ttl_seconds(leveldb_options_t* opt, uint64_t ttl) {
|
||||
void rocksdb_options_set_WAL_ttl_seconds(rocksdb_options_t* opt, uint64_t ttl) {
|
||||
opt->rep.WAL_ttl_seconds = ttl;
|
||||
}
|
||||
|
||||
void leveldb_options_set_WAL_size_limit_MB(
|
||||
leveldb_options_t* opt, uint64_t limit) {
|
||||
void rocksdb_options_set_WAL_size_limit_MB(
|
||||
rocksdb_options_t* opt, uint64_t limit) {
|
||||
opt->rep.WAL_size_limit_MB = limit;
|
||||
}
|
||||
|
||||
leveldb_comparator_t* leveldb_comparator_create(
|
||||
// Stores n into the wrapped Options' max_write_buffer_number field.
void rocksdb_options_set_max_write_buffer_number(rocksdb_options_t* opt,
                                                 int n) {
  auto& options = opt->rep;
  options.max_write_buffer_number = n;
}
|
||||
|
||||
// Stores n into the wrapped Options' min_write_buffer_number_to_merge field.
void rocksdb_options_set_min_write_buffer_number_to_merge(
    rocksdb_options_t* opt, int n) {
  auto& options = opt->rep;
  options.min_write_buffer_number_to_merge = n;
}
|
||||
|
||||
// Stores n into the wrapped Options' max_background_compactions field.
void rocksdb_options_set_max_background_compactions(rocksdb_options_t* opt,
                                                    int n) {
  auto& options = opt->rep;
  options.max_background_compactions = n;
}
|
||||
|
||||
// Stores n into the wrapped Options' max_background_flushes field.
void rocksdb_options_set_max_background_flushes(rocksdb_options_t* opt,
                                                int n) {
  auto& options = opt->rep;
  options.max_background_flushes = n;
}
|
||||
|
||||
// Assigns the C-style integer flag `disable` to the wrapped Options'
// disable_auto_compactions field.
void rocksdb_options_set_disable_auto_compactions(rocksdb_options_t* opt,
                                                  int disable) {
  auto& options = opt->rep;
  options.disable_auto_compactions = disable;
}
|
||||
|
||||
// Assigns the C-style integer flag `disable` to the wrapped Options'
// disable_seek_compaction field.
void rocksdb_options_set_disable_seek_compaction(rocksdb_options_t* opt,
                                                 int disable) {
  auto& options = opt->rep;
  options.disable_seek_compaction = disable;
}
|
||||
|
||||
// Stores n into the wrapped Options' source_compaction_factor field.
//
// This setter previously wrote to expanded_compaction_factor, silently
// clobbering the value configured through
// rocksdb_options_set_expanded_compaction_factor() and leaving
// source_compaction_factor untouched.
void rocksdb_options_set_source_compaction_factor(
    rocksdb_options_t* opt, int n) {
  opt->rep.source_compaction_factor = n;
}
|
||||
|
||||
// Invokes PrepareForBulkLoad() on the wrapped Options; any return value is
// ignored.
void rocksdb_options_prepare_for_bulk_load(rocksdb_options_t* opt) {
  auto& options = opt->rep;
  options.PrepareForBulkLoad();
}
|
||||
|
||||
// Installs a rocksdb::VectorRepFactory as the memtable factory of the wrapped
// Options.
//
// A fresh factory is allocated on every call. The earlier implementation
// cached one raw pointer in a function-local static and handed that same
// pointer to memtable_factory.reset() each time, so every options object
// claimed independent ownership of a single factory: destroying one options
// object left the static dangling, and a second one would delete it again.
// NOTE(review): this relies on memtable_factory being an owning smart pointer
// (it exposes reset()) — confirm against rocksdb/options.h.
void rocksdb_options_set_memtable_vector_rep(rocksdb_options_t *opt) {
  opt->rep.memtable_factory.reset(new rocksdb::VectorRepFactory);
}
|
||||
|
||||
// Converts the integer `style` to rocksdb::CompactionStyle and stores it in
// the wrapped Options. No range checking is performed on `style`.
void rocksdb_options_set_compaction_style(rocksdb_options_t *opt, int style) {
  const auto chosen = static_cast<rocksdb::CompactionStyle>(style);
  opt->rep.compaction_style = chosen;
}
|
||||
|
||||
// Copies the universal-compaction settings held by `uco` into the wrapped
// Options. The value is copied; `uco` is not retained.
void rocksdb_options_set_universal_compaction_options(
    rocksdb_options_t *opt, rocksdb_universal_compaction_options_t *uco) {
  const rocksdb::CompactionOptionsUniversal& source = *(uco->rep);
  opt->rep.compaction_options_universal = source;
}
|
||||
|
||||
/*
|
||||
TODO:
|
||||
merge_operator
|
||||
compaction_filter
|
||||
prefix_extractor
|
||||
whole_key_filtering
|
||||
max_bytes_for_level_multiplier_additional
|
||||
delete_obsolete_files_period_micros
|
||||
max_log_file_size
|
||||
log_file_time_to_roll
|
||||
keep_log_file_num
|
||||
soft_rate_limit
|
||||
hard_rate_limit
|
||||
rate_limit_delay_max_milliseconds
|
||||
max_manifest_file_size
|
||||
no_block_cache
|
||||
table_cache_numshardbits
|
||||
table_cache_remove_scan_count_limit
|
||||
arena_block_size
|
||||
manifest_preallocation_size
|
||||
purge_redundant_kvs_while_flush
|
||||
allow_os_buffer
|
||||
allow_mmap_reads
|
||||
allow_mmap_writes
|
||||
is_fd_close_on_exec
|
||||
skip_log_error_on_recovery
|
||||
stats_dump_period_sec
|
||||
block_size_deviation
|
||||
advise_random_on_open
|
||||
access_hint_on_compaction_start
|
||||
use_adaptive_mutex
|
||||
bytes_per_sync
|
||||
filter_deletes
|
||||
max_sequential_skip_in_iterations
|
||||
table_factory
|
||||
table_properties_collectors
|
||||
inplace_update_support
|
||||
inplace_update_num_locks
|
||||
*/
|
||||
|
||||
rocksdb_comparator_t* rocksdb_comparator_create(
|
||||
void* state,
|
||||
void (*destructor)(void*),
|
||||
int (*compare)(
|
||||
@ -567,7 +663,7 @@ leveldb_comparator_t* leveldb_comparator_create(
|
||||
const char* a, size_t alen,
|
||||
const char* b, size_t blen),
|
||||
const char* (*name)(void*)) {
|
||||
leveldb_comparator_t* result = new leveldb_comparator_t;
|
||||
rocksdb_comparator_t* result = new rocksdb_comparator_t;
|
||||
result->state_ = state;
|
||||
result->destructor_ = destructor;
|
||||
result->compare_ = compare;
|
||||
@ -575,11 +671,11 @@ leveldb_comparator_t* leveldb_comparator_create(
|
||||
return result;
|
||||
}
|
||||
|
||||
void leveldb_comparator_destroy(leveldb_comparator_t* cmp) {
|
||||
void rocksdb_comparator_destroy(rocksdb_comparator_t* cmp) {
|
||||
delete cmp;
|
||||
}
|
||||
|
||||
leveldb_filterpolicy_t* leveldb_filterpolicy_create(
|
||||
rocksdb_filterpolicy_t* rocksdb_filterpolicy_create(
|
||||
void* state,
|
||||
void (*destructor)(void*),
|
||||
char* (*create_filter)(
|
||||
@ -592,7 +688,7 @@ leveldb_filterpolicy_t* leveldb_filterpolicy_create(
|
||||
const char* key, size_t length,
|
||||
const char* filter, size_t filter_length),
|
||||
const char* (*name)(void*)) {
|
||||
leveldb_filterpolicy_t* result = new leveldb_filterpolicy_t;
|
||||
rocksdb_filterpolicy_t* result = new rocksdb_filterpolicy_t;
|
||||
result->state_ = state;
|
||||
result->destructor_ = destructor;
|
||||
result->create_ = create_filter;
|
||||
@ -601,15 +697,15 @@ leveldb_filterpolicy_t* leveldb_filterpolicy_create(
|
||||
return result;
|
||||
}
|
||||
|
||||
void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t* filter) {
|
||||
void rocksdb_filterpolicy_destroy(rocksdb_filterpolicy_t* filter) {
|
||||
delete filter;
|
||||
}
|
||||
|
||||
leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) {
|
||||
// Make a leveldb_filterpolicy_t, but override all of its methods so
|
||||
rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(int bits_per_key) {
|
||||
// Make a rocksdb_filterpolicy_t, but override all of its methods so
|
||||
// they delegate to a NewBloomFilterPolicy() instead of user
|
||||
// supplied C functions.
|
||||
struct Wrapper : public leveldb_filterpolicy_t {
|
||||
struct Wrapper : public rocksdb_filterpolicy_t {
|
||||
const FilterPolicy* rep_;
|
||||
~Wrapper() { delete rep_; }
|
||||
const char* Name() const { return rep_->Name(); }
|
||||
@ -628,64 +724,115 @@ leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(int bits_per_key) {
|
||||
return wrapper;
|
||||
}
|
||||
|
||||
leveldb_readoptions_t* leveldb_readoptions_create() {
|
||||
return new leveldb_readoptions_t;
|
||||
rocksdb_readoptions_t* rocksdb_readoptions_create() {
|
||||
return new rocksdb_readoptions_t;
|
||||
}
|
||||
|
||||
void leveldb_readoptions_destroy(leveldb_readoptions_t* opt) {
|
||||
void rocksdb_readoptions_destroy(rocksdb_readoptions_t* opt) {
|
||||
delete opt;
|
||||
}
|
||||
|
||||
void leveldb_readoptions_set_verify_checksums(
|
||||
leveldb_readoptions_t* opt,
|
||||
void rocksdb_readoptions_set_verify_checksums(
|
||||
rocksdb_readoptions_t* opt,
|
||||
unsigned char v) {
|
||||
opt->rep.verify_checksums = v;
|
||||
}
|
||||
|
||||
void leveldb_readoptions_set_fill_cache(
|
||||
leveldb_readoptions_t* opt, unsigned char v) {
|
||||
void rocksdb_readoptions_set_fill_cache(
|
||||
rocksdb_readoptions_t* opt, unsigned char v) {
|
||||
opt->rep.fill_cache = v;
|
||||
}
|
||||
|
||||
void leveldb_readoptions_set_snapshot(
|
||||
leveldb_readoptions_t* opt,
|
||||
const leveldb_snapshot_t* snap) {
|
||||
void rocksdb_readoptions_set_snapshot(
|
||||
rocksdb_readoptions_t* opt,
|
||||
const rocksdb_snapshot_t* snap) {
|
||||
opt->rep.snapshot = (snap ? snap->rep : NULL);
|
||||
}
|
||||
|
||||
leveldb_writeoptions_t* leveldb_writeoptions_create() {
|
||||
return new leveldb_writeoptions_t;
|
||||
rocksdb_writeoptions_t* rocksdb_writeoptions_create() {
|
||||
return new rocksdb_writeoptions_t;
|
||||
}
|
||||
|
||||
void leveldb_writeoptions_destroy(leveldb_writeoptions_t* opt) {
|
||||
void rocksdb_writeoptions_destroy(rocksdb_writeoptions_t* opt) {
|
||||
delete opt;
|
||||
}
|
||||
|
||||
void leveldb_writeoptions_set_sync(
|
||||
leveldb_writeoptions_t* opt, unsigned char v) {
|
||||
void rocksdb_writeoptions_set_sync(
|
||||
rocksdb_writeoptions_t* opt, unsigned char v) {
|
||||
opt->rep.sync = v;
|
||||
}
|
||||
|
||||
leveldb_cache_t* leveldb_cache_create_lru(size_t capacity) {
|
||||
leveldb_cache_t* c = new leveldb_cache_t;
|
||||
// Stores `disable` into the disableWAL field of the wrapped write options.
void rocksdb_writeoptions_disable_WAL(rocksdb_writeoptions_t* opt, int disable) {
  auto& write_options = opt->rep;
  write_options.disableWAL = disable;
}
|
||||
|
||||
|
||||
rocksdb_cache_t* rocksdb_cache_create_lru(size_t capacity) {
|
||||
rocksdb_cache_t* c = new rocksdb_cache_t;
|
||||
c->rep = NewLRUCache(capacity);
|
||||
return c;
|
||||
}
|
||||
|
||||
void leveldb_cache_destroy(leveldb_cache_t* cache) {
|
||||
void rocksdb_cache_destroy(rocksdb_cache_t* cache) {
|
||||
delete cache;
|
||||
}
|
||||
|
||||
leveldb_env_t* leveldb_create_default_env() {
|
||||
leveldb_env_t* result = new leveldb_env_t;
|
||||
rocksdb_env_t* rocksdb_create_default_env() {
|
||||
rocksdb_env_t* result = new rocksdb_env_t;
|
||||
result->rep = Env::Default();
|
||||
result->is_default = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
void leveldb_env_destroy(leveldb_env_t* env) {
|
||||
// Forwards `n` to SetBackgroundThreads() on the environment wrapped by `env`.
void rocksdb_env_set_background_threads(rocksdb_env_t* env, int n) {
  auto* const wrapped_env = env->rep;
  wrapped_env->SetBackgroundThreads(n);
}
|
||||
|
||||
void rocksdb_env_destroy(rocksdb_env_t* env) {
|
||||
if (!env->is_default) delete env->rep;
|
||||
delete env;
|
||||
}
|
||||
|
||||
// Allocates a C handle together with the CompactionOptionsUniversal object it
// wraps. Both allocations are released again by
// rocksdb_universal_compaction_options_destroy().
rocksdb_universal_compaction_options_t* rocksdb_universal_compaction_options_create() {
  auto* handle = new rocksdb_universal_compaction_options_t;
  handle->rep = new rocksdb::CompactionOptionsUniversal;
  return handle;
}
|
||||
|
||||
// Writes `ratio` into the size_ratio field of the wrapped universal
// compaction options.
void rocksdb_universal_compaction_options_set_size_ratio(
    rocksdb_universal_compaction_options_t* uco, int ratio) {
  auto& universal = *uco->rep;
  universal.size_ratio = ratio;
}
|
||||
|
||||
// Writes `w` into the min_merge_width field of the wrapped universal
// compaction options.
void rocksdb_universal_compaction_options_set_min_merge_width(
    rocksdb_universal_compaction_options_t* uco, int w) {
  auto& universal = *uco->rep;
  universal.min_merge_width = w;
}
|
||||
|
||||
// Writes `w` into the max_merge_width field of the wrapped universal
// compaction options.
void rocksdb_universal_compaction_options_set_max_merge_width(
    rocksdb_universal_compaction_options_t* uco, int w) {
  auto& universal = *uco->rep;
  universal.max_merge_width = w;
}
|
||||
|
||||
// Writes `p` into the max_size_amplification_percent field of the wrapped
// universal compaction options.
void rocksdb_universal_compaction_options_set_max_size_amplification_percent(
    rocksdb_universal_compaction_options_t* uco, int p) {
  auto& universal = *uco->rep;
  universal.max_size_amplification_percent = p;
}
|
||||
|
||||
// Writes `p` into the compression_size_percent field of the wrapped
// universal compaction options.
void rocksdb_universal_compaction_options_set_compression_size_percent(
    rocksdb_universal_compaction_options_t* uco, int p) {
  auto& universal = *uco->rep;
  universal.compression_size_percent = p;
}
|
||||
|
||||
// Converts the integer `style` to rocksdb::CompactionStopStyle and stores it
// in the stop_style field of the wrapped options. The value is cast as-is;
// no range check is performed here.
void rocksdb_universal_compaction_options_set_stop_style(
    rocksdb_universal_compaction_options_t* uco, int style) {
  const auto stop_style = static_cast<rocksdb::CompactionStopStyle>(style);
  uco->rep->stop_style = stop_style;
}
|
||||
|
||||
// Releases the wrapped options object first, then the C handle itself.
void rocksdb_universal_compaction_options_destroy(
    rocksdb_universal_compaction_options_t* uco) {
  auto* const wrapped = uco->rep;
  delete wrapped;
  delete uco;
}
|
||||
|
||||
} // end extern "C"
|
||||
|
212
db/c_test.c
212
db/c_test.c
@ -62,30 +62,30 @@ static void Free(char** ptr) {
|
||||
}
|
||||
|
||||
static void CheckGet(
|
||||
leveldb_t* db,
|
||||
const leveldb_readoptions_t* options,
|
||||
rocksdb_t* db,
|
||||
const rocksdb_readoptions_t* options,
|
||||
const char* key,
|
||||
const char* expected) {
|
||||
char* err = NULL;
|
||||
size_t val_len;
|
||||
char* val;
|
||||
val = leveldb_get(db, options, key, strlen(key), &val_len, &err);
|
||||
val = rocksdb_get(db, options, key, strlen(key), &val_len, &err);
|
||||
CheckNoError(err);
|
||||
CheckEqual(expected, val, val_len);
|
||||
Free(&val);
|
||||
}
|
||||
|
||||
static void CheckIter(leveldb_iterator_t* iter,
|
||||
static void CheckIter(rocksdb_iterator_t* iter,
|
||||
const char* key, const char* val) {
|
||||
size_t len;
|
||||
const char* str;
|
||||
str = leveldb_iter_key(iter, &len);
|
||||
str = rocksdb_iter_key(iter, &len);
|
||||
CheckEqual(key, str, len);
|
||||
str = leveldb_iter_value(iter, &len);
|
||||
str = rocksdb_iter_value(iter, &len);
|
||||
CheckEqual(val, str, len);
|
||||
}
|
||||
|
||||
// Callback from leveldb_writebatch_iterate()
|
||||
// Callback from rocksdb_writebatch_iterate()
|
||||
static void CheckPut(void* ptr,
|
||||
const char* k, size_t klen,
|
||||
const char* v, size_t vlen) {
|
||||
@ -104,7 +104,7 @@ static void CheckPut(void* ptr,
|
||||
(*state)++;
|
||||
}
|
||||
|
||||
// Callback from leveldb_writebatch_iterate()
|
||||
// Callback from rocksdb_writebatch_iterate()
|
||||
static void CheckDel(void* ptr, const char* k, size_t klen) {
|
||||
int* state = (int*) ptr;
|
||||
CheckCondition(*state == 2);
|
||||
@ -155,117 +155,117 @@ unsigned char FilterKeyMatch(
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
leveldb_t* db;
|
||||
leveldb_comparator_t* cmp;
|
||||
leveldb_cache_t* cache;
|
||||
leveldb_env_t* env;
|
||||
leveldb_options_t* options;
|
||||
leveldb_readoptions_t* roptions;
|
||||
leveldb_writeoptions_t* woptions;
|
||||
rocksdb_t* db;
|
||||
rocksdb_comparator_t* cmp;
|
||||
rocksdb_cache_t* cache;
|
||||
rocksdb_env_t* env;
|
||||
rocksdb_options_t* options;
|
||||
rocksdb_readoptions_t* roptions;
|
||||
rocksdb_writeoptions_t* woptions;
|
||||
char* err = NULL;
|
||||
int run = -1;
|
||||
|
||||
snprintf(dbname, sizeof(dbname),
|
||||
"%s/leveldb_c_test-%d",
|
||||
"%s/rocksdb_c_test-%d",
|
||||
GetTempDir(),
|
||||
((int) geteuid()));
|
||||
|
||||
StartPhase("create_objects");
|
||||
cmp = leveldb_comparator_create(NULL, CmpDestroy, CmpCompare, CmpName);
|
||||
env = leveldb_create_default_env();
|
||||
cache = leveldb_cache_create_lru(100000);
|
||||
cmp = rocksdb_comparator_create(NULL, CmpDestroy, CmpCompare, CmpName);
|
||||
env = rocksdb_create_default_env();
|
||||
cache = rocksdb_cache_create_lru(100000);
|
||||
|
||||
options = leveldb_options_create();
|
||||
leveldb_options_set_comparator(options, cmp);
|
||||
leveldb_options_set_error_if_exists(options, 1);
|
||||
leveldb_options_set_cache(options, cache);
|
||||
leveldb_options_set_env(options, env);
|
||||
leveldb_options_set_info_log(options, NULL);
|
||||
leveldb_options_set_write_buffer_size(options, 100000);
|
||||
leveldb_options_set_paranoid_checks(options, 1);
|
||||
leveldb_options_set_max_open_files(options, 10);
|
||||
leveldb_options_set_block_size(options, 1024);
|
||||
leveldb_options_set_block_restart_interval(options, 8);
|
||||
leveldb_options_set_compression(options, leveldb_no_compression);
|
||||
leveldb_options_set_compression_options(options, -14, -1, 0);
|
||||
int compression_levels[] = {leveldb_no_compression, leveldb_no_compression,
|
||||
leveldb_no_compression, leveldb_no_compression};
|
||||
leveldb_options_set_compression_per_level(options, compression_levels, 4);
|
||||
options = rocksdb_options_create();
|
||||
rocksdb_options_set_comparator(options, cmp);
|
||||
rocksdb_options_set_error_if_exists(options, 1);
|
||||
rocksdb_options_set_cache(options, cache);
|
||||
rocksdb_options_set_env(options, env);
|
||||
rocksdb_options_set_info_log(options, NULL);
|
||||
rocksdb_options_set_write_buffer_size(options, 100000);
|
||||
rocksdb_options_set_paranoid_checks(options, 1);
|
||||
rocksdb_options_set_max_open_files(options, 10);
|
||||
rocksdb_options_set_block_size(options, 1024);
|
||||
rocksdb_options_set_block_restart_interval(options, 8);
|
||||
rocksdb_options_set_compression(options, rocksdb_no_compression);
|
||||
rocksdb_options_set_compression_options(options, -14, -1, 0);
|
||||
int compression_levels[] = {rocksdb_no_compression, rocksdb_no_compression,
|
||||
rocksdb_no_compression, rocksdb_no_compression};
|
||||
rocksdb_options_set_compression_per_level(options, compression_levels, 4);
|
||||
|
||||
roptions = leveldb_readoptions_create();
|
||||
leveldb_readoptions_set_verify_checksums(roptions, 1);
|
||||
leveldb_readoptions_set_fill_cache(roptions, 0);
|
||||
roptions = rocksdb_readoptions_create();
|
||||
rocksdb_readoptions_set_verify_checksums(roptions, 1);
|
||||
rocksdb_readoptions_set_fill_cache(roptions, 0);
|
||||
|
||||
woptions = leveldb_writeoptions_create();
|
||||
leveldb_writeoptions_set_sync(woptions, 1);
|
||||
woptions = rocksdb_writeoptions_create();
|
||||
rocksdb_writeoptions_set_sync(woptions, 1);
|
||||
|
||||
StartPhase("destroy");
|
||||
leveldb_destroy_db(options, dbname, &err);
|
||||
rocksdb_destroy_db(options, dbname, &err);
|
||||
Free(&err);
|
||||
|
||||
StartPhase("open_error");
|
||||
db = leveldb_open(options, dbname, &err);
|
||||
db = rocksdb_open(options, dbname, &err);
|
||||
CheckCondition(err != NULL);
|
||||
Free(&err);
|
||||
|
||||
StartPhase("open");
|
||||
leveldb_options_set_create_if_missing(options, 1);
|
||||
db = leveldb_open(options, dbname, &err);
|
||||
rocksdb_options_set_create_if_missing(options, 1);
|
||||
db = rocksdb_open(options, dbname, &err);
|
||||
CheckNoError(err);
|
||||
CheckGet(db, roptions, "foo", NULL);
|
||||
|
||||
StartPhase("put");
|
||||
leveldb_put(db, woptions, "foo", 3, "hello", 5, &err);
|
||||
rocksdb_put(db, woptions, "foo", 3, "hello", 5, &err);
|
||||
CheckNoError(err);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
|
||||
StartPhase("compactall");
|
||||
leveldb_compact_range(db, NULL, 0, NULL, 0);
|
||||
rocksdb_compact_range(db, NULL, 0, NULL, 0);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
|
||||
StartPhase("compactrange");
|
||||
leveldb_compact_range(db, "a", 1, "z", 1);
|
||||
rocksdb_compact_range(db, "a", 1, "z", 1);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
|
||||
StartPhase("writebatch");
|
||||
{
|
||||
leveldb_writebatch_t* wb = leveldb_writebatch_create();
|
||||
leveldb_writebatch_put(wb, "foo", 3, "a", 1);
|
||||
leveldb_writebatch_clear(wb);
|
||||
leveldb_writebatch_put(wb, "bar", 3, "b", 1);
|
||||
leveldb_writebatch_put(wb, "box", 3, "c", 1);
|
||||
leveldb_writebatch_delete(wb, "bar", 3);
|
||||
leveldb_write(db, woptions, wb, &err);
|
||||
rocksdb_writebatch_t* wb = rocksdb_writebatch_create();
|
||||
rocksdb_writebatch_put(wb, "foo", 3, "a", 1);
|
||||
rocksdb_writebatch_clear(wb);
|
||||
rocksdb_writebatch_put(wb, "bar", 3, "b", 1);
|
||||
rocksdb_writebatch_put(wb, "box", 3, "c", 1);
|
||||
rocksdb_writebatch_delete(wb, "bar", 3);
|
||||
rocksdb_write(db, woptions, wb, &err);
|
||||
CheckNoError(err);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
CheckGet(db, roptions, "bar", NULL);
|
||||
CheckGet(db, roptions, "box", "c");
|
||||
int pos = 0;
|
||||
leveldb_writebatch_iterate(wb, &pos, CheckPut, CheckDel);
|
||||
rocksdb_writebatch_iterate(wb, &pos, CheckPut, CheckDel);
|
||||
CheckCondition(pos == 3);
|
||||
leveldb_writebatch_destroy(wb);
|
||||
rocksdb_writebatch_destroy(wb);
|
||||
}
|
||||
|
||||
StartPhase("iter");
|
||||
{
|
||||
leveldb_iterator_t* iter = leveldb_create_iterator(db, roptions);
|
||||
CheckCondition(!leveldb_iter_valid(iter));
|
||||
leveldb_iter_seek_to_first(iter);
|
||||
CheckCondition(leveldb_iter_valid(iter));
|
||||
rocksdb_iterator_t* iter = rocksdb_create_iterator(db, roptions);
|
||||
CheckCondition(!rocksdb_iter_valid(iter));
|
||||
rocksdb_iter_seek_to_first(iter);
|
||||
CheckCondition(rocksdb_iter_valid(iter));
|
||||
CheckIter(iter, "box", "c");
|
||||
leveldb_iter_next(iter);
|
||||
rocksdb_iter_next(iter);
|
||||
CheckIter(iter, "foo", "hello");
|
||||
leveldb_iter_prev(iter);
|
||||
rocksdb_iter_prev(iter);
|
||||
CheckIter(iter, "box", "c");
|
||||
leveldb_iter_prev(iter);
|
||||
CheckCondition(!leveldb_iter_valid(iter));
|
||||
leveldb_iter_seek_to_last(iter);
|
||||
rocksdb_iter_prev(iter);
|
||||
CheckCondition(!rocksdb_iter_valid(iter));
|
||||
rocksdb_iter_seek_to_last(iter);
|
||||
CheckIter(iter, "foo", "hello");
|
||||
leveldb_iter_seek(iter, "b", 1);
|
||||
rocksdb_iter_seek(iter, "b", 1);
|
||||
CheckIter(iter, "box", "c");
|
||||
leveldb_iter_get_error(iter, &err);
|
||||
rocksdb_iter_get_error(iter, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_iter_destroy(iter);
|
||||
rocksdb_iter_destroy(iter);
|
||||
}
|
||||
|
||||
StartPhase("approximate_sizes");
|
||||
@ -279,39 +279,39 @@ int main(int argc, char** argv) {
|
||||
size_t start_len[2] = { 1, 21 };
|
||||
const char* limit[2] = { "k00000000000000010000", "z" };
|
||||
size_t limit_len[2] = { 21, 1 };
|
||||
leveldb_writeoptions_set_sync(woptions, 0);
|
||||
rocksdb_writeoptions_set_sync(woptions, 0);
|
||||
for (i = 0; i < n; i++) {
|
||||
snprintf(keybuf, sizeof(keybuf), "k%020d", i);
|
||||
snprintf(valbuf, sizeof(valbuf), "v%020d", i);
|
||||
leveldb_put(db, woptions, keybuf, strlen(keybuf), valbuf, strlen(valbuf),
|
||||
rocksdb_put(db, woptions, keybuf, strlen(keybuf), valbuf, strlen(valbuf),
|
||||
&err);
|
||||
CheckNoError(err);
|
||||
}
|
||||
leveldb_approximate_sizes(db, 2, start, start_len, limit, limit_len, sizes);
|
||||
rocksdb_approximate_sizes(db, 2, start, start_len, limit, limit_len, sizes);
|
||||
CheckCondition(sizes[0] > 0);
|
||||
CheckCondition(sizes[1] > 0);
|
||||
}
|
||||
|
||||
StartPhase("property");
|
||||
{
|
||||
char* prop = leveldb_property_value(db, "nosuchprop");
|
||||
char* prop = rocksdb_property_value(db, "nosuchprop");
|
||||
CheckCondition(prop == NULL);
|
||||
prop = leveldb_property_value(db, "rocksdb.stats");
|
||||
prop = rocksdb_property_value(db, "rocksdb.stats");
|
||||
CheckCondition(prop != NULL);
|
||||
Free(&prop);
|
||||
}
|
||||
|
||||
StartPhase("snapshot");
|
||||
{
|
||||
const leveldb_snapshot_t* snap;
|
||||
snap = leveldb_create_snapshot(db);
|
||||
leveldb_delete(db, woptions, "foo", 3, &err);
|
||||
const rocksdb_snapshot_t* snap;
|
||||
snap = rocksdb_create_snapshot(db);
|
||||
rocksdb_delete(db, woptions, "foo", 3, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_readoptions_set_snapshot(roptions, snap);
|
||||
rocksdb_readoptions_set_snapshot(roptions, snap);
|
||||
CheckGet(db, roptions, "foo", "hello");
|
||||
leveldb_readoptions_set_snapshot(roptions, NULL);
|
||||
rocksdb_readoptions_set_snapshot(roptions, NULL);
|
||||
CheckGet(db, roptions, "foo", NULL);
|
||||
leveldb_release_snapshot(db, snap);
|
||||
rocksdb_release_snapshot(db, snap);
|
||||
}
|
||||
|
||||
StartPhase("repair");
|
||||
@ -320,44 +320,44 @@ int main(int argc, char** argv) {
|
||||
// files (https://reviews.facebook.net/D6123) would leave
|
||||
// around deleted files and the repair process will find
|
||||
// those files and put them back into the database.
|
||||
leveldb_compact_range(db, NULL, 0, NULL, 0);
|
||||
leveldb_close(db);
|
||||
leveldb_options_set_create_if_missing(options, 0);
|
||||
leveldb_options_set_error_if_exists(options, 0);
|
||||
leveldb_repair_db(options, dbname, &err);
|
||||
rocksdb_compact_range(db, NULL, 0, NULL, 0);
|
||||
rocksdb_close(db);
|
||||
rocksdb_options_set_create_if_missing(options, 0);
|
||||
rocksdb_options_set_error_if_exists(options, 0);
|
||||
rocksdb_repair_db(options, dbname, &err);
|
||||
CheckNoError(err);
|
||||
db = leveldb_open(options, dbname, &err);
|
||||
db = rocksdb_open(options, dbname, &err);
|
||||
CheckNoError(err);
|
||||
CheckGet(db, roptions, "foo", NULL);
|
||||
CheckGet(db, roptions, "bar", NULL);
|
||||
CheckGet(db, roptions, "box", "c");
|
||||
leveldb_options_set_create_if_missing(options, 1);
|
||||
leveldb_options_set_error_if_exists(options, 1);
|
||||
rocksdb_options_set_create_if_missing(options, 1);
|
||||
rocksdb_options_set_error_if_exists(options, 1);
|
||||
}
|
||||
|
||||
StartPhase("filter");
|
||||
for (run = 0; run < 2; run++) {
|
||||
// First run uses custom filter, second run uses bloom filter
|
||||
CheckNoError(err);
|
||||
leveldb_filterpolicy_t* policy;
|
||||
rocksdb_filterpolicy_t* policy;
|
||||
if (run == 0) {
|
||||
policy = leveldb_filterpolicy_create(
|
||||
policy = rocksdb_filterpolicy_create(
|
||||
NULL, FilterDestroy, FilterCreate, FilterKeyMatch, FilterName);
|
||||
} else {
|
||||
policy = leveldb_filterpolicy_create_bloom(10);
|
||||
policy = rocksdb_filterpolicy_create_bloom(10);
|
||||
}
|
||||
|
||||
// Create new database
|
||||
leveldb_close(db);
|
||||
leveldb_destroy_db(options, dbname, &err);
|
||||
leveldb_options_set_filter_policy(options, policy);
|
||||
db = leveldb_open(options, dbname, &err);
|
||||
rocksdb_close(db);
|
||||
rocksdb_destroy_db(options, dbname, &err);
|
||||
rocksdb_options_set_filter_policy(options, policy);
|
||||
db = rocksdb_open(options, dbname, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_put(db, woptions, "foo", 3, "foovalue", 8, &err);
|
||||
rocksdb_put(db, woptions, "foo", 3, "foovalue", 8, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_put(db, woptions, "bar", 3, "barvalue", 8, &err);
|
||||
rocksdb_put(db, woptions, "bar", 3, "barvalue", 8, &err);
|
||||
CheckNoError(err);
|
||||
leveldb_compact_range(db, NULL, 0, NULL, 0);
|
||||
rocksdb_compact_range(db, NULL, 0, NULL, 0);
|
||||
|
||||
fake_filter_result = 1;
|
||||
CheckGet(db, roptions, "foo", "foovalue");
|
||||
@ -372,18 +372,18 @@ int main(int argc, char** argv) {
|
||||
CheckGet(db, roptions, "foo", "foovalue");
|
||||
CheckGet(db, roptions, "bar", "barvalue");
|
||||
}
|
||||
leveldb_options_set_filter_policy(options, NULL);
|
||||
leveldb_filterpolicy_destroy(policy);
|
||||
rocksdb_options_set_filter_policy(options, NULL);
|
||||
rocksdb_filterpolicy_destroy(policy);
|
||||
}
|
||||
|
||||
StartPhase("cleanup");
|
||||
leveldb_close(db);
|
||||
leveldb_options_destroy(options);
|
||||
leveldb_readoptions_destroy(roptions);
|
||||
leveldb_writeoptions_destroy(woptions);
|
||||
leveldb_cache_destroy(cache);
|
||||
leveldb_comparator_destroy(cmp);
|
||||
leveldb_env_destroy(env);
|
||||
rocksdb_close(db);
|
||||
rocksdb_options_destroy(options);
|
||||
rocksdb_readoptions_destroy(roptions);
|
||||
rocksdb_writeoptions_destroy(woptions);
|
||||
rocksdb_cache_destroy(cache);
|
||||
rocksdb_comparator_destroy(cmp);
|
||||
rocksdb_env_destroy(env);
|
||||
|
||||
fprintf(stderr, "PASS\n");
|
||||
return 0;
|
||||
|
@ -49,6 +49,7 @@ DEFINE_string(benchmarks,
|
||||
"compact,"
|
||||
"readrandom,"
|
||||
"readseq,"
|
||||
"readtocache,"
|
||||
"readreverse,"
|
||||
"readwhilewriting,"
|
||||
"readrandomwriterandom,"
|
||||
@ -76,6 +77,7 @@ DEFINE_string(benchmarks,
|
||||
"\tdeleteseq -- delete N keys in sequential order\n"
|
||||
"\tdeleterandom -- delete N keys in random order\n"
|
||||
"\treadseq -- read N times sequentially\n"
|
||||
"\treadtocache -- 1 thread reading database sequentially\n"
|
||||
"\treadreverse -- read N times in reverse order\n"
|
||||
"\treadrandom -- read N times in random order\n"
|
||||
"\treadmissing -- read N missing keys in random order\n"
|
||||
@ -150,7 +152,7 @@ DEFINE_double(compression_ratio, 0.5, "Arrange to generate values that shrink"
|
||||
|
||||
DEFINE_bool(histogram, false, "Print histogram of operation timings");
|
||||
|
||||
DEFINE_int32(write_buffer_size, rocksdb::Options().write_buffer_size,
|
||||
DEFINE_int64(write_buffer_size, rocksdb::Options().write_buffer_size,
|
||||
"Number of bytes to buffer in memtable before compacting");
|
||||
|
||||
DEFINE_int32(max_write_buffer_number,
|
||||
@ -1062,6 +1064,10 @@ class Benchmark {
|
||||
method = &Benchmark::WriteRandom;
|
||||
} else if (name == Slice("readseq")) {
|
||||
method = &Benchmark::ReadSequential;
|
||||
} else if (name == Slice("readtocache")) {
|
||||
method = &Benchmark::ReadSequential;
|
||||
num_threads = 1;
|
||||
reads_ = num_;
|
||||
} else if (name == Slice("readreverse")) {
|
||||
method = &Benchmark::ReadReverse;
|
||||
} else if (name == Slice("readrandom")) {
|
||||
|
348
db/db_impl.cc
348
db/db_impl.cc
@ -241,6 +241,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
|
||||
mem_(new MemTable(internal_comparator_, mem_rep_factory_,
|
||||
NumberLevels(), options_)),
|
||||
logfile_number_(0),
|
||||
super_version_(nullptr),
|
||||
tmp_batch_(),
|
||||
bg_compaction_scheduled_(0),
|
||||
bg_flush_scheduled_(0),
|
||||
@ -316,6 +317,13 @@ DBImpl::~DBImpl() {
|
||||
bg_logstats_scheduled_) {
|
||||
bg_cv_.Wait();
|
||||
}
|
||||
if (super_version_ != nullptr) {
|
||||
bool is_last_reference __attribute__((unused));
|
||||
is_last_reference = super_version_->Unref();
|
||||
assert(is_last_reference);
|
||||
super_version_->Cleanup();
|
||||
delete super_version_;
|
||||
}
|
||||
mutex_.Unlock();
|
||||
|
||||
if (db_lock_ != nullptr) {
|
||||
@ -345,6 +353,13 @@ void DBImpl::TEST_Destroy_DBImpl() {
|
||||
bg_logstats_scheduled_) {
|
||||
bg_cv_.Wait();
|
||||
}
|
||||
if (super_version_ != nullptr) {
|
||||
bool is_last_reference __attribute__((unused));
|
||||
is_last_reference = super_version_->Unref();
|
||||
assert(is_last_reference);
|
||||
super_version_->Cleanup();
|
||||
delete super_version_;
|
||||
}
|
||||
|
||||
// Prevent new compactions from occurring.
|
||||
bg_work_gate_closed_ = true;
|
||||
@ -443,6 +458,49 @@ void DBImpl::MaybeDumpStats() {
|
||||
}
|
||||
}
|
||||
|
||||
// DBImpl::SuperVersion methods
|
||||
// Pre-allocates room in to_delete for `num_memtables` entries.
//
// Cleanup() appends the memtables that have to be freed to to_delete, and the
// destructor deletes every entry. reserve() is used rather than resize():
// resize() would fill the container with `num_memtables` null entries, so the
// later appends would land behind them and could still need to grow the
// container, defeating the purpose of allocating up front.
DBImpl::SuperVersion::SuperVersion(const int num_memtables) {
  to_delete.reserve(num_memtables);
}
|
||||
|
||||
// Frees every memtable that was queued in to_delete.
DBImpl::SuperVersion::~SuperVersion() {
  for (auto it = to_delete.begin(); it != to_delete.end(); ++it) {
    delete *it;
  }
}
|
||||
|
||||
// Adds one reference and returns this object so the call can be chained,
// e.g. `SuperVersion* sv = super_version_->Ref();`.
DBImpl::SuperVersion* DBImpl::SuperVersion::Ref() {
  // The previous count is of no interest here, so the result is discarded.
  (void)refs.fetch_add(1, std::memory_order_relaxed);
  return this;
}
|
||||
|
||||
// Drops one reference. Returns true when the reference dropped was the last
// one, i.e. the count went from 1 to 0.
bool DBImpl::SuperVersion::Unref() {
  assert(refs > 0);
  // fetch_sub yields the count as it was *before* the decrement.
  const auto count_before = refs.fetch_sub(1, std::memory_order_relaxed);
  return count_before == 1;
}
|
||||
|
||||
void DBImpl::SuperVersion::Cleanup() {
|
||||
assert(refs.load(std::memory_order_relaxed) == 0);
|
||||
imm.UnrefAll(&to_delete);
|
||||
MemTable* m = mem->Unref();
|
||||
if (m != nullptr) {
|
||||
to_delete.push_back(m);
|
||||
}
|
||||
current->Unref();
|
||||
}
|
||||
|
||||
// Points this SuperVersion at the given memtable, immutable memtable list and
// Version, takes a reference on each of them, and sets its own reference
// count to 1.
void DBImpl::SuperVersion::Init(MemTable* new_mem, const MemTableList& new_imm,
                                Version* new_current) {
  // Mutable memtable.
  mem = new_mem;
  mem->Ref();

  // Immutable memtables.
  imm = new_imm;
  imm.RefAll();

  // Current version of the on-disk state.
  current = new_current;
  current->Ref();

  refs.store(1, std::memory_order_relaxed);
}
|
||||
|
||||
// Returns the list of live files in 'sst_live' and the list
|
||||
// of all files in the filesystem in 'all_files'.
|
||||
// no_full_scan = true -- never do the full scan using GetChildren()
|
||||
@ -518,11 +576,6 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state,
|
||||
// It is not necessary to hold the mutex when invoking this method.
|
||||
void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
|
||||
|
||||
// free pending memtables
|
||||
for (auto m : state.memtables_to_free) {
|
||||
delete m;
|
||||
}
|
||||
|
||||
// check if there is anything to do
|
||||
if (!state.all_files.size() &&
|
||||
!state.sst_delete_files.size() &&
|
||||
@ -1041,6 +1094,7 @@ Status DBImpl::WriteLevel0TableForRecovery(MemTable* mem, VersionEdit* edit) {
|
||||
stats.bytes_written = meta.file_size;
|
||||
stats.files_out_levelnp1 = 1;
|
||||
stats_[level].Add(stats);
|
||||
RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES, meta.file_size);
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -1129,6 +1183,7 @@ Status DBImpl::WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
|
||||
stats.micros = env_->NowMicros() - start_micros;
|
||||
stats.bytes_written = meta.file_size;
|
||||
stats_[level].Add(stats);
|
||||
RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES, meta.file_size);
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -1186,6 +1241,7 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
|
||||
file_number, pending_outputs_, &deletion_state.memtables_to_free);
|
||||
|
||||
if (s.ok()) {
|
||||
InstallSuperVersion(deletion_state);
|
||||
if (madeProgress) {
|
||||
*madeProgress = 1;
|
||||
}
|
||||
@ -1245,11 +1301,17 @@ int DBImpl::FindMinimumEmptyLevelFitting(int level) {
|
||||
void DBImpl::ReFitLevel(int level, int target_level) {
|
||||
assert(level < NumberLevels());
|
||||
|
||||
MutexLock l(&mutex_);
|
||||
SuperVersion* superversion_to_free = nullptr;
|
||||
SuperVersion* new_superversion =
|
||||
new SuperVersion(options_.max_write_buffer_number);
|
||||
|
||||
mutex_.Lock();
|
||||
|
||||
// only allow one thread refitting
|
||||
if (refitting_level_) {
|
||||
mutex_.Unlock();
|
||||
Log(options_.info_log, "ReFitLevel: another thread is refitting");
|
||||
delete new_superversion;
|
||||
return;
|
||||
}
|
||||
refitting_level_ = true;
|
||||
@ -1285,6 +1347,8 @@ void DBImpl::ReFitLevel(int level, int target_level) {
|
||||
edit.DebugString().data());
|
||||
|
||||
auto status = versions_->LogAndApply(&edit, &mutex_);
|
||||
superversion_to_free = InstallSuperVersion(new_superversion);
|
||||
new_superversion = nullptr;
|
||||
|
||||
Log(options_.info_log, "LogAndApply: %s\n", status.ToString().data());
|
||||
|
||||
@ -1296,6 +1360,10 @@ void DBImpl::ReFitLevel(int level, int target_level) {
|
||||
|
||||
refitting_level_ = false;
|
||||
bg_work_gate_closed_ = false;
|
||||
|
||||
mutex_.Unlock();
|
||||
delete superversion_to_free;
|
||||
delete new_superversion;
|
||||
}
|
||||
|
||||
int DBImpl::NumberLevels() {
|
||||
@ -1311,8 +1379,7 @@ int DBImpl::Level0StopWriteTrigger() {
|
||||
}
|
||||
|
||||
Status DBImpl::Flush(const FlushOptions& options) {
|
||||
Status status = FlushMemTable(options);
|
||||
return status;
|
||||
return FlushMemTable(options);
|
||||
}
|
||||
|
||||
SequenceNumber DBImpl::GetLatestSequenceNumber() const {
|
||||
@ -1669,7 +1736,7 @@ Status DBImpl::BackgroundFlush(bool* madeProgress,
|
||||
|
||||
void DBImpl::BackgroundCallFlush() {
|
||||
bool madeProgress = false;
|
||||
DeletionState deletion_state(options_.max_write_buffer_number);
|
||||
DeletionState deletion_state(options_.max_write_buffer_number, true);
|
||||
assert(bg_flush_scheduled_);
|
||||
MutexLock l(&mutex_);
|
||||
|
||||
@ -1715,7 +1782,7 @@ void DBImpl::TEST_PurgeObsoleteteWAL() {
|
||||
|
||||
void DBImpl::BackgroundCallCompaction() {
|
||||
bool madeProgress = false;
|
||||
DeletionState deletion_state(options_.max_write_buffer_number);
|
||||
DeletionState deletion_state(options_.max_write_buffer_number, true);
|
||||
|
||||
MaybeDumpStats();
|
||||
|
||||
@ -1768,7 +1835,7 @@ void DBImpl::BackgroundCallCompaction() {
|
||||
}
|
||||
|
||||
Status DBImpl::BackgroundCompaction(bool* madeProgress,
|
||||
DeletionState& deletion_state) {
|
||||
DeletionState& deletion_state) {
|
||||
*madeProgress = false;
|
||||
mutex_.AssertHeld();
|
||||
|
||||
@ -1821,6 +1888,7 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
|
||||
f->smallest, f->largest,
|
||||
f->smallest_seqno, f->largest_seqno);
|
||||
status = versions_->LogAndApply(c->edit(), &mutex_);
|
||||
InstallSuperVersion(deletion_state);
|
||||
VersionSet::LevelSummaryStorage tmp;
|
||||
Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n",
|
||||
static_cast<unsigned long long>(f->number),
|
||||
@ -2454,14 +2522,22 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
|
||||
}
|
||||
stats.files_out_levelnp1 = num_output_files;
|
||||
|
||||
for (int i = 0; i < compact->compaction->num_input_files(0); i++)
|
||||
for (int i = 0; i < compact->compaction->num_input_files(0); i++) {
|
||||
stats.bytes_readn += compact->compaction->input(0, i)->file_size;
|
||||
RecordTick(options_.statistics.get(), COMPACT_READ_BYTES,
|
||||
compact->compaction->input(0, i)->file_size);
|
||||
}
|
||||
|
||||
for (int i = 0; i < compact->compaction->num_input_files(1); i++)
|
||||
for (int i = 0; i < compact->compaction->num_input_files(1); i++) {
|
||||
stats.bytes_readnp1 += compact->compaction->input(1, i)->file_size;
|
||||
RecordTick(options_.statistics.get(), COMPACT_READ_BYTES,
|
||||
compact->compaction->input(1, i)->file_size);
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_output_files; i++) {
|
||||
stats.bytes_written += compact->outputs[i].file_size;
|
||||
RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES,
|
||||
compact->outputs[i].file_size);
|
||||
}
|
||||
|
||||
LogFlush(options_.info_log);
|
||||
@ -2474,6 +2550,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
|
||||
|
||||
if (status.ok()) {
|
||||
status = InstallCompactionResults(compact);
|
||||
InstallSuperVersion(deletion_state);
|
||||
}
|
||||
VersionSet::LevelSummaryStorage tmp;
|
||||
Log(options_.info_log,
|
||||
@ -2581,6 +2658,44 @@ Status DBImpl::Get(const ReadOptions& options,
|
||||
return GetImpl(options, key, value);
|
||||
}
|
||||
|
||||
// DeletionState gets created and destructed outside of the lock -- we
|
||||
// use this convinently to:
|
||||
// * malloc one SuperVersion() outside of the lock -- new_superversion
|
||||
// * delete one SuperVersion() outside of the lock -- superversion_to_free
|
||||
//
|
||||
// However, if InstallSuperVersion() gets called twice with the same,
|
||||
// deletion_state, we can't reuse the SuperVersion() that got malloced because
|
||||
// first call already used it. In that rare case, we take a hit and create a
|
||||
// new SuperVersion() inside of the mutex. We do similar thing
|
||||
// for superversion_to_free
|
||||
void DBImpl::InstallSuperVersion(DeletionState& deletion_state) {
|
||||
// if new_superversion == nullptr, it means somebody already used it
|
||||
SuperVersion* new_superversion =
|
||||
(deletion_state.new_superversion != nullptr) ?
|
||||
deletion_state.new_superversion : new SuperVersion();
|
||||
SuperVersion* old_superversion = InstallSuperVersion(new_superversion);
|
||||
deletion_state.new_superversion = nullptr;
|
||||
if (deletion_state.superversion_to_free != nullptr) {
|
||||
// somebody already put it there
|
||||
delete old_superversion;
|
||||
} else {
|
||||
deletion_state.superversion_to_free = old_superversion;
|
||||
}
|
||||
}
|
||||
|
||||
// Initializes `new_superversion` from the current mem_, imm_ and
// versions_->current(), and publishes it as super_version_. Requires mutex_
// to be held.
//
// Returns the previous SuperVersion if this call dropped its last reference,
// already cleaned up, so that the caller can delete it outside of the mutex.
// Returns nullptr otherwise.
DBImpl::SuperVersion* DBImpl::InstallSuperVersion(
    SuperVersion* new_superversion) {
  mutex_.AssertHeld();
  new_superversion->Init(mem_, imm_, versions_->current());

  SuperVersion* const previous = super_version_;
  super_version_ = new_superversion;

  // Nothing to hand back if there was no previous SuperVersion or if it is
  // still referenced elsewhere.
  if (previous == nullptr || !previous->Unref()) {
    return nullptr;
  }
  previous->Cleanup();
  return previous;  // will let caller delete outside of mutex
}
|
||||
|
||||
Status DBImpl::GetImpl(const ReadOptions& options,
|
||||
const Slice& key,
|
||||
std::string* value,
|
||||
@ -2591,28 +2706,21 @@ Status DBImpl::GetImpl(const ReadOptions& options,
|
||||
StopWatchNano snapshot_timer(env_, false);
|
||||
StartPerfTimer(&snapshot_timer);
|
||||
SequenceNumber snapshot;
|
||||
std::vector<MemTable*> to_delete;
|
||||
to_delete.reserve(options_.max_write_buffer_number);
|
||||
mutex_.Lock();
|
||||
|
||||
if (options.snapshot != nullptr) {
|
||||
snapshot = reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_;
|
||||
} else {
|
||||
snapshot = versions_->LastSequence();
|
||||
}
|
||||
|
||||
MemTable* mem = mem_;
|
||||
MemTableList imm = imm_;
|
||||
Version* current = versions_->current();
|
||||
mem->Ref();
|
||||
imm.RefAll();
|
||||
current->Ref();
|
||||
|
||||
// Unlock while reading from files and memtables
|
||||
// This can be replaced by using atomics and spinlock instead of big mutex
|
||||
mutex_.Lock();
|
||||
SuperVersion* get_version = super_version_->Ref();
|
||||
mutex_.Unlock();
|
||||
|
||||
bool have_stat_update = false;
|
||||
Version::GetStats stats;
|
||||
|
||||
|
||||
// Prepare to store a list of merge operations if merge occurs.
|
||||
MergeContext merge_context;
|
||||
|
||||
@ -2621,18 +2729,18 @@ Status DBImpl::GetImpl(const ReadOptions& options,
|
||||
// merge_operands will contain the sequence of merges in the latter case.
|
||||
LookupKey lkey(key, snapshot);
|
||||
BumpPerfTime(&perf_context.get_snapshot_time, &snapshot_timer);
|
||||
if (mem->Get(lkey, value, &s, merge_context, options_)) {
|
||||
if (get_version->mem->Get(lkey, value, &s, merge_context, options_)) {
|
||||
// Done
|
||||
RecordTick(options_.statistics.get(), MEMTABLE_HIT);
|
||||
} else if (imm.Get(lkey, value, &s, merge_context, options_)) {
|
||||
} else if (get_version->imm.Get(lkey, value, &s, merge_context, options_)) {
|
||||
// Done
|
||||
RecordTick(options_.statistics.get(), MEMTABLE_HIT);
|
||||
} else {
|
||||
StopWatchNano from_files_timer(env_, false);
|
||||
StartPerfTimer(&from_files_timer);
|
||||
|
||||
current->Get(options, lkey, value, &s, &merge_context, &stats,
|
||||
options_, value_found);
|
||||
get_version->current->Get(options, lkey, value, &s, &merge_context, &stats,
|
||||
options_, value_found);
|
||||
have_stat_update = true;
|
||||
BumpPerfTime(&perf_context.get_from_output_files_time, &from_files_timer);
|
||||
RecordTick(options_.statistics.get(), MEMTABLE_MISS);
|
||||
@ -2640,22 +2748,30 @@ Status DBImpl::GetImpl(const ReadOptions& options,
|
||||
|
||||
StopWatchNano post_process_timer(env_, false);
|
||||
StartPerfTimer(&post_process_timer);
|
||||
mutex_.Lock();
|
||||
|
||||
if (!options_.disable_seek_compaction &&
|
||||
have_stat_update && current->UpdateStats(stats)) {
|
||||
MaybeScheduleFlushOrCompaction();
|
||||
bool delete_get_version = false;
|
||||
if (!options_.disable_seek_compaction && have_stat_update) {
|
||||
mutex_.Lock();
|
||||
if (get_version->current->UpdateStats(stats)) {
|
||||
MaybeScheduleFlushOrCompaction();
|
||||
}
|
||||
if (get_version->Unref()) {
|
||||
get_version->Cleanup();
|
||||
delete_get_version = true;
|
||||
}
|
||||
mutex_.Unlock();
|
||||
} else {
|
||||
if (get_version->Unref()) {
|
||||
mutex_.Lock();
|
||||
get_version->Cleanup();
|
||||
mutex_.Unlock();
|
||||
delete_get_version = true;
|
||||
}
|
||||
}
|
||||
if (delete_get_version) {
|
||||
delete get_version;
|
||||
}
|
||||
MemTable* m = mem->Unref();
|
||||
imm.UnrefAll(&to_delete);
|
||||
current->Unref();
|
||||
mutex_.Unlock();
|
||||
|
||||
// free up all obsolete memtables outside the mutex
|
||||
delete m;
|
||||
for (MemTable* v: to_delete) delete v;
|
||||
|
||||
LogFlush(options_.info_log);
|
||||
// Note, tickers are atomic now - no lock protection needed any more.
|
||||
|
||||
RecordTick(options_.statistics.get(), NUMBER_KEYS_READ);
|
||||
@ -2673,7 +2789,6 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
|
||||
|
||||
SequenceNumber snapshot;
|
||||
std::vector<MemTable*> to_delete;
|
||||
to_delete.reserve(options_.max_write_buffer_number);
|
||||
|
||||
mutex_.Lock();
|
||||
if (options.snapshot != nullptr) {
|
||||
@ -2748,8 +2863,6 @@ std::vector<Status> DBImpl::MultiGet(const ReadOptions& options,
|
||||
delete m;
|
||||
for (MemTable* v: to_delete) delete v;
|
||||
|
||||
LogFlush(options_.info_log);
|
||||
|
||||
RecordTick(options_.statistics.get(), NUMBER_MULTIGET_CALLS);
|
||||
RecordTick(options_.statistics.get(), NUMBER_MULTIGET_KEYS_READ, numKeys);
|
||||
RecordTick(options_.statistics.get(), NUMBER_MULTIGET_BYTES_READ, bytesRead);
|
||||
@ -2831,17 +2944,27 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
||||
w.done = false;
|
||||
|
||||
StopWatch sw(env_, options_.statistics.get(), DB_WRITE);
|
||||
MutexLock l(&mutex_);
|
||||
mutex_.Lock();
|
||||
writers_.push_back(&w);
|
||||
while (!w.done && &w != writers_.front()) {
|
||||
w.cv.Wait();
|
||||
}
|
||||
|
||||
if (!options.disableWAL) {
|
||||
RecordTick(options_.statistics.get(), WRITE_WITH_WAL, 1);
|
||||
}
|
||||
|
||||
if (w.done) {
|
||||
mutex_.Unlock();
|
||||
RecordTick(options_.statistics.get(), WRITE_DONE_BY_OTHER, 1);
|
||||
return w.status;
|
||||
} else {
|
||||
RecordTick(options_.statistics.get(), WRITE_DONE_BY_SELF, 1);
|
||||
}
|
||||
|
||||
// May temporarily unlock and wait.
|
||||
Status status = MakeRoomForWrite(my_batch == nullptr);
|
||||
SuperVersion* superversion_to_free = nullptr;
|
||||
Status status = MakeRoomForWrite(my_batch == nullptr, &superversion_to_free);
|
||||
uint64_t last_sequence = versions_->LastSequence();
|
||||
Writer* last_writer = &w;
|
||||
if (status.ok() && my_batch != nullptr) { // nullptr batch is for compactions
|
||||
@ -2877,7 +3000,10 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
||||
if (!options.disableWAL) {
|
||||
StopWatchNano timer(env_);
|
||||
StartPerfTimer(&timer);
|
||||
status = log_->AddRecord(WriteBatchInternal::Contents(updates));
|
||||
Slice log_entry = WriteBatchInternal::Contents(updates);
|
||||
status = log_->AddRecord(log_entry);
|
||||
RecordTick(options_.statistics.get(), WAL_FILE_SYNCED, 1);
|
||||
RecordTick(options_.statistics.get(), WAL_FILE_BYTES, log_entry.size());
|
||||
if (status.ok() && options.sync) {
|
||||
if (options_.use_fsync) {
|
||||
StopWatch(env_, options_.statistics.get(), WAL_FILE_SYNC_MICROS);
|
||||
@ -2906,7 +3032,6 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
||||
SEQUENCE_NUMBER, last_sequence);
|
||||
}
|
||||
StartPerfTimer(&pre_post_process_timer);
|
||||
LogFlush(options_.info_log);
|
||||
mutex_.Lock();
|
||||
if (status.ok()) {
|
||||
versions_->SetLastSequence(last_sequence);
|
||||
@ -2933,6 +3058,8 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
|
||||
if (!writers_.empty()) {
|
||||
writers_.front()->cv.Signal();
|
||||
}
|
||||
mutex_.Unlock();
|
||||
delete superversion_to_free;
|
||||
BumpPerfTime(&perf_context.write_pre_and_post_process_time,
|
||||
&pre_post_process_timer);
|
||||
return status;
|
||||
@ -3027,7 +3154,8 @@ uint64_t DBImpl::SlowdownAmount(int n, int top, int bottom) {
|
||||
|
||||
// REQUIRES: mutex_ is held
|
||||
// REQUIRES: this thread is currently at the front of the writer queue
|
||||
Status DBImpl::MakeRoomForWrite(bool force) {
|
||||
Status DBImpl::MakeRoomForWrite(bool force,
|
||||
SuperVersion** superversion_to_free) {
|
||||
mutex_.AssertHeld();
|
||||
assert(!writers_.empty());
|
||||
bool allow_delay = !force;
|
||||
@ -3036,6 +3164,7 @@ Status DBImpl::MakeRoomForWrite(bool force) {
|
||||
uint64_t rate_limit_delay_millis = 0;
|
||||
Status s;
|
||||
double score;
|
||||
*superversion_to_free = nullptr;
|
||||
|
||||
while (true) {
|
||||
if (!bg_error_.ok()) {
|
||||
@ -3162,6 +3291,7 @@ Status DBImpl::MakeRoomForWrite(bool force) {
|
||||
// Do this without holding the dbmutex lock.
|
||||
assert(versions_->PrevLogNumber() == 0);
|
||||
uint64_t new_log_number = versions_->NewFileNumber();
|
||||
SuperVersion* new_superversion = nullptr;
|
||||
mutex_.Unlock();
|
||||
{
|
||||
EnvOptions soptions(storage_options_);
|
||||
@ -3178,6 +3308,7 @@ Status DBImpl::MakeRoomForWrite(bool force) {
|
||||
lfile->SetPreallocationBlockSize(1.1 * options_.write_buffer_size);
|
||||
memtmp = new MemTable(
|
||||
internal_comparator_, mem_rep_factory_, NumberLevels(), options_);
|
||||
new_superversion = new SuperVersion(options_.max_write_buffer_number);
|
||||
}
|
||||
}
|
||||
mutex_.Lock();
|
||||
@ -3202,11 +3333,16 @@ Status DBImpl::MakeRoomForWrite(bool force) {
|
||||
mem_->SetLogNumber(logfile_number_);
|
||||
force = false; // Do not force another compaction if have room
|
||||
MaybeScheduleFlushOrCompaction();
|
||||
*superversion_to_free = InstallSuperVersion(new_superversion);
|
||||
}
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
// Returns a reference to the dbname_ member.
const std::string& DBImpl::GetName() const {
|
||||
return dbname_;
|
||||
}
|
||||
|
||||
// Returns the Env pointer stored in env_.
Env* DBImpl::GetEnv() const {
|
||||
return env_;
|
||||
}
|
||||
@ -3256,6 +3392,13 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
||||
|
||||
} else if (in == "stats") {
|
||||
char buf[1000];
|
||||
|
||||
uint64_t wal_bytes = 0;
|
||||
uint64_t wal_synced = 0;
|
||||
uint64_t user_bytes_written = 0;
|
||||
uint64_t write_other = 0;
|
||||
uint64_t write_self = 0;
|
||||
uint64_t write_with_wal = 0;
|
||||
uint64_t total_bytes_written = 0;
|
||||
uint64_t total_bytes_read = 0;
|
||||
uint64_t micros_up = env_->NowMicros() - started_at_;
|
||||
@ -3268,6 +3411,16 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
||||
uint64_t interval_bytes_new = 0;
|
||||
double interval_seconds_up = 0;
|
||||
|
||||
Statistics* s = options_.statistics.get();
|
||||
if (s) {
|
||||
wal_bytes = s->getTickerCount(WAL_FILE_BYTES);
|
||||
wal_synced = s->getTickerCount(WAL_FILE_SYNCED);
|
||||
user_bytes_written = s->getTickerCount(BYTES_WRITTEN);
|
||||
write_other = s->getTickerCount(WRITE_DONE_BY_OTHER);
|
||||
write_self = s->getTickerCount(WRITE_DONE_BY_SELF);
|
||||
write_with_wal = s->getTickerCount(WRITE_WITH_WAL);
|
||||
}
|
||||
|
||||
// Pardon the long line but I think it is easier to read this way.
|
||||
snprintf(buf, sizeof(buf),
|
||||
" Compactions\n"
|
||||
@ -3324,19 +3477,38 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
||||
}
|
||||
}
|
||||
|
||||
interval_bytes_new = stats_[0].bytes_written - last_stats_.bytes_new_;
|
||||
interval_bytes_read = total_bytes_read - last_stats_.bytes_read_;
|
||||
interval_bytes_written = total_bytes_written - last_stats_.bytes_written_;
|
||||
interval_bytes_new = user_bytes_written - last_stats_.ingest_bytes_;
|
||||
interval_bytes_read = total_bytes_read - last_stats_.compaction_bytes_read_;
|
||||
interval_bytes_written =
|
||||
total_bytes_written - last_stats_.compaction_bytes_written_;
|
||||
interval_seconds_up = seconds_up - last_stats_.seconds_up_;
|
||||
|
||||
snprintf(buf, sizeof(buf), "Uptime(secs): %.1f total, %.1f interval\n",
|
||||
seconds_up, interval_seconds_up);
|
||||
value->append(buf);
|
||||
|
||||
snprintf(buf, sizeof(buf),
|
||||
"Writes cumulative: %llu total, %llu batches, "
|
||||
"%.1f per batch, %.2f ingest GB\n",
|
||||
(unsigned long long) (write_other + write_self),
|
||||
(unsigned long long) write_self,
|
||||
(write_other + write_self) / (double) (write_self + 1),
|
||||
user_bytes_written / (1048576.0 * 1024));
|
||||
value->append(buf);
|
||||
|
||||
snprintf(buf, sizeof(buf),
|
||||
"WAL cumulative: %llu WAL writes, %llu WAL syncs, "
|
||||
"%.2f writes per sync, %.2f GB written\n",
|
||||
(unsigned long long) write_with_wal,
|
||||
(unsigned long long ) wal_synced,
|
||||
write_with_wal / (double) (wal_synced + 1),
|
||||
wal_bytes / (1048576.0 * 1024));
|
||||
value->append(buf);
|
||||
|
||||
snprintf(buf, sizeof(buf),
|
||||
"Compaction IO cumulative (GB): "
|
||||
"%.2f new, %.2f read, %.2f write, %.2f read+write\n",
|
||||
stats_[0].bytes_written / (1048576.0 * 1024),
|
||||
user_bytes_written / (1048576.0 * 1024),
|
||||
total_bytes_read / (1048576.0 * 1024),
|
||||
total_bytes_written / (1048576.0 * 1024),
|
||||
(total_bytes_read + total_bytes_written) / (1048576.0 * 1024));
|
||||
@ -3345,7 +3517,7 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
||||
snprintf(buf, sizeof(buf),
|
||||
"Compaction IO cumulative (MB/sec): "
|
||||
"%.1f new, %.1f read, %.1f write, %.1f read+write\n",
|
||||
stats_[0].bytes_written / 1048576.0 / seconds_up,
|
||||
user_bytes_written / 1048576.0 / seconds_up,
|
||||
total_bytes_read / 1048576.0 / seconds_up,
|
||||
total_bytes_written / 1048576.0 / seconds_up,
|
||||
(total_bytes_read + total_bytes_written) / 1048576.0 / seconds_up);
|
||||
@ -3354,9 +3526,38 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
||||
// +1 to avoid divide by 0 and NaN
|
||||
snprintf(buf, sizeof(buf),
|
||||
"Amplification cumulative: %.1f write, %.1f compaction\n",
|
||||
(double) total_bytes_written / (stats_[0].bytes_written+1),
|
||||
(double) (total_bytes_written + total_bytes_read)
|
||||
/ (stats_[0].bytes_written+1));
|
||||
(double) (total_bytes_written + wal_bytes)
|
||||
/ (user_bytes_written + 1),
|
||||
(double) (total_bytes_written + total_bytes_read + wal_bytes)
|
||||
/ (user_bytes_written + 1));
|
||||
value->append(buf);
|
||||
|
||||
uint64_t interval_write_other = write_other - last_stats_.write_other_;
|
||||
uint64_t interval_write_self = write_self - last_stats_.write_self_;
|
||||
|
||||
snprintf(buf, sizeof(buf),
|
||||
"Writes interval: %llu total, %llu batches, "
|
||||
"%.1f per batch, %.1f ingest MB\n",
|
||||
(unsigned long long) (interval_write_other + interval_write_self),
|
||||
(unsigned long long) interval_write_self,
|
||||
(double) (interval_write_other + interval_write_self)
|
||||
/ (interval_write_self + 1),
|
||||
(user_bytes_written - last_stats_.ingest_bytes_) / 1048576.0);
|
||||
value->append(buf);
|
||||
|
||||
uint64_t interval_write_with_wal =
|
||||
write_with_wal - last_stats_.write_with_wal_;
|
||||
|
||||
uint64_t interval_wal_synced = wal_synced - last_stats_.wal_synced_;
|
||||
uint64_t interval_wal_bytes = wal_bytes - last_stats_.wal_bytes_;
|
||||
|
||||
snprintf(buf, sizeof(buf),
|
||||
"WAL interval: %llu WAL writes, %llu WAL syncs, "
|
||||
"%.2f writes per sync, %.2f MB written\n",
|
||||
(unsigned long long) interval_write_with_wal,
|
||||
(unsigned long long ) interval_wal_synced,
|
||||
interval_write_with_wal / (double) (interval_wal_synced + 1),
|
||||
interval_wal_bytes / (1048576.0 * 1024));
|
||||
value->append(buf);
|
||||
|
||||
snprintf(buf, sizeof(buf),
|
||||
@ -3381,9 +3582,10 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
||||
// +1 to avoid divide by 0 and NaN
|
||||
snprintf(buf, sizeof(buf),
|
||||
"Amplification interval: %.1f write, %.1f compaction\n",
|
||||
(double) interval_bytes_written / (interval_bytes_new+1),
|
||||
(double) (interval_bytes_written + interval_bytes_read) /
|
||||
(interval_bytes_new+1));
|
||||
(double) (interval_bytes_written + wal_bytes)
|
||||
/ (interval_bytes_new + 1),
|
||||
(double) (interval_bytes_written + interval_bytes_read + wal_bytes)
|
||||
/ (interval_bytes_new + 1));
|
||||
value->append(buf);
|
||||
|
||||
snprintf(buf, sizeof(buf),
|
||||
@ -3404,10 +3606,15 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
||||
(unsigned long) total_slowdown_count);
|
||||
value->append(buf);
|
||||
|
||||
last_stats_.bytes_read_ = total_bytes_read;
|
||||
last_stats_.bytes_written_ = total_bytes_written;
|
||||
last_stats_.bytes_new_ = stats_[0].bytes_written;
|
||||
last_stats_.compaction_bytes_read_ = total_bytes_read;
|
||||
last_stats_.compaction_bytes_written_ = total_bytes_written;
|
||||
last_stats_.ingest_bytes_ = user_bytes_written;
|
||||
last_stats_.seconds_up_ = seconds_up;
|
||||
last_stats_.wal_bytes_ = wal_bytes;
|
||||
last_stats_.wal_synced_ = wal_synced;
|
||||
last_stats_.write_with_wal_ = write_with_wal;
|
||||
last_stats_.write_other_ = write_other;
|
||||
last_stats_.write_self_ = write_self;
|
||||
|
||||
return true;
|
||||
} else if (in == "sstables") {
|
||||
@ -3482,7 +3689,7 @@ Status DBImpl::DeleteFile(std::string name) {
|
||||
FileMetaData metadata;
|
||||
int maxlevel = NumberLevels();
|
||||
VersionEdit edit(maxlevel);
|
||||
DeletionState deletion_state;
|
||||
DeletionState deletion_state(0, true);
|
||||
{
|
||||
MutexLock l(&mutex_);
|
||||
status = versions_->GetMetadataForFile(number, &level, &metadata);
|
||||
@ -3512,14 +3719,14 @@ Status DBImpl::DeleteFile(std::string name) {
|
||||
}
|
||||
edit.DeleteFile(level, number);
|
||||
status = versions_->LogAndApply(&edit, &mutex_);
|
||||
if (status.ok()) {
|
||||
InstallSuperVersion(deletion_state);
|
||||
}
|
||||
FindObsoleteFiles(deletion_state, false);
|
||||
} // lock released here
|
||||
LogFlush(options_.info_log);
|
||||
|
||||
if (status.ok()) {
|
||||
// remove files outside the db-lock
|
||||
PurgeObsoleteFiles(deletion_state);
|
||||
}
|
||||
// remove files outside the db-lock
|
||||
PurgeObsoleteFiles(deletion_state);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -3619,6 +3826,7 @@ Status DB::Open(const Options& options, const std::string& dbname, DB** dbptr) {
|
||||
s = impl->versions_->LogAndApply(&edit, &impl->mutex_);
|
||||
}
|
||||
if (s.ok()) {
|
||||
delete impl->InstallSuperVersion(new DBImpl::SuperVersion());
|
||||
impl->mem_->SetLogNumber(impl->logfile_number_);
|
||||
impl->DeleteObsoleteFiles();
|
||||
impl->MaybeScheduleFlushOrCompaction();
|
||||
|
95
db/db_impl.h
95
db/db_impl.h
@ -67,6 +67,7 @@ class DBImpl : public DB {
|
||||
virtual int NumberLevels();
|
||||
virtual int MaxMemCompactionLevel();
|
||||
virtual int Level0StopWriteTrigger();
|
||||
virtual const std::string& GetName() const;
|
||||
virtual Env* GetEnv() const;
|
||||
virtual const Options& GetOptions() const;
|
||||
virtual Status Flush(const FlushOptions& options);
|
||||
@ -127,12 +128,38 @@ class DBImpl : public DB {
|
||||
default_interval_to_delete_obsolete_WAL_ = default_interval_to_delete_obsolete_WAL;
|
||||
}
|
||||
|
||||
// needed for CleanupIteratorState
|
||||
// holds references to memtable, all immutable memtables and version
|
||||
struct SuperVersion {
|
||||
MemTable* mem;
|
||||
MemTableList imm;
|
||||
Version* current;
|
||||
// presumably the counter behind Ref()/Unref() -- confirm in db_impl.cc
std::atomic<uint32_t> refs;
|
||||
// We need to_delete because during Cleanup(), imm.UnrefAll() returns
|
||||
// all memtables that we need to free through this vector. We then
|
||||
// delete all those memtables outside of the mutex, during destruction.
|
||||
std::vector<MemTable*> to_delete;
|
||||
|
||||
// should be called outside the mutex
|
||||
explicit SuperVersion(const int num_memtables = 0);
|
||||
~SuperVersion();
|
||||
// Returns a SuperVersion*, so a caller can take a reference and keep the
// pointer in one expression (presumably returns this -- confirm).
SuperVersion* Ref();
|
||||
// Returns true if this was the last reference and the caller should
|
||||
// call Cleanup() and delete the object
|
||||
bool Unref();
|
||||
|
||||
// call these two methods with db mutex held
|
||||
// Cleanup unrefs mem, imm and current. Also, it stores all memtables
|
||||
// that need to be deleted in the to_delete vector. Unrefing those
|
||||
// objects needs to be done under the mutex.
|
||||
void Cleanup();
|
||||
// NOTE(review): presumably stores the three arguments in mem, imm and
// current and takes references on them -- confirm against the definition.
void Init(MemTable* new_mem, const MemTableList& new_imm,
|
||||
Version* new_current);
|
||||
};
|
||||
|
||||
// needed for CleanupIteratorState
|
||||
struct DeletionState {
|
||||
inline bool HaveSomethingToDelete() const {
|
||||
return memtables_to_free.size() ||
|
||||
all_files.size() ||
|
||||
return all_files.size() ||
|
||||
sst_delete_files.size() ||
|
||||
log_delete_files.size();
|
||||
}
|
||||
@ -154,15 +181,35 @@ class DBImpl : public DB {
|
||||
// a list of memtables to be free
|
||||
std::vector<MemTable *> memtables_to_free;
|
||||
|
||||
SuperVersion* superversion_to_free; // if nullptr nothing to free
|
||||
|
||||
SuperVersion* new_superversion; // if nullptr no new superversion
|
||||
|
||||
// the current manifest_file_number, log_number and prev_log_number
|
||||
// that corresponds to the set of files in 'live'.
|
||||
uint64_t manifest_file_number, log_number, prev_log_number;
|
||||
|
||||
explicit DeletionState(const int num_memtables = 0) {
|
||||
explicit DeletionState(const int num_memtables = 0,
|
||||
bool create_superversion = false) {
|
||||
manifest_file_number = 0;
|
||||
log_number = 0;
|
||||
prev_log_number = 0;
|
||||
memtables_to_free.reserve(num_memtables);
|
||||
superversion_to_free = nullptr;
|
||||
new_superversion =
|
||||
create_superversion ? new SuperVersion(num_memtables) : nullptr;
|
||||
}
|
||||
|
||||
~DeletionState() {
|
||||
// free pending memtables
|
||||
for (auto m : memtables_to_free) {
|
||||
delete m;
|
||||
}
|
||||
// free superversion. if nullptr, this will be noop
|
||||
delete superversion_to_free;
|
||||
// if new_superversion was not used, it will be non-nullptr and needs
|
||||
// to be freed here
|
||||
delete new_superversion;
|
||||
}
|
||||
};
|
||||
|
||||
@ -239,7 +286,11 @@ class DBImpl : public DB {
|
||||
uint64_t* filenumber);
|
||||
|
||||
uint64_t SlowdownAmount(int n, int top, int bottom);
|
||||
Status MakeRoomForWrite(bool force /* compact even if there is room? */);
|
||||
// MakeRoomForWrite will return superversion_to_free through an argument,
|
||||
// which the caller needs to delete. We do it because caller can delete
|
||||
// the superversion outside of mutex
|
||||
Status MakeRoomForWrite(bool force /* compact even if there is room? */,
|
||||
SuperVersion** superversion_to_free);
|
||||
WriteBatch* BuildBatchGroup(Writer** last_writer);
|
||||
|
||||
// Force current memtable contents to be flushed.
|
||||
@ -323,6 +374,8 @@ class DBImpl : public DB {
|
||||
uint64_t logfile_number_;
|
||||
unique_ptr<log::Writer> log_;
|
||||
|
||||
SuperVersion* super_version_;
|
||||
|
||||
std::string host_name_;
|
||||
|
||||
// Queue of writers.
|
||||
@ -440,15 +493,25 @@ class DBImpl : public DB {
|
||||
|
||||
// Used to compute per-interval statistics
|
||||
struct StatsSnapshot {
|
||||
uint64_t bytes_read_;
|
||||
uint64_t bytes_written_;
|
||||
uint64_t bytes_new_;
|
||||
uint64_t compaction_bytes_read_; // Bytes read by compaction
|
||||
uint64_t compaction_bytes_written_; // Bytes written by compaction
|
||||
uint64_t ingest_bytes_; // Bytes written by user
|
||||
uint64_t wal_bytes_; // Bytes written to WAL
|
||||
uint64_t wal_synced_; // Number of times WAL is synced
|
||||
uint64_t write_with_wal_; // Number of writes that request WAL
|
||||
// These count the number of writes processed by the calling thread or
|
||||
// another thread.
|
||||
uint64_t write_other_;
|
||||
uint64_t write_self_;
|
||||
double seconds_up_;
|
||||
|
||||
StatsSnapshot() : bytes_read_(0), bytes_written_(0),
|
||||
bytes_new_(0), seconds_up_(0) {}
|
||||
StatsSnapshot() : compaction_bytes_read_(0), compaction_bytes_written_(0),
|
||||
ingest_bytes_(0), wal_bytes_(0), wal_synced_(0),
|
||||
write_with_wal_(0), write_other_(0), write_self_(0),
|
||||
seconds_up_(0) {}
|
||||
};
|
||||
|
||||
// Counters from the previous time per-interval stats were computed
|
||||
StatsSnapshot last_stats_;
|
||||
|
||||
static const int KEEP_LOG_FILE_NUM = 1000;
|
||||
@ -480,6 +543,18 @@ class DBImpl : public DB {
|
||||
std::vector<SequenceNumber>& snapshots,
|
||||
SequenceNumber* prev_snapshot);
|
||||
|
||||
// Returns a pointer to the previous SuperVersion if that was its last
|
||||
// reference and it now needs deletion, or nullptr if not.
|
||||
// As argument takes a pointer to allocated SuperVersion
|
||||
// Foreground threads call this function directly (they don't carry
|
||||
// deletion state and have to handle their own creation and deletion
|
||||
// of SuperVersion)
|
||||
SuperVersion* InstallSuperVersion(SuperVersion* new_superversion);
|
||||
// Background threads call this function, which is just a wrapper around
|
||||
// the InstallSuperVersion() function above. Background threads carry
|
||||
// deletion_state which can have new_superversion already allocated.
|
||||
void InstallSuperVersion(DeletionState& deletion_state);
|
||||
|
||||
// Function that Get and KeyMayExist call with no_io true or false
|
||||
// Note: 'value_found' from KeyMayExist propagates here
|
||||
Status GetImpl(const ReadOptions& options,
|
||||
|
182
db/db_test.cc
182
db/db_test.cc
@ -701,23 +701,25 @@ static std::string Key(int i) {
|
||||
return std::string(buf);
|
||||
}
|
||||
|
||||
TEST(DBTest, Empty) {
|
||||
/*
|
||||
TEST(DBTest, GetFromImmutableLayer) {
|
||||
do {
|
||||
ASSERT_TRUE(db_ != nullptr);
|
||||
ASSERT_EQ("NOT_FOUND", Get("foo"));
|
||||
} while (ChangeOptions());
|
||||
}
|
||||
Options options = CurrentOptions();
|
||||
options.env = env_;
|
||||
options.write_buffer_size = 100000; // Small write buffer
|
||||
Reopen(&options);
|
||||
|
||||
TEST(DBTest, ReadWrite) {
|
||||
do {
|
||||
ASSERT_OK(Put("foo", "v1"));
|
||||
ASSERT_EQ("v1", Get("foo"));
|
||||
ASSERT_OK(Put("bar", "v2"));
|
||||
ASSERT_OK(Put("foo", "v3"));
|
||||
ASSERT_EQ("v3", Get("foo"));
|
||||
ASSERT_EQ("v2", Get("bar"));
|
||||
|
||||
env_->delay_sstable_sync_.Release_Store(env_); // Block sync calls
|
||||
Put("k1", std::string(100000, 'x')); // Fill memtable
|
||||
Put("k2", std::string(100000, 'y')); // Trigger compaction
|
||||
ASSERT_EQ("v1", Get("foo"));
|
||||
env_->delay_sstable_sync_.Release_Store(nullptr); // Release sync calls
|
||||
} while (ChangeOptions());
|
||||
}
|
||||
*/
|
||||
|
||||
// Make sure that when options.block_cache is set, after a new table is
|
||||
// created its index/filter blocks are added to block cache.
|
||||
@ -731,7 +733,7 @@ TEST(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
|
||||
|
||||
ASSERT_OK(db_->Put(WriteOptions(), "key", "val"));
|
||||
// Create a new table.
|
||||
dbfull()->Flush(FlushOptions());
|
||||
ASSERT_OK(dbfull()->Flush(FlushOptions()));
|
||||
|
||||
// index/filter blocks added to block cache right after table creation.
|
||||
ASSERT_EQ(1,
|
||||
@ -776,157 +778,6 @@ TEST(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
|
||||
options.statistics.get()->getTickerCount(BLOCK_CACHE_FILTER_HIT));
|
||||
}
|
||||
|
||||
// Fills the DB until level 2 holds at least one table file, then checks that
// reopening with num_levels = 1 is rejected as Corruption with the exact
// message below, while reopening with num_levels = 10 succeeds.
TEST(DBTest, LevelLimitReopen) {
  Options options = CurrentOptions();
  Reopen(&options);

  // Keep writing 1MB values until something lands in level 2.
  const std::string value(1024 * 1024, ' ');
  for (int i = 0; NumTableFilesAtLevel(2) == 0; i++) {
    ASSERT_OK(Put(Key(i), value));
  }

  // Fewer levels than the DB already uses: must fail.
  options.num_levels = 1;
  options.max_bytes_for_level_multiplier_additional.resize(1, 1);
  Status too_few_levels = TryReopen(&options);
  ASSERT_EQ(too_few_levels.IsCorruption(), true);
  ASSERT_EQ(too_few_levels.ToString(),
            "Corruption: VersionEdit: db already has "
            "more levels than options.num_levels");

  // More levels than the DB uses: must succeed.
  options.num_levels = 10;
  options.max_bytes_for_level_multiplier_additional.resize(10, 1);
  ASSERT_OK(TryReopen(&options));
}
|
||||
|
||||
// Checks WritableFile preallocation bookkeeping: with a 1MB preallocation
// block size, the last allocated block reported by GetPreallocationStatus()
// must be 0 before any write, 1 after a small write, 2 after one more full
// block, and 7 after five further blocks.
//
// Every Append() is checked with ASSERT_OK so a failed write surfaces as a
// write failure instead of a confusing block-count mismatch.
TEST(DBTest, Preallocation) {
  const std::string src = dbname_ + "/alloc_test";
  unique_ptr<WritableFile> srcfile;
  const EnvOptions soptions;
  ASSERT_OK(env_->NewWritableFile(src, &srcfile, soptions));
  srcfile->SetPreallocationBlockSize(1024 * 1024);

  // No writes should mean no preallocation
  size_t block_size = 0;
  size_t last_allocated_block = 0;
  srcfile->GetPreallocationStatus(&block_size, &last_allocated_block);
  ASSERT_EQ(last_allocated_block, 0UL);

  // Small write should preallocate one block
  ASSERT_OK(srcfile->Append("test"));
  srcfile->GetPreallocationStatus(&block_size, &last_allocated_block);
  ASSERT_EQ(last_allocated_block, 1UL);

  // Write an entire preallocation block, make sure we are now at two blocks.
  std::string buf(block_size, ' ');
  ASSERT_OK(srcfile->Append(buf));
  srcfile->GetPreallocationStatus(&block_size, &last_allocated_block);
  ASSERT_EQ(last_allocated_block, 2UL);

  // Write five more blocks at once, ensure we're where we need to be.
  buf = std::string(block_size * 5, ' ');
  ASSERT_OK(srcfile->Append(buf));
  srcfile->GetPreallocationStatus(&block_size, &last_allocated_block);
  ASSERT_EQ(last_allocated_block, 7UL);
}
|
||||
|
||||
// Writes a key twice, reads back each value, then deletes the key and
// expects "NOT_FOUND". Repeated for every configuration ChangeOptions()
// offers.
TEST(DBTest, PutDeleteGet) {
  do {
    const std::string key("foo");

    ASSERT_OK(db_->Put(WriteOptions(), key, "v1"));
    ASSERT_EQ("v1", Get(key));

    // Overwrite must be visible immediately.
    ASSERT_OK(db_->Put(WriteOptions(), key, "v2"));
    ASSERT_EQ("v2", Get(key));

    ASSERT_OK(db_->Delete(WriteOptions(), key));
    ASSERT_EQ("NOT_FOUND", Get(key));
  } while (ChangeOptions());
}
|
||||
|
||||
|
||||
// With a small write buffer and sstable syncs blocked, two 100000-byte
// writes are issued (the original comments say they fill the memtable and
// trigger compaction). "foo" must still be readable while the syncs are held
// back.
//
// The two large Puts are checked with ASSERT_OK like every other Put in this
// test; a failed write would otherwise go unnoticed and the final Get would
// pass without exercising the intended state.
TEST(DBTest, GetFromImmutableLayer) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.write_buffer_size = 100000;  // Small write buffer
    Reopen(&options);

    ASSERT_OK(Put("foo", "v1"));
    ASSERT_EQ("v1", Get("foo"));

    env_->delay_sstable_sync_.Release_Store(env_);     // Block sync calls
    ASSERT_OK(Put("k1", std::string(100000, 'x')));    // Fill memtable
    ASSERT_OK(Put("k2", std::string(100000, 'y')));    // Trigger compaction
    ASSERT_EQ("v1", Get("foo"));
    env_->delay_sstable_sync_.Release_Store(nullptr);  // Release sync calls
  } while (ChangeOptions());
}
|
||||
|
||||
// Writes one key, calls TEST_FlushMemTable(), and checks that Get still
// returns the value afterwards.
TEST(DBTest, GetFromVersions) {
  do {
    const std::string key("foo");
    const std::string expected("v1");

    ASSERT_OK(Put(key, expected));
    dbfull()->TEST_FlushMemTable();
    ASSERT_EQ(expected, Get(key));
  } while (ChangeOptions());
}
|
||||
|
||||
// A snapshot taken between two writes of the same key must keep returning
// the first value, both before and after TEST_FlushMemTable(), while a plain
// Get returns the second value.
TEST(DBTest, GetSnapshot) {
  do {
    // Try with both a short key and a long key
    const std::string keys[] = {std::string("foo"), std::string(200, 'x')};
    for (const std::string& key : keys) {
      ASSERT_OK(Put(key, "v1"));
      const Snapshot* snapshot = db_->GetSnapshot();
      ASSERT_OK(Put(key, "v2"));

      // Before the flush.
      ASSERT_EQ("v2", Get(key));
      ASSERT_EQ("v1", Get(key, snapshot));

      dbfull()->TEST_FlushMemTable();

      // After the flush.
      ASSERT_EQ("v2", Get(key));
      ASSERT_EQ("v1", Get(key, snapshot));

      db_->ReleaseSnapshot(snapshot);
    }
  } while (ChangeOptions());
}
|
||||
|
||||
// Check that we process level-0 files in correct order. Two flushes produce
// two level-0 files; the earlier one sorts first in the level-0 file list
// because it has a smaller "smallest" key ("bar"), yet Get must return the
// value from the later file.
TEST(DBTest, GetLevel0Ordering) {
  do {
    const std::string key("foo");

    // First file: contains "bar" and the old value of "foo".
    ASSERT_OK(Put("bar", "b"));
    ASSERT_OK(Put(key, "v1"));
    dbfull()->TEST_FlushMemTable();

    // Second file: contains only the new value of "foo".
    ASSERT_OK(Put(key, "v2"));
    dbfull()->TEST_FlushMemTable();

    ASSERT_EQ("v2", Get(key));
  } while (ChangeOptions());
}
|
||||
|
||||
// Writes a value and compacts it, then overwrites the key. Get must return
// the newer value both before and after TEST_FlushMemTable().
TEST(DBTest, GetOrderedByLevels) {
  do {
    const std::string key("foo");

    ASSERT_OK(Put(key, "v1"));
    Compact("a", "z");
    ASSERT_EQ("v1", Get(key));

    ASSERT_OK(Put(key, "v2"));
    ASSERT_EQ("v2", Get(key));

    dbfull()->TEST_FlushMemTable();
    ASSERT_EQ("v2", Get(key));
  } while (ChangeOptions());
}
|
||||
|
||||
// Writes three keys, compacting a small range after each one, and then
// checks that every key reads back its own value.
TEST(DBTest, GetPicksCorrectFile) {
  do {
    // Arrange to have multiple files in a non-level-0 level.
    struct Step {
      const char* key;
      const char* value;
      const char* compact_limit;
    };
    const Step steps[] = {
        {"a", "va", "b"},
        {"x", "vx", "y"},
        {"f", "vf", "g"},
    };
    for (const Step& step : steps) {
      ASSERT_OK(Put(step.key, step.value));
      Compact(step.key, step.compact_limit);
    }

    ASSERT_EQ("va", Get("a"));
    ASSERT_EQ("vf", Get("f"));
    ASSERT_EQ("vx", Get("x"));
  } while (ChangeOptions());
}
|
||||
|
||||
TEST(DBTest, GetEncountersEmptyLevel) {
|
||||
do {
|
||||
// Arrange for the following to happen:
|
||||
@ -4510,6 +4361,10 @@ class ModelDB: public DB {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Returns the name_ member.
virtual const std::string& GetName() const {
|
||||
return name_;
|
||||
}
|
||||
|
||||
// Always returns nullptr.
virtual Env* GetEnv() const {
|
||||
return nullptr;
|
||||
}
|
||||
@ -4587,6 +4442,7 @@ class ModelDB: public DB {
|
||||
};
|
||||
const Options options_;
|
||||
KVMap map_;
|
||||
std::string name_ = "";
|
||||
};
|
||||
|
||||
static std::string RandomKey(Random* rnd, int minimum = 0) {
|
||||
|
@ -279,7 +279,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
|
||||
*s = Status::Corruption("Error: Could not perform merge.");
|
||||
}
|
||||
} else {
|
||||
*s = Status::NotFound(Slice());
|
||||
*s = Status::NotFound();
|
||||
}
|
||||
found_final_value = true;
|
||||
break;
|
||||
|
@ -50,9 +50,8 @@ Status TableCache::FindTable(const EnvOptions& toptions,
|
||||
Cache::Handle** handle, bool* table_io,
|
||||
const bool no_io) {
|
||||
Status s;
|
||||
char buf[sizeof(file_number)];
|
||||
EncodeFixed64(buf, file_number);
|
||||
Slice key(buf, sizeof(buf));
|
||||
Slice key(reinterpret_cast<const char*>(&file_number), sizeof(file_number));
|
||||
|
||||
*handle = cache_->Lookup(key);
|
||||
if (*handle == nullptr) {
|
||||
if (no_io) { // Dont do IO and return a not-found status
|
||||
@ -165,9 +164,8 @@ bool TableCache::PrefixMayMatch(const ReadOptions& options,
|
||||
}
|
||||
|
||||
void TableCache::Evict(uint64_t file_number) {
|
||||
char buf[sizeof(file_number)];
|
||||
EncodeFixed64(buf, file_number);
|
||||
cache_->Erase(Slice(buf, sizeof(buf)));
|
||||
Slice key(reinterpret_cast<const char*>(&file_number), sizeof(file_number));
|
||||
cache_->Erase(key);
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -545,7 +545,7 @@ void Version::Get(const ReadOptions& options,
|
||||
case kFound:
|
||||
return;
|
||||
case kDeleted:
|
||||
*status = Status::NotFound(Slice()); // Use empty error message for speed
|
||||
*status = Status::NotFound(); // Use empty error message for speed
|
||||
return;
|
||||
case kCorrupt:
|
||||
*status = Status::Corruption("corrupted key for ", user_key);
|
||||
@ -570,7 +570,7 @@ void Version::Get(const ReadOptions& options,
|
||||
user_key);
|
||||
}
|
||||
} else {
|
||||
*status = Status::NotFound(Slice()); // Use an empty error message for speed
|
||||
*status = Status::NotFound(); // Use an empty error message for speed
|
||||
}
|
||||
}
|
||||
|
||||
@ -1112,12 +1112,6 @@ class VersionSet::Builder {
|
||||
MaybeAddFile(v, level, *base_iter);
|
||||
}
|
||||
}
|
||||
// Pre-sort level0 for Get()
|
||||
if (vset_->options_->compaction_style == kCompactionStyleUniversal) {
|
||||
std::sort(v->files_[0].begin(), v->files_[0].end(), NewestFirstBySeqNo);
|
||||
} else {
|
||||
std::sort(v->files_[0].begin(), v->files_[0].end(), NewestFirst);
|
||||
}
|
||||
|
||||
CheckConsistency(v);
|
||||
}
|
||||
@ -1683,6 +1677,12 @@ void VersionSet::MarkFileNumberUsed(uint64_t number) {
|
||||
|
||||
void VersionSet::Finalize(Version* v,
|
||||
std::vector<uint64_t>& size_being_compacted) {
|
||||
// Pre-sort level0 for Get()
|
||||
if (options_->compaction_style == kCompactionStyleUniversal) {
|
||||
std::sort(v->files_[0].begin(), v->files_[0].end(), NewestFirstBySeqNo);
|
||||
} else {
|
||||
std::sort(v->files_[0].begin(), v->files_[0].end(), NewestFirst);
|
||||
}
|
||||
|
||||
double max_score = 0;
|
||||
int max_score_level = 0;
|
||||
|
@ -274,12 +274,14 @@ class VersionSet {
|
||||
int64_t NumLevelBytes(int level) const;
|
||||
|
||||
// Return the last sequence number.
|
||||
uint64_t LastSequence() const { return last_sequence_; }
|
||||
uint64_t LastSequence() const {
|
||||
return last_sequence_.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
// Set the last sequence number to s.
|
||||
void SetLastSequence(uint64_t s) {
|
||||
assert(s >= last_sequence_);
|
||||
last_sequence_ = s;
|
||||
last_sequence_.store(s, std::memory_order_release);
|
||||
}
|
||||
|
||||
// Mark the specified file number as used.
|
||||
@ -478,7 +480,7 @@ class VersionSet {
|
||||
const InternalKeyComparator icmp_;
|
||||
uint64_t next_file_number_;
|
||||
uint64_t manifest_file_number_;
|
||||
uint64_t last_sequence_;
|
||||
std::atomic<uint64_t> last_sequence_;
|
||||
uint64_t log_number_;
|
||||
uint64_t prev_log_number_; // 0 or backing store for memtable being compacted
|
||||
|
||||
|
@ -54,171 +54,204 @@ extern "C" {
|
||||
|
||||
/* Exported types */
|
||||
|
||||
typedef struct leveldb_t leveldb_t;
|
||||
typedef struct leveldb_cache_t leveldb_cache_t;
|
||||
typedef struct leveldb_comparator_t leveldb_comparator_t;
|
||||
typedef struct leveldb_env_t leveldb_env_t;
|
||||
typedef struct leveldb_filelock_t leveldb_filelock_t;
|
||||
typedef struct leveldb_filterpolicy_t leveldb_filterpolicy_t;
|
||||
typedef struct leveldb_iterator_t leveldb_iterator_t;
|
||||
typedef struct leveldb_logger_t leveldb_logger_t;
|
||||
typedef struct leveldb_options_t leveldb_options_t;
|
||||
typedef struct leveldb_randomfile_t leveldb_randomfile_t;
|
||||
typedef struct leveldb_readoptions_t leveldb_readoptions_t;
|
||||
typedef struct leveldb_seqfile_t leveldb_seqfile_t;
|
||||
typedef struct leveldb_snapshot_t leveldb_snapshot_t;
|
||||
typedef struct leveldb_writablefile_t leveldb_writablefile_t;
|
||||
typedef struct leveldb_writebatch_t leveldb_writebatch_t;
|
||||
typedef struct leveldb_writeoptions_t leveldb_writeoptions_t;
|
||||
typedef struct rocksdb_t rocksdb_t;
|
||||
typedef struct rocksdb_cache_t rocksdb_cache_t;
|
||||
typedef struct rocksdb_comparator_t rocksdb_comparator_t;
|
||||
typedef struct rocksdb_env_t rocksdb_env_t;
|
||||
typedef struct rocksdb_filelock_t rocksdb_filelock_t;
|
||||
typedef struct rocksdb_filterpolicy_t rocksdb_filterpolicy_t;
|
||||
typedef struct rocksdb_iterator_t rocksdb_iterator_t;
|
||||
typedef struct rocksdb_logger_t rocksdb_logger_t;
|
||||
typedef struct rocksdb_options_t rocksdb_options_t;
|
||||
typedef struct rocksdb_randomfile_t rocksdb_randomfile_t;
|
||||
typedef struct rocksdb_readoptions_t rocksdb_readoptions_t;
|
||||
typedef struct rocksdb_seqfile_t rocksdb_seqfile_t;
|
||||
typedef struct rocksdb_snapshot_t rocksdb_snapshot_t;
|
||||
typedef struct rocksdb_writablefile_t rocksdb_writablefile_t;
|
||||
typedef struct rocksdb_writebatch_t rocksdb_writebatch_t;
|
||||
typedef struct rocksdb_writeoptions_t rocksdb_writeoptions_t;
|
||||
typedef struct rocksdb_universal_compaction_options_t rocksdb_universal_compaction_options_t;
|
||||
|
||||
/* DB operations */
|
||||
|
||||
extern leveldb_t* leveldb_open(
|
||||
const leveldb_options_t* options,
|
||||
extern rocksdb_t* rocksdb_open(
|
||||
const rocksdb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr);
|
||||
|
||||
extern void leveldb_close(leveldb_t* db);
|
||||
extern void rocksdb_close(rocksdb_t* db);
|
||||
|
||||
extern void leveldb_put(
|
||||
leveldb_t* db,
|
||||
const leveldb_writeoptions_t* options,
|
||||
extern void rocksdb_put(
|
||||
rocksdb_t* db,
|
||||
const rocksdb_writeoptions_t* options,
|
||||
const char* key, size_t keylen,
|
||||
const char* val, size_t vallen,
|
||||
char** errptr);
|
||||
|
||||
extern void leveldb_delete(
|
||||
leveldb_t* db,
|
||||
const leveldb_writeoptions_t* options,
|
||||
extern void rocksdb_delete(
|
||||
rocksdb_t* db,
|
||||
const rocksdb_writeoptions_t* options,
|
||||
const char* key, size_t keylen,
|
||||
char** errptr);
|
||||
|
||||
extern void leveldb_write(
|
||||
leveldb_t* db,
|
||||
const leveldb_writeoptions_t* options,
|
||||
leveldb_writebatch_t* batch,
|
||||
extern void rocksdb_write(
|
||||
rocksdb_t* db,
|
||||
const rocksdb_writeoptions_t* options,
|
||||
rocksdb_writebatch_t* batch,
|
||||
char** errptr);
|
||||
|
||||
/* Returns NULL if not found. A malloc()ed array otherwise.
|
||||
Stores the length of the array in *vallen. */
|
||||
extern char* leveldb_get(
|
||||
leveldb_t* db,
|
||||
const leveldb_readoptions_t* options,
|
||||
extern char* rocksdb_get(
|
||||
rocksdb_t* db,
|
||||
const rocksdb_readoptions_t* options,
|
||||
const char* key, size_t keylen,
|
||||
size_t* vallen,
|
||||
char** errptr);
|
||||
|
||||
extern leveldb_iterator_t* leveldb_create_iterator(
|
||||
leveldb_t* db,
|
||||
const leveldb_readoptions_t* options);
|
||||
extern rocksdb_iterator_t* rocksdb_create_iterator(
|
||||
rocksdb_t* db,
|
||||
const rocksdb_readoptions_t* options);
|
||||
|
||||
extern const leveldb_snapshot_t* leveldb_create_snapshot(
|
||||
leveldb_t* db);
|
||||
extern const rocksdb_snapshot_t* rocksdb_create_snapshot(
|
||||
rocksdb_t* db);
|
||||
|
||||
extern void leveldb_release_snapshot(
|
||||
leveldb_t* db,
|
||||
const leveldb_snapshot_t* snapshot);
|
||||
extern void rocksdb_release_snapshot(
|
||||
rocksdb_t* db,
|
||||
const rocksdb_snapshot_t* snapshot);
|
||||
|
||||
/* Returns NULL if property name is unknown.
|
||||
Else returns a pointer to a malloc()-ed null-terminated value. */
|
||||
extern char* leveldb_property_value(
|
||||
leveldb_t* db,
|
||||
extern char* rocksdb_property_value(
|
||||
rocksdb_t* db,
|
||||
const char* propname);
|
||||
|
||||
extern void leveldb_approximate_sizes(
|
||||
leveldb_t* db,
|
||||
extern void rocksdb_approximate_sizes(
|
||||
rocksdb_t* db,
|
||||
int num_ranges,
|
||||
const char* const* range_start_key, const size_t* range_start_key_len,
|
||||
const char* const* range_limit_key, const size_t* range_limit_key_len,
|
||||
uint64_t* sizes);
|
||||
|
||||
extern void leveldb_compact_range(
|
||||
leveldb_t* db,
|
||||
extern void rocksdb_compact_range(
|
||||
rocksdb_t* db,
|
||||
const char* start_key, size_t start_key_len,
|
||||
const char* limit_key, size_t limit_key_len);
|
||||
|
||||
/* Management operations */
|
||||
|
||||
extern void leveldb_destroy_db(
|
||||
const leveldb_options_t* options,
|
||||
extern void rocksdb_destroy_db(
|
||||
const rocksdb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr);
|
||||
|
||||
extern void leveldb_repair_db(
|
||||
const leveldb_options_t* options,
|
||||
extern void rocksdb_repair_db(
|
||||
const rocksdb_options_t* options,
|
||||
const char* name,
|
||||
char** errptr);
|
||||
|
||||
/* Iterator */
|
||||
|
||||
extern void leveldb_iter_destroy(leveldb_iterator_t*);
|
||||
extern unsigned char leveldb_iter_valid(const leveldb_iterator_t*);
|
||||
extern void leveldb_iter_seek_to_first(leveldb_iterator_t*);
|
||||
extern void leveldb_iter_seek_to_last(leveldb_iterator_t*);
|
||||
extern void leveldb_iter_seek(leveldb_iterator_t*, const char* k, size_t klen);
|
||||
extern void leveldb_iter_next(leveldb_iterator_t*);
|
||||
extern void leveldb_iter_prev(leveldb_iterator_t*);
|
||||
extern const char* leveldb_iter_key(const leveldb_iterator_t*, size_t* klen);
|
||||
extern const char* leveldb_iter_value(const leveldb_iterator_t*, size_t* vlen);
|
||||
extern void leveldb_iter_get_error(const leveldb_iterator_t*, char** errptr);
|
||||
extern void rocksdb_iter_destroy(rocksdb_iterator_t*);
|
||||
extern unsigned char rocksdb_iter_valid(const rocksdb_iterator_t*);
|
||||
extern void rocksdb_iter_seek_to_first(rocksdb_iterator_t*);
|
||||
extern void rocksdb_iter_seek_to_last(rocksdb_iterator_t*);
|
||||
extern void rocksdb_iter_seek(rocksdb_iterator_t*, const char* k, size_t klen);
|
||||
extern void rocksdb_iter_next(rocksdb_iterator_t*);
|
||||
extern void rocksdb_iter_prev(rocksdb_iterator_t*);
|
||||
extern const char* rocksdb_iter_key(const rocksdb_iterator_t*, size_t* klen);
|
||||
extern const char* rocksdb_iter_value(const rocksdb_iterator_t*, size_t* vlen);
|
||||
extern void rocksdb_iter_get_error(const rocksdb_iterator_t*, char** errptr);
|
||||
|
||||
/* Write batch */
|
||||
|
||||
extern leveldb_writebatch_t* leveldb_writebatch_create();
|
||||
extern void leveldb_writebatch_destroy(leveldb_writebatch_t*);
|
||||
extern void leveldb_writebatch_clear(leveldb_writebatch_t*);
|
||||
extern void leveldb_writebatch_put(
|
||||
leveldb_writebatch_t*,
|
||||
extern rocksdb_writebatch_t* rocksdb_writebatch_create();
|
||||
extern void rocksdb_writebatch_destroy(rocksdb_writebatch_t*);
|
||||
extern void rocksdb_writebatch_clear(rocksdb_writebatch_t*);
|
||||
extern void rocksdb_writebatch_put(
|
||||
rocksdb_writebatch_t*,
|
||||
const char* key, size_t klen,
|
||||
const char* val, size_t vlen);
|
||||
extern void leveldb_writebatch_delete(
|
||||
leveldb_writebatch_t*,
|
||||
extern void rocksdb_writebatch_delete(
|
||||
rocksdb_writebatch_t*,
|
||||
const char* key, size_t klen);
|
||||
extern void leveldb_writebatch_iterate(
|
||||
leveldb_writebatch_t*,
|
||||
extern void rocksdb_writebatch_iterate(
|
||||
rocksdb_writebatch_t*,
|
||||
void* state,
|
||||
void (*put)(void*, const char* k, size_t klen, const char* v, size_t vlen),
|
||||
void (*deleted)(void*, const char* k, size_t klen));
|
||||
|
||||
/* Options */
|
||||
|
||||
extern leveldb_options_t* leveldb_options_create();
|
||||
extern void leveldb_options_destroy(leveldb_options_t*);
|
||||
extern void leveldb_options_set_comparator(
|
||||
leveldb_options_t*,
|
||||
leveldb_comparator_t*);
|
||||
extern void leveldb_options_set_compression_per_level(
|
||||
leveldb_options_t* opt,
|
||||
extern rocksdb_options_t* rocksdb_options_create();
|
||||
extern void rocksdb_options_destroy(rocksdb_options_t*);
|
||||
extern void rocksdb_options_set_comparator(
|
||||
rocksdb_options_t*,
|
||||
rocksdb_comparator_t*);
|
||||
extern void rocksdb_options_set_compression_per_level(
|
||||
rocksdb_options_t* opt,
|
||||
int* level_values,
|
||||
size_t num_levels);
|
||||
extern void leveldb_options_set_filter_policy(
|
||||
leveldb_options_t*,
|
||||
leveldb_filterpolicy_t*);
|
||||
extern void leveldb_options_set_create_if_missing(
|
||||
leveldb_options_t*, unsigned char);
|
||||
extern void leveldb_options_set_error_if_exists(
|
||||
leveldb_options_t*, unsigned char);
|
||||
extern void leveldb_options_set_paranoid_checks(
|
||||
leveldb_options_t*, unsigned char);
|
||||
extern void leveldb_options_set_env(leveldb_options_t*, leveldb_env_t*);
|
||||
extern void leveldb_options_set_info_log(leveldb_options_t*, leveldb_logger_t*);
|
||||
extern void leveldb_options_set_write_buffer_size(leveldb_options_t*, size_t);
|
||||
extern void leveldb_options_set_max_open_files(leveldb_options_t*, int);
|
||||
extern void leveldb_options_set_cache(leveldb_options_t*, leveldb_cache_t*);
|
||||
extern void leveldb_options_set_block_size(leveldb_options_t*, size_t);
|
||||
extern void leveldb_options_set_block_restart_interval(leveldb_options_t*, int);
|
||||
extern void leveldb_options_set_compression_options(
|
||||
leveldb_options_t* opt, int w_bits, int level, int strategy);
|
||||
extern void rocksdb_options_set_filter_policy(
|
||||
rocksdb_options_t*,
|
||||
rocksdb_filterpolicy_t*);
|
||||
extern void rocksdb_options_set_create_if_missing(
|
||||
rocksdb_options_t*, unsigned char);
|
||||
extern void rocksdb_options_set_error_if_exists(
|
||||
rocksdb_options_t*, unsigned char);
|
||||
extern void rocksdb_options_set_paranoid_checks(
|
||||
rocksdb_options_t*, unsigned char);
|
||||
extern void rocksdb_options_set_env(rocksdb_options_t*, rocksdb_env_t*);
|
||||
extern void rocksdb_options_set_info_log(rocksdb_options_t*, rocksdb_logger_t*);
|
||||
extern void rocksdb_options_set_write_buffer_size(rocksdb_options_t*, size_t);
|
||||
extern void rocksdb_options_set_max_open_files(rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_cache(rocksdb_options_t*, rocksdb_cache_t*);
|
||||
extern void rocksdb_options_set_block_size(rocksdb_options_t*, size_t);
|
||||
extern void rocksdb_options_set_block_restart_interval(rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_compression_options(
|
||||
rocksdb_options_t*, int, int, int);
|
||||
extern void rocksdb_options_set_num_levels(rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_level0_file_num_compaction_trigger(
|
||||
rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_level0_slowdown_writes_trigger(
|
||||
rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_level0_stop_writes_trigger(
|
||||
rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_target_file_size_base(
|
||||
rocksdb_options_t*, uint64_t);
|
||||
extern void rocksdb_options_set_target_file_size_multiplier(
|
||||
rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_max_write_buffer_number(rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_min_write_buffer_number_to_merge(rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_max_background_compactions(rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_max_background_flushes(rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_use_fsync(
|
||||
rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_disable_data_sync(rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_disable_auto_compactions(rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_disable_seek_compaction(rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_source_compaction_factor(rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_prepare_for_bulk_load(rocksdb_options_t*);
|
||||
extern void rocksdb_options_set_memtable_vector_rep(rocksdb_options_t*);
|
||||
|
||||
|
||||
enum {
|
||||
leveldb_no_compression = 0,
|
||||
leveldb_snappy_compression = 1
|
||||
rocksdb_no_compression = 0,
|
||||
rocksdb_snappy_compression = 1,
|
||||
rocksdb_zlib_compression = 1,
|
||||
rocksdb_bz2_compression = 1
|
||||
};
|
||||
extern void leveldb_options_set_compression(leveldb_options_t*, int);
|
||||
extern void rocksdb_options_set_compression(rocksdb_options_t*, int);
|
||||
|
||||
enum {
|
||||
rocksdb_level_compaction = 0,
|
||||
rocksdb_universal_compaction = 1
|
||||
};
|
||||
extern void rocksdb_options_set_compaction_style(rocksdb_options_t*, int);
|
||||
extern void rocksdb_options_set_universal_compaction_options(rocksdb_options_t*, rocksdb_universal_compaction_options_t*);
|
||||
/* Comparator */
|
||||
|
||||
extern leveldb_comparator_t* leveldb_comparator_create(
|
||||
extern rocksdb_comparator_t* rocksdb_comparator_create(
|
||||
void* state,
|
||||
void (*destructor)(void*),
|
||||
int (*compare)(
|
||||
@ -226,11 +259,11 @@ extern leveldb_comparator_t* leveldb_comparator_create(
|
||||
const char* a, size_t alen,
|
||||
const char* b, size_t blen),
|
||||
const char* (*name)(void*));
|
||||
extern void leveldb_comparator_destroy(leveldb_comparator_t*);
|
||||
extern void rocksdb_comparator_destroy(rocksdb_comparator_t*);
|
||||
|
||||
/* Filter policy */
|
||||
|
||||
extern leveldb_filterpolicy_t* leveldb_filterpolicy_create(
|
||||
extern rocksdb_filterpolicy_t* rocksdb_filterpolicy_create(
|
||||
void* state,
|
||||
void (*destructor)(void*),
|
||||
char* (*create_filter)(
|
||||
@ -243,40 +276,65 @@ extern leveldb_filterpolicy_t* leveldb_filterpolicy_create(
|
||||
const char* key, size_t length,
|
||||
const char* filter, size_t filter_length),
|
||||
const char* (*name)(void*));
|
||||
extern void leveldb_filterpolicy_destroy(leveldb_filterpolicy_t*);
|
||||
extern void rocksdb_filterpolicy_destroy(rocksdb_filterpolicy_t*);
|
||||
|
||||
extern leveldb_filterpolicy_t* leveldb_filterpolicy_create_bloom(
|
||||
extern rocksdb_filterpolicy_t* rocksdb_filterpolicy_create_bloom(
|
||||
int bits_per_key);
|
||||
|
||||
/* Read options */
|
||||
|
||||
extern leveldb_readoptions_t* leveldb_readoptions_create();
|
||||
extern void leveldb_readoptions_destroy(leveldb_readoptions_t*);
|
||||
extern void leveldb_readoptions_set_verify_checksums(
|
||||
leveldb_readoptions_t*,
|
||||
extern rocksdb_readoptions_t* rocksdb_readoptions_create();
|
||||
extern void rocksdb_readoptions_destroy(rocksdb_readoptions_t*);
|
||||
extern void rocksdb_readoptions_set_verify_checksums(
|
||||
rocksdb_readoptions_t*,
|
||||
unsigned char);
|
||||
extern void leveldb_readoptions_set_fill_cache(
|
||||
leveldb_readoptions_t*, unsigned char);
|
||||
extern void leveldb_readoptions_set_snapshot(
|
||||
leveldb_readoptions_t*,
|
||||
const leveldb_snapshot_t*);
|
||||
extern void rocksdb_readoptions_set_fill_cache(
|
||||
rocksdb_readoptions_t*, unsigned char);
|
||||
extern void rocksdb_readoptions_set_snapshot(
|
||||
rocksdb_readoptions_t*,
|
||||
const rocksdb_snapshot_t*);
|
||||
|
||||
/* Write options */
|
||||
|
||||
extern leveldb_writeoptions_t* leveldb_writeoptions_create();
|
||||
extern void leveldb_writeoptions_destroy(leveldb_writeoptions_t*);
|
||||
extern void leveldb_writeoptions_set_sync(
|
||||
leveldb_writeoptions_t*, unsigned char);
|
||||
extern rocksdb_writeoptions_t* rocksdb_writeoptions_create();
|
||||
extern void rocksdb_writeoptions_destroy(rocksdb_writeoptions_t*);
|
||||
extern void rocksdb_writeoptions_set_sync(
|
||||
rocksdb_writeoptions_t*, unsigned char);
|
||||
extern void rocksdb_writeoptions_disable_WAL(rocksdb_writeoptions_t* opt, int disable);
|
||||
|
||||
/* Cache */
|
||||
|
||||
extern leveldb_cache_t* leveldb_cache_create_lru(size_t capacity);
|
||||
extern void leveldb_cache_destroy(leveldb_cache_t* cache);
|
||||
extern rocksdb_cache_t* rocksdb_cache_create_lru(size_t capacity);
|
||||
extern void rocksdb_cache_destroy(rocksdb_cache_t* cache);
|
||||
|
||||
/* Env */
|
||||
|
||||
extern leveldb_env_t* leveldb_create_default_env();
|
||||
extern void leveldb_env_destroy(leveldb_env_t*);
|
||||
extern rocksdb_env_t* rocksdb_create_default_env();
|
||||
extern void rocksdb_env_set_background_threads(rocksdb_env_t* env, int n);
|
||||
extern void rocksdb_env_destroy(rocksdb_env_t*);
|
||||
|
||||
/* Universal Compaction options */
|
||||
|
||||
enum {
|
||||
rocksdb_similar_size_compaction_stop_style = 0,
|
||||
rocksdb_total_size_compaction_stop_style = 1
|
||||
};
|
||||
|
||||
extern rocksdb_universal_compaction_options_t* rocksdb_universal_compaction_options_create() ;
|
||||
extern void rocksdb_universal_compaction_options_set_size_ratio(
|
||||
rocksdb_universal_compaction_options_t*, int);
|
||||
extern void rocksdb_universal_compaction_options_set_min_merge_width(
|
||||
rocksdb_universal_compaction_options_t*, int);
|
||||
extern void rocksdb_universal_compaction_options_set_max_merge_width(
|
||||
rocksdb_universal_compaction_options_t*, int);
|
||||
extern void rocksdb_universal_compaction_options_set_max_size_amplification_percent(
|
||||
rocksdb_universal_compaction_options_t*, int);
|
||||
extern void rocksdb_universal_compaction_options_set_compression_size_percent(
|
||||
rocksdb_universal_compaction_options_t*, int);
|
||||
extern void rocksdb_universal_compaction_options_set_stop_style(
|
||||
rocksdb_universal_compaction_options_t*, int);
|
||||
extern void rocksdb_universal_compaction_options_destroy(
|
||||
rocksdb_universal_compaction_options_t*);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* end extern "C" */
|
||||
|
@ -228,6 +228,10 @@ class DB {
|
||||
// Number of files in level-0 that would stop writes.
|
||||
virtual int Level0StopWriteTrigger() = 0;
|
||||
|
||||
// Get DB name -- the exact same name that was provided as an argument to
|
||||
// DB::Open()
|
||||
virtual const std::string& GetName() const = 0;
|
||||
|
||||
// Get Env object from the DB
|
||||
virtual Env* GetEnv() const = 0;
|
||||
|
||||
|
@ -111,52 +111,72 @@ enum Tickers {
|
||||
BLOCK_CACHE_COMPRESSED_MISS, // miss in the compressed block cache
|
||||
BLOCK_CACHE_COMPRESSED_HIT, // hit in the compressed block cache
|
||||
|
||||
WAL_FILE_SYNCED, // Number of times WAL sync is done
|
||||
WAL_FILE_BYTES, // Number of bytes written to WAL
|
||||
|
||||
// Writes can be processed by requesting thread or by the thread at the
|
||||
// head of the writers queue.
|
||||
WRITE_DONE_BY_SELF,
|
||||
WRITE_DONE_BY_OTHER,
|
||||
|
||||
WRITE_WITH_WAL, // Number of Write calls that request WAL
|
||||
|
||||
COMPACT_READ_BYTES, // Bytes read during compaction
|
||||
COMPACT_WRITE_BYTES, // Bytes written during compaction
|
||||
|
||||
TICKER_ENUM_MAX
|
||||
};
|
||||
|
||||
// The order of items listed in Tickers should be the same as
|
||||
// the order listed in TickersNameMap
|
||||
const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
||||
{ BLOCK_CACHE_MISS, "rocksdb.block.cache.miss" },
|
||||
{ BLOCK_CACHE_HIT, "rocksdb.block.cache.hit" },
|
||||
{ BLOCK_CACHE_ADD, "rocksdb.block.cache.add" },
|
||||
{ BLOCK_CACHE_INDEX_MISS, "rocksdb.block.cache.index.miss" },
|
||||
{ BLOCK_CACHE_INDEX_HIT, "rocksdb.block.cache.index.hit" },
|
||||
{ BLOCK_CACHE_FILTER_MISS, "rocksdb.block.cache.filter.miss" },
|
||||
{ BLOCK_CACHE_FILTER_HIT, "rocksdb.block.cache.filter.hit" },
|
||||
{ BLOCK_CACHE_DATA_MISS, "rocksdb.block.cache.data.miss" },
|
||||
{ BLOCK_CACHE_DATA_HIT, "rocksdb.block.cache.data.hit" },
|
||||
{ BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful" },
|
||||
{ MEMTABLE_HIT, "rocksdb.memtable.hit" },
|
||||
{ MEMTABLE_MISS, "rocksdb.memtable.miss" },
|
||||
{ COMPACTION_KEY_DROP_NEWER_ENTRY, "rocksdb.compaction.key.drop.new" },
|
||||
{ COMPACTION_KEY_DROP_OBSOLETE, "rocksdb.compaction.key.drop.obsolete" },
|
||||
{ COMPACTION_KEY_DROP_USER, "rocksdb.compaction.key.drop.user" },
|
||||
{ NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written" },
|
||||
{ NUMBER_KEYS_READ, "rocksdb.number.keys.read" },
|
||||
{ NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated" },
|
||||
{ BYTES_WRITTEN, "rocksdb.bytes.written" },
|
||||
{ BYTES_READ, "rocksdb.bytes.read" },
|
||||
{ NO_FILE_CLOSES, "rocksdb.no.file.closes" },
|
||||
{ NO_FILE_OPENS, "rocksdb.no.file.opens" },
|
||||
{ NO_FILE_ERRORS, "rocksdb.no.file.errors" },
|
||||
{ STALL_L0_SLOWDOWN_MICROS, "rocksdb.l0.slowdown.micros" },
|
||||
{ STALL_MEMTABLE_COMPACTION_MICROS, "rocksdb.memtable.compaction.micros" },
|
||||
{ STALL_L0_NUM_FILES_MICROS, "rocksdb.l0.num.files.stall.micros" },
|
||||
{ RATE_LIMIT_DELAY_MILLIS, "rocksdb.rate.limit.delay.millis" },
|
||||
{ NO_ITERATORS, "rocksdb.num.iterators" },
|
||||
{ NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get" },
|
||||
{ NUMBER_MULTIGET_KEYS_READ, "rocksdb.number.multiget.keys.read" },
|
||||
{ NUMBER_MULTIGET_BYTES_READ, "rocksdb.number.multiget.bytes.read" },
|
||||
{ NUMBER_FILTERED_DELETES, "rocksdb.number.deletes.filtered" },
|
||||
{ NUMBER_MERGE_FAILURES, "rocksdb.number.merge.failures" },
|
||||
{ SEQUENCE_NUMBER, "rocksdb.sequence.number" },
|
||||
{ BLOOM_FILTER_PREFIX_CHECKED, "rocksdb.bloom.filter.prefix.checked" },
|
||||
{ BLOOM_FILTER_PREFIX_USEFUL, "rocksdb.bloom.filter.prefix.useful" },
|
||||
{ NUMBER_OF_RESEEKS_IN_ITERATION, "rocksdb.number.reseeks.iteration" },
|
||||
{ GET_UPDATES_SINCE_CALLS, "rocksdb.getupdatessince.calls" },
|
||||
{ BLOCK_CACHE_COMPRESSED_MISS, "rocksdb.block.cachecompressed.miss" },
|
||||
{ BLOCK_CACHE_COMPRESSED_HIT, "rocksdb.block.cachecompressed.hit" },
|
||||
{ BLOCK_CACHE_MISS, "rocksdb.block.cache.miss" },
|
||||
{ BLOCK_CACHE_HIT, "rocksdb.block.cache.hit" },
|
||||
{ BLOCK_CACHE_ADD, "rocksdb.block.cache.add" },
|
||||
{ BLOCK_CACHE_INDEX_MISS, "rocksdb.block.cache.index.miss" },
|
||||
{ BLOCK_CACHE_INDEX_HIT, "rocksdb.block.cache.index.hit" },
|
||||
{ BLOCK_CACHE_FILTER_MISS, "rocksdb.block.cache.filter.miss" },
|
||||
{ BLOCK_CACHE_FILTER_HIT, "rocksdb.block.cache.filter.hit" },
|
||||
{ BLOCK_CACHE_DATA_MISS, "rocksdb.block.cache.data.miss" },
|
||||
{ BLOCK_CACHE_DATA_HIT, "rocksdb.block.cache.data.hit" },
|
||||
{ BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful" },
|
||||
{ MEMTABLE_HIT, "rocksdb.memtable.hit" },
|
||||
{ MEMTABLE_MISS, "rocksdb.memtable.miss" },
|
||||
{ COMPACTION_KEY_DROP_NEWER_ENTRY, "rocksdb.compaction.key.drop.new" },
|
||||
{ COMPACTION_KEY_DROP_OBSOLETE, "rocksdb.compaction.key.drop.obsolete" },
|
||||
{ COMPACTION_KEY_DROP_USER, "rocksdb.compaction.key.drop.user" },
|
||||
{ NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written" },
|
||||
{ NUMBER_KEYS_READ, "rocksdb.number.keys.read" },
|
||||
{ NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated" },
|
||||
{ BYTES_WRITTEN, "rocksdb.bytes.written" },
|
||||
{ BYTES_READ, "rocksdb.bytes.read" },
|
||||
{ NO_FILE_CLOSES, "rocksdb.no.file.closes" },
|
||||
{ NO_FILE_OPENS, "rocksdb.no.file.opens" },
|
||||
{ NO_FILE_ERRORS, "rocksdb.no.file.errors" },
|
||||
{ STALL_L0_SLOWDOWN_MICROS, "rocksdb.l0.slowdown.micros" },
|
||||
{ STALL_MEMTABLE_COMPACTION_MICROS, "rocksdb.memtable.compaction.micros" },
|
||||
{ STALL_L0_NUM_FILES_MICROS, "rocksdb.l0.num.files.stall.micros" },
|
||||
{ RATE_LIMIT_DELAY_MILLIS, "rocksdb.rate.limit.delay.millis" },
|
||||
{ NO_ITERATORS, "rocksdb.num.iterators" },
|
||||
{ NUMBER_MULTIGET_CALLS, "rocksdb.number.multiget.get" },
|
||||
{ NUMBER_MULTIGET_KEYS_READ, "rocksdb.number.multiget.keys.read" },
|
||||
{ NUMBER_MULTIGET_BYTES_READ, "rocksdb.number.multiget.bytes.read" },
|
||||
{ NUMBER_FILTERED_DELETES, "rocksdb.number.deletes.filtered" },
|
||||
{ NUMBER_MERGE_FAILURES, "rocksdb.number.merge.failures" },
|
||||
{ SEQUENCE_NUMBER, "rocksdb.sequence.number" },
|
||||
{ BLOOM_FILTER_PREFIX_CHECKED, "rocksdb.bloom.filter.prefix.checked" },
|
||||
{ BLOOM_FILTER_PREFIX_USEFUL, "rocksdb.bloom.filter.prefix.useful" },
|
||||
{ NUMBER_OF_RESEEKS_IN_ITERATION, "rocksdb.number.reseeks.iteration" },
|
||||
{ GET_UPDATES_SINCE_CALLS, "rocksdb.getupdatessince.calls" },
|
||||
{ BLOCK_CACHE_COMPRESSED_MISS, "rocksdb.block.cachecompressed.miss" },
|
||||
{ BLOCK_CACHE_COMPRESSED_HIT, "rocksdb.block.cachecompressed.hit" },
|
||||
{ WAL_FILE_SYNCED, "rocksdb.wal.synced" },
|
||||
{ WAL_FILE_BYTES, "rocksdb.wal.bytes" },
|
||||
{ WRITE_DONE_BY_SELF, "rocksdb.write.self" },
|
||||
{ WRITE_DONE_BY_OTHER, "rocksdb.write.other" },
|
||||
{ WRITE_WITH_WAL, "rocksdb.write.wal" },
|
||||
{ COMPACT_READ_BYTES, "rocksdb.compact.read.bytes" },
|
||||
{ COMPACT_WRITE_BYTES, "rocksdb.compact.write.bytes" },
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -25,7 +25,7 @@ namespace rocksdb {
|
||||
class Status {
|
||||
public:
|
||||
// Create a success status.
|
||||
Status() : state_(nullptr) { }
|
||||
Status() : code_(kOk), state_(nullptr) { }
|
||||
~Status() { delete[] state_; }
|
||||
|
||||
// Copy the specified status.
|
||||
@ -39,6 +39,10 @@ class Status {
|
||||
static Status NotFound(const Slice& msg, const Slice& msg2 = Slice()) {
|
||||
return Status(kNotFound, msg, msg2);
|
||||
}
|
||||
// Fast path for not found without malloc;
|
||||
static Status NotFound() {
|
||||
return Status(kNotFound);
|
||||
}
|
||||
static Status Corruption(const Slice& msg, const Slice& msg2 = Slice()) {
|
||||
return Status(kCorruption, msg, msg2);
|
||||
}
|
||||
@ -59,7 +63,7 @@ class Status {
|
||||
}
|
||||
|
||||
// Returns true iff the status indicates success.
|
||||
bool ok() const { return (state_ == nullptr); }
|
||||
bool ok() const { return code() == kOk; }
|
||||
|
||||
// Returns true iff the status indicates a NotFound error.
|
||||
bool IsNotFound() const { return code() == kNotFound; }
|
||||
@ -87,13 +91,6 @@ class Status {
|
||||
std::string ToString() const;
|
||||
|
||||
private:
|
||||
// OK status has a nullptr state_. Otherwise, state_ is a new[] array
|
||||
// of the following form:
|
||||
// state_[0..3] == length of message
|
||||
// state_[4] == code
|
||||
// state_[5..] == message
|
||||
const char* state_;
|
||||
|
||||
enum Code {
|
||||
kOk = 0,
|
||||
kNotFound = 1,
|
||||
@ -105,20 +102,30 @@ class Status {
|
||||
kIncomplete = 7
|
||||
};
|
||||
|
||||
Code code() const {
|
||||
return (state_ == nullptr) ? kOk : static_cast<Code>(state_[4]);
|
||||
}
|
||||
// A nullptr state_ (which is always the case for OK) means the message
|
||||
// is empty.
|
||||
// of the following form:
|
||||
// state_[0..3] == length of message
|
||||
// state_[4..] == message
|
||||
Code code_;
|
||||
const char* state_;
|
||||
|
||||
Code code() const {
|
||||
return code_;
|
||||
}
|
||||
explicit Status(Code code) : code_(code), state_(nullptr) { }
|
||||
Status(Code code, const Slice& msg, const Slice& msg2);
|
||||
static const char* CopyState(const char* s);
|
||||
};
|
||||
|
||||
inline Status::Status(const Status& s) {
|
||||
code_ = s.code_;
|
||||
state_ = (s.state_ == nullptr) ? nullptr : CopyState(s.state_);
|
||||
}
|
||||
inline void Status::operator=(const Status& s) {
|
||||
// The following condition catches both aliasing (when this == &s),
|
||||
// and the common case where both s and *this are ok.
|
||||
code_ = s.code_;
|
||||
if (state_ != s.state_) {
|
||||
delete[] state_;
|
||||
state_ = (s.state_ == nullptr) ? nullptr : CopyState(s.state_);
|
||||
|
@ -56,7 +56,7 @@ class LogFile {
|
||||
};
|
||||
|
||||
struct BatchResult {
|
||||
SequenceNumber sequence = SequenceNumber();
|
||||
SequenceNumber sequence = 0;
|
||||
std::unique_ptr<WriteBatch> writeBatchPtr;
|
||||
};
|
||||
|
||||
|
133
include/utilities/backupable_db.h
Normal file
133
include/utilities/backupable_db.h
Normal file
@ -0,0 +1,133 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#pragma once
|
||||
#include "utilities/stackable_db.h"
|
||||
#include "rocksdb/env.h"
|
||||
#include "rocksdb/status.h"
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
struct BackupableDBOptions {
|
||||
// Where to keep the backup files. Has to be different than dbname_
|
||||
// Best to set this to dbname_ + "/backups"
|
||||
// Required
|
||||
std::string backup_dir;
|
||||
|
||||
// Backup Env object. It will be used for backup file I/O. If it's
|
||||
// nullptr, backups will be written out using DBs Env. If it's
|
||||
// non-nullptr, backup's I/O will be performed using this object.
|
||||
// If you want to have backups on HDFS, use HDFS Env here!
|
||||
// Default: nullptr
|
||||
Env* backup_env;
|
||||
|
||||
// Backup info and error messages will be written to info_log
|
||||
// if non-nullptr.
|
||||
// Default: nullptr
|
||||
Logger* info_log;
|
||||
|
||||
// If sync == true, we can guarantee you'll get consistent backup even
|
||||
// on a machine crash/reboot. Backup process is slower with sync enabled.
|
||||
// If sync == false, we don't guarantee anything on machine reboot. However,
|
||||
// chances are some of the backups are consistent.
|
||||
// Default: true
|
||||
bool sync;
|
||||
|
||||
// If true, it will delete whatever backups there are already
|
||||
// Default: false
|
||||
bool destroy_old_data;
|
||||
|
||||
explicit BackupableDBOptions(const std::string& _backup_dir,
|
||||
Env* _backup_env = nullptr,
|
||||
Logger* _info_log = nullptr,
|
||||
bool _sync = true,
|
||||
bool _destroy_old_data = false) :
|
||||
backup_dir(_backup_dir),
|
||||
backup_env(_backup_env),
|
||||
info_log(_info_log),
|
||||
sync(_sync),
|
||||
destroy_old_data(_destroy_old_data) { }
|
||||
};
|
||||
|
||||
class BackupEngine;
|
||||
|
||||
// Identifier of a single backup.
typedef uint32_t BackupID;

// Plain record describing one backup: its id, a timestamp and a size.
// NOTE(review): the units of `timestamp` and `size` are not visible in this
// header -- confirm against the backup engine before relying on them.
struct BackupInfo {
  BackupID backup_id;
  int64_t timestamp;
  uint64_t size;

  // Zero-initializes every field so that a default-constructed BackupInfo
  // never exposes indeterminate values.
  BackupInfo() : backup_id(0), timestamp(0), size(0) {}

  // Builds a fully populated record.
  BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size)
      : backup_id(_backup_id), timestamp(_timestamp), size(_size) {}
};
|
||||
|
||||
// Stack your DB with BackupableDB to be able to backup the DB
|
||||
class BackupableDB : public StackableDB {
|
||||
public:
|
||||
// BackupableDBOptions have to be the same as the ones used in a previous
|
||||
// incarnation of the DB
|
||||
//
|
||||
// BackupableDB owns the pointer `DB* db` now. You should not delete it or
|
||||
// use it after the invocation of BackupableDB
|
||||
BackupableDB(DB* db, const BackupableDBOptions& options);
|
||||
virtual ~BackupableDB();
|
||||
|
||||
// Captures the state of the database in the latest backup
|
||||
// NOT a thread safe call
|
||||
Status CreateNewBackup(bool flush_before_backup = false);
|
||||
// Returns info about backups in backup_info
|
||||
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
|
||||
// deletes old backups, keeping latest num_backups_to_keep alive
|
||||
Status PurgeOldBackups(uint32_t num_backups_to_keep);
|
||||
// deletes a specific backup
|
||||
Status DeleteBackup(BackupID backup_id);
|
||||
|
||||
private:
|
||||
BackupEngine* backup_engine_;
|
||||
};
|
||||
|
||||
// Use this class to access information about backups and restore from them
|
||||
class RestoreBackupableDB {
|
||||
public:
|
||||
RestoreBackupableDB(Env* db_env, const BackupableDBOptions& options);
|
||||
~RestoreBackupableDB();
|
||||
|
||||
// Returns info about backups in backup_info
|
||||
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
|
||||
|
||||
// restore from backup with backup_id
|
||||
// IMPORTANT -- if you restore from some backup that is not the latest,
|
||||
// and you start creating new backups from the new DB, all the backups
|
||||
// that were newer than the backup you restored from will be deleted
|
||||
//
|
||||
// Example: Let's say you have backups 1, 2, 3, 4, 5 and you restore 3.
|
||||
// If you try creating a new backup now, old backups 4 and 5 will be deleted
|
||||
// and new backup with ID 4 will be created.
|
||||
Status RestoreDBFromBackup(BackupID backup_id, const std::string& db_dir,
|
||||
const std::string& wal_dir);
|
||||
|
||||
// restore from the latest backup
|
||||
Status RestoreDBFromLatestBackup(const std::string& db_dir,
|
||||
const std::string& wal_dir);
|
||||
// deletes old backups, keeping latest num_backups_to_keep alive
|
||||
Status PurgeOldBackups(uint32_t num_backups_to_keep);
|
||||
// deletes a specific backup
|
||||
Status DeleteBackup(BackupID backup_id);
|
||||
|
||||
private:
|
||||
BackupEngine* backup_engine_;
|
||||
};
|
||||
|
||||
} // rocksdb namespace
|
@ -103,6 +103,10 @@ class StackableDB : public DB {
|
||||
return db_->Level0StopWriteTrigger();
|
||||
}
|
||||
|
||||
virtual const std::string& GetName() const override {
|
||||
return db_->GetName();
|
||||
}
|
||||
|
||||
virtual Env* GetEnv() const override {
|
||||
return db_->GetEnv();
|
||||
}
|
||||
|
@ -31,12 +31,7 @@ static const char* GetExecutableName()
|
||||
}
|
||||
}
|
||||
|
||||
static void StackTraceHandler(int sig) {
|
||||
// reset to default handler
|
||||
signal(sig, SIG_DFL);
|
||||
|
||||
fprintf(stderr, "Received signal %d (%s)\n", sig, strsignal(sig));
|
||||
|
||||
void PrintStack(int first_frames_to_skip) {
|
||||
const int kMaxFrames = 100;
|
||||
void *frames[kMaxFrames];
|
||||
|
||||
@ -45,11 +40,8 @@ static void StackTraceHandler(int sig) {
|
||||
|
||||
auto executable = GetExecutableName();
|
||||
|
||||
const int kSkip = 2; // skip the top two signal handler related frames
|
||||
|
||||
for (int i = kSkip; i < num_frames; ++i)
|
||||
{
|
||||
fprintf(stderr, "#%-2d %p ", i - kSkip, frames[i]);
|
||||
for (int i = first_frames_to_skip; i < num_frames; ++i) {
|
||||
fprintf(stderr, "#%-2d ", i - first_frames_to_skip);
|
||||
if (symbols) {
|
||||
fprintf(stderr, "%s ", symbols[i]);
|
||||
}
|
||||
@ -57,22 +49,29 @@ static void StackTraceHandler(int sig) {
|
||||
// out source to addr2line, for the address translation
|
||||
const int kLineMax = 256;
|
||||
char cmd[kLineMax];
|
||||
sprintf(cmd,"addr2line %p -e %s 2>&1", frames[i] , executable);
|
||||
sprintf(cmd, "addr2line %p -e %s -f -C 2>&1", frames[i], executable);
|
||||
auto f = popen(cmd, "r");
|
||||
if (f) {
|
||||
char line[kLineMax];
|
||||
while (fgets(line, sizeof(line), f)) {
|
||||
fprintf(stderr, "%s", line);
|
||||
line[strlen(line) - 1] = 0; // remove newline
|
||||
fprintf(stderr, "%s\t", line);
|
||||
}
|
||||
pclose(f);
|
||||
} else {
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " %p", frames[i]);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
static void StackTraceHandler(int sig) {
|
||||
// reset to default handler
|
||||
signal(sig, SIG_DFL);
|
||||
fprintf(stderr, "Received signal %d (%s)\n", sig, strsignal(sig));
|
||||
// skip the top three signal handler related frames
|
||||
PrintStack(3);
|
||||
// re-signal to default handler (so we still get core dump if needed...)
|
||||
raise(sig);
|
||||
}
|
||||
@ -96,6 +95,7 @@ void InstallStackTraceHandler() {
|
||||
namespace rocksdb {
|
||||
|
||||
void InstallStackTraceHandler() {}
|
||||
void PrintStack(int first_frames_to_skip) {}
|
||||
|
||||
}
|
||||
|
||||
|
329
util/autovector.h
Normal file
329
util/autovector.h
Normal file
@ -0,0 +1,329 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <stdexcept>
|
||||
#include <iterator>
|
||||
#include <vector>
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
// A vector that leverages pre-allocated stack-based array to achieve better
|
||||
// performance for arrays with a small number of items.
|
||||
//
|
||||
// The interface resembles that of vector, but with fewer features since we aim
|
||||
// to solve the problem that we have in hand, rather than implementing a
|
||||
// full-fledged generic container.
|
||||
//
|
||||
// Currently we don't support:
|
||||
// * reserve()/shrink_to_fit()/resize()
|
||||
// If used correctly, in most cases, people should not touch the
|
||||
// underlying vector at all.
|
||||
// * random insert()/erase(), please only use push_back()/pop_back().
|
||||
// * No move/swap operations. Each autovector instance has a
|
||||
// stack-allocated array and if we want support move/swap operations, we
|
||||
// need to copy the arrays other than just swapping the pointers. In this
|
||||
// case we'll just explicitly forbid these operations since they may
|
||||
// lead users to make false assumption by thinking they are inexpensive
|
||||
// operations.
|
||||
//
|
||||
// Naming style of public methods almost follows that of the STL's.
|
||||
// Vector-like container that keeps its first `kSize` items in an in-object
// array and spills any further items into a heap-backed std::vector.
//
// T must be default-constructible and assignable, because the in-object
// storage is a plain `T[kSize]` array that is filled by assignment.
template <class T, size_t kSize = 8>
class autovector {
 public:
  // General STL-style container member types.
  typedef T value_type;
  typedef typename std::vector<T>::difference_type difference_type;
  typedef typename std::vector<T>::size_type size_type;
  typedef value_type& reference;
  typedef const value_type& const_reference;
  typedef value_type* pointer;
  typedef const value_type* const_pointer;

  // Shared implementation of the mutable and const iterators. An iterator is
  // just a (container pointer, index) pair, so it stays meaningful when items
  // move from the in-object array to the heap vector.
  template <class TAutoVector, class TValueType>
  class iterator_impl {
   public:
    // -- iterator traits
    typedef iterator_impl<TAutoVector, TValueType> self_type;
    typedef TValueType value_type;
    typedef TValueType& reference;
    typedef TValueType* pointer;
    typedef typename TAutoVector::difference_type difference_type;
    typedef std::random_access_iterator_tag iterator_category;

    iterator_impl(TAutoVector* vect, size_t index)
        : vect_(vect), index_(index) {}
    iterator_impl(const iterator_impl&) = default;
    ~iterator_impl() {}
    iterator_impl& operator=(const iterator_impl&) = default;

    // -- Advancement
    // ++iterator
    self_type& operator++() {
      ++index_;
      return *this;
    }

    // iterator++
    self_type operator++(int) {
      auto old = *this;
      ++index_;
      return old;
    }

    // --iterator
    self_type& operator--() {
      --index_;
      return *this;
    }

    // iterator--
    self_type operator--(int) {
      auto old = *this;
      --index_;
      return old;
    }

    // The arithmetic operators below do not modify *this, so they are const
    // and can be applied to const iterators and temporaries alike.
    self_type operator-(difference_type len) const {
      return self_type(vect_, index_ - len);
    }

    // Distance between two iterators of the same container.
    difference_type operator-(const self_type& other) const {
      assert(vect_ == other.vect_);
      return index_ - other.index_;
    }

    self_type operator+(difference_type len) const {
      return self_type(vect_, index_ + len);
    }

    self_type& operator+=(difference_type len) {
      index_ += len;
      return *this;
    }

    self_type& operator-=(difference_type len) {
      index_ -= len;
      return *this;
    }

    // -- Reference
    // Only positions strictly before end() may be dereferenced.
    reference operator*() const {
      assert(vect_->size() > index_);
      return (*vect_)[index_];
    }
    pointer operator->() const {
      assert(vect_->size() > index_);
      return &(*vect_)[index_];
    }

    // -- Logical Operators
    // Comparing iterators of different containers is a programming error.
    bool operator==(const self_type& other) const {
      assert(vect_ == other.vect_);
      return index_ == other.index_;
    }

    bool operator!=(const self_type& other) const {
      return !(*this == other);
    }

    bool operator>(const self_type& other) const {
      assert(vect_ == other.vect_);
      return index_ > other.index_;
    }

    bool operator<(const self_type& other) const {
      assert(vect_ == other.vect_);
      return index_ < other.index_;
    }

    bool operator>=(const self_type& other) const {
      assert(vect_ == other.vect_);
      return index_ >= other.index_;
    }

    bool operator<=(const self_type& other) const {
      assert(vect_ == other.vect_);
      return index_ <= other.index_;
    }

   private:
    TAutoVector* vect_ = nullptr;
    size_t index_ = 0;
  };

  typedef iterator_impl<autovector, value_type> iterator;
  typedef iterator_impl<const autovector, const value_type> const_iterator;
  typedef std::reverse_iterator<iterator> reverse_iterator;
  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;

  autovector() = default;
  ~autovector() = default;

  // -- Immutable operations
  // Indicates whether the heap vector has never been used. Once items have
  // spilled over, the vector keeps its capacity, so this stays false even
  // after pop_back()/clear().
  bool only_in_stack() const {
    // If no element was inserted at all, the vector's capacity will be `0`.
    return vect_.capacity() == 0;
  }

  size_type size() const {
    return num_stack_items_ + vect_.size();
  }

  bool empty() const {
    return size() == 0;
  }

  // Unchecked access: `n` must be smaller than size().
  const_reference operator[](size_type n) const {
    return n < kSize ? values_[n] : vect_[n - kSize];
  }

  reference operator[](size_type n) {
    return n < kSize ? values_[n] : vect_[n - kSize];
  }

  // Checked access: throws std::out_of_range when `n` is out of bounds.
  const_reference at(size_type n) const {
    if (n >= size()) {
      throw std::out_of_range("autovector: index out of range");
    }
    return (*this)[n];
  }

  reference at(size_type n) {
    if (n >= size()) {
      throw std::out_of_range("autovector: index out of range");
    }
    return (*this)[n];
  }

  reference front() {
    assert(!empty());
    return *begin();
  }

  const_reference front() const {
    assert(!empty());
    return *begin();
  }

  reference back() {
    assert(!empty());
    return *(end() - 1);
  }

  const_reference back() const {
    assert(!empty());
    return *(end() - 1);
  }

  // -- Mutable Operations
  // Appends `item`, moving it into the in-object array while there is room
  // and into the heap vector afterwards.
  void push_back(T&& item) {
    if (num_stack_items_ < kSize) {
      values_[num_stack_items_++] = std::move(item);
    } else {
      // `item` is a named rvalue reference, i.e. an lvalue here; it has to
      // be moved explicitly or the vector would copy it.
      vect_.push_back(std::move(item));
    }
  }

  // Appends a copy of `item`.
  void push_back(const T& item) {
    push_back(value_type(item));
  }

  // Constructs a value from `args` (perfectly forwarded) and appends it.
  template <class... Args>
  void emplace_back(Args&&... args) {
    push_back(value_type(std::forward<Args>(args)...));
  }

  // Removes the last item. An item popped from the in-object array is not
  // destroyed or reset; its slot is simply overwritten by a later push_back.
  void pop_back() {
    assert(!empty());
    if (!vect_.empty()) {
      vect_.pop_back();
    } else {
      --num_stack_items_;
    }
  }

  void clear() {
    num_stack_items_ = 0;
    vect_.clear();
  }

  // -- Copy and Assignment
  autovector& assign(const autovector& other);

  autovector(const autovector& other) {
    assign(other);
  }

  autovector& operator=(const autovector& other) {
    return assign(other);
  }

  // Move operations are disallowed since it is very hard to make sure both
  // autovectors are allocated from the same function stack.
  autovector& operator=(autovector&& other) = delete;
  autovector(autovector&& other) = delete;

  // -- Iterator Operations
  iterator begin() {
    return iterator(this, 0);
  }

  const_iterator begin() const {
    return const_iterator(this, 0);
  }

  iterator end() {
    return iterator(this, this->size());
  }

  const_iterator end() const {
    return const_iterator(this, this->size());
  }

  reverse_iterator rbegin() {
    return reverse_iterator(end());
  }

  const_reverse_iterator rbegin() const {
    return const_reverse_iterator(end());
  }

  reverse_iterator rend() {
    return reverse_iterator(begin());
  }

  const_reverse_iterator rend() const {
    return const_reverse_iterator(begin());
  }

 private:
  size_type num_stack_items_ = 0;  // current number of items in `values_`
  value_type values_[kSize];       // the first `kSize` items
  // used only if there are more than `kSize` items.
  std::vector<T> vect_;
};

// Replaces the contents of *this with a copy of `other`.
template <class T, size_t kSize>
autovector<T, kSize>& autovector<T, kSize>::assign(const autovector& other) {
  // Self-assignment must be a no-op: vector::assign() and std::copy() are not
  // allowed to read from the very range they are writing to.
  if (this == &other) {
    return *this;
  }

  // copy the internal vector
  vect_.assign(other.vect_.begin(), other.vect_.end());

  // copy array
  num_stack_items_ = other.num_stack_items_;
  std::copy(other.values_, other.values_ + num_stack_items_, values_);

  return *this;
}
|
||||
|
||||
} // rocksdb
|
290
util/autovector_test.cc
Normal file
290
util/autovector_test.cc
Normal file
@ -0,0 +1,290 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
|
||||
#include "rocksdb/env.h"
|
||||
#include "util/autovector.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/testutil.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
using namespace std;
|
||||
|
||||
class AutoVectorTest { };
|
||||
|
||||
const size_t kSize = 8;
|
||||
TEST(AutoVectorTest, PushBackAndPopBack) {
|
||||
autovector<size_t, kSize> vec;
|
||||
ASSERT_TRUE(vec.empty());
|
||||
ASSERT_EQ(0ul, vec.size());
|
||||
|
||||
for (size_t i = 0; i < 1000 * kSize; ++i) {
|
||||
vec.push_back(i);
|
||||
ASSERT_TRUE(!vec.empty());
|
||||
if (i < kSize) {
|
||||
ASSERT_TRUE(vec.only_in_stack());
|
||||
} else {
|
||||
ASSERT_TRUE(!vec.only_in_stack());
|
||||
}
|
||||
ASSERT_EQ(i + 1, vec.size());
|
||||
ASSERT_EQ(i, vec[i]);
|
||||
ASSERT_EQ(i, vec.at(i));
|
||||
}
|
||||
|
||||
size_t size = vec.size();
|
||||
while (size != 0) {
|
||||
vec.pop_back();
|
||||
// will always be in heap
|
||||
ASSERT_TRUE(!vec.only_in_stack());
|
||||
ASSERT_EQ(--size, vec.size());
|
||||
}
|
||||
|
||||
ASSERT_TRUE(vec.empty());
|
||||
}
|
||||
|
||||
TEST(AutoVectorTest, EmplaceBack) {
|
||||
typedef std::pair<size_t, std::string> ValueType;
|
||||
autovector<ValueType, kSize> vec;
|
||||
|
||||
for (size_t i = 0; i < 1000 * kSize; ++i) {
|
||||
vec.emplace_back(i, std::to_string(i + 123));
|
||||
ASSERT_TRUE(!vec.empty());
|
||||
if (i < kSize) {
|
||||
ASSERT_TRUE(vec.only_in_stack());
|
||||
} else {
|
||||
ASSERT_TRUE(!vec.only_in_stack());
|
||||
}
|
||||
|
||||
ASSERT_EQ(i + 1, vec.size());
|
||||
ASSERT_EQ(i, vec[i].first);
|
||||
ASSERT_EQ(std::to_string(i + 123), vec[i].second);
|
||||
}
|
||||
|
||||
vec.clear();
|
||||
ASSERT_TRUE(vec.empty());
|
||||
ASSERT_TRUE(!vec.only_in_stack());
|
||||
}
|
||||
|
||||
void AssertEqual(
|
||||
const autovector<size_t, kSize>& a, const autovector<size_t, kSize>& b) {
|
||||
ASSERT_EQ(a.size(), b.size());
|
||||
ASSERT_EQ(a.empty(), b.empty());
|
||||
ASSERT_EQ(a.only_in_stack(), b.only_in_stack());
|
||||
for (size_t i = 0; i < a.size(); ++i) {
|
||||
ASSERT_EQ(a[i], b[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(AutoVectorTest, CopyAndAssignment) {
|
||||
// Test both heap-allocated and stack-allocated cases.
|
||||
for (auto size : { kSize / 2, kSize * 1000 }) {
|
||||
autovector<size_t, kSize> vec;
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
vec.push_back(i);
|
||||
}
|
||||
|
||||
{
|
||||
autovector<size_t, kSize> other;
|
||||
other = vec;
|
||||
AssertEqual(other, vec);
|
||||
}
|
||||
|
||||
{
|
||||
autovector<size_t, kSize> other(vec);
|
||||
AssertEqual(other, vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(AutoVectorTest, Iterators) {
|
||||
autovector<std::string, kSize> vec;
|
||||
for (size_t i = 0; i < kSize * 1000; ++i) {
|
||||
vec.push_back(std::to_string(i));
|
||||
}
|
||||
|
||||
// basic operator test
|
||||
ASSERT_EQ(vec.front(), *vec.begin());
|
||||
ASSERT_EQ(vec.back(), *(vec.end() - 1));
|
||||
ASSERT_TRUE(vec.begin() < vec.end());
|
||||
|
||||
// non-const iterator
|
||||
size_t index = 0;
|
||||
for (const auto& item : vec) {
|
||||
ASSERT_EQ(vec[index++], item);
|
||||
}
|
||||
|
||||
index = vec.size() - 1;
|
||||
for (auto pos = vec.rbegin(); pos != vec.rend(); ++pos) {
|
||||
ASSERT_EQ(vec[index--], *pos);
|
||||
}
|
||||
|
||||
// const iterator
|
||||
const auto& cvec = vec;
|
||||
index = 0;
|
||||
for (const auto& item : cvec) {
|
||||
ASSERT_EQ(cvec[index++], item);
|
||||
}
|
||||
|
||||
index = vec.size() - 1;
|
||||
for (auto pos = cvec.rbegin(); pos != cvec.rend(); ++pos) {
|
||||
ASSERT_EQ(cvec[index--], *pos);
|
||||
}
|
||||
|
||||
// forward and backward
|
||||
auto pos = vec.begin();
|
||||
while (pos != vec.end()) {
|
||||
auto old_val = *pos;
|
||||
auto old = pos++;
|
||||
// HACK: make sure -> works
|
||||
ASSERT_TRUE(!old->empty());
|
||||
ASSERT_EQ(old_val, *old);
|
||||
ASSERT_TRUE(pos == vec.end() || old_val != *pos);
|
||||
}
|
||||
|
||||
pos = vec.begin();
|
||||
for (size_t i = 0; i < vec.size(); i += 2) {
|
||||
// Cannot use ASSERT_EQ since that macro depends on iostream serialization
|
||||
ASSERT_TRUE(pos + 2 - 2 == pos);
|
||||
pos += 2;
|
||||
ASSERT_TRUE(pos >= vec.begin());
|
||||
ASSERT_TRUE(pos <= vec.end());
|
||||
|
||||
size_t diff = static_cast<size_t>(pos - vec.begin());
|
||||
ASSERT_EQ(i + 2, diff);
|
||||
}
|
||||
}
|
||||
|
||||
// Builds `size` distinct keys of the form "item-<i>", where <i> is the
// zero-based position of the key in the returned vector.
std::vector<std::string> GetTestKeys(size_t size) {
  std::vector<std::string> generated(size);
  for (size_t i = 0; i < generated.size(); ++i) {
    generated[i] = "item-" + std::to_string(i);
  }
  return generated;
}
|
||||
|
||||
template<class TVector>
|
||||
void BenchmarkVectorCreationAndInsertion(
|
||||
string name, size_t ops, size_t item_size,
|
||||
const std::vector<typename TVector::value_type>& items) {
|
||||
auto env = Env::Default();
|
||||
|
||||
int index = 0;
|
||||
auto start_time = env->NowNanos();
|
||||
auto ops_remaining = ops;
|
||||
while(ops_remaining--) {
|
||||
TVector v;
|
||||
for (size_t i = 0; i < item_size; ++i) {
|
||||
v.push_back(items[index++]);
|
||||
}
|
||||
}
|
||||
auto elapsed = env->NowNanos() - start_time;
|
||||
cout << "created " << ops << " " << name << " instances:\n\t"
|
||||
<< "each was inserted with " << item_size << " elements\n\t"
|
||||
<< "total time elapsed: " << elapsed << " (ns)" << endl;
|
||||
}
|
||||
|
||||
template <class TVector>
|
||||
size_t BenchmarkSequenceAccess(string name, size_t ops, size_t elem_size) {
|
||||
TVector v;
|
||||
for (const auto& item : GetTestKeys(elem_size)) {
|
||||
v.push_back(item);
|
||||
}
|
||||
auto env = Env::Default();
|
||||
|
||||
auto ops_remaining = ops;
|
||||
auto start_time = env->NowNanos();
|
||||
size_t total = 0;
|
||||
while (ops_remaining--) {
|
||||
auto end = v.end();
|
||||
for (auto pos = v.begin(); pos != end; ++pos) {
|
||||
total += pos->size();
|
||||
}
|
||||
}
|
||||
auto elapsed = env->NowNanos() - start_time;
|
||||
cout << "performed " << ops << " sequence access against " << name << "\n\t"
|
||||
<< "size: " << elem_size << "\n\t"
|
||||
<< "total time elapsed: " << elapsed << " (ns)" << endl;
|
||||
// HACK avoid compiler's optimization to ignore total
|
||||
return total;
|
||||
}
|
||||
|
||||
// This test case only reports the performance between std::vector<string>
|
||||
// and autovector<string>. We chose string for comparison because in most
|
||||
// of our use cases we used std::vector<string>.
|
||||
TEST(AutoVectorTest, PerfBench) {
|
||||
// We run same operations for kOps times in order to get a more fair result.
|
||||
size_t kOps = 100000;
|
||||
|
||||
// Creation and insertion test
|
||||
// Test the case when there is:
|
||||
// * no element inserted: internal array of std::vector may not really get
|
||||
// initialized.
|
||||
// * one element inserted: internal array of std::vector must have
|
||||
// initialized.
|
||||
// * kSize elements inserted. This shows the most time we'll spend if we
|
||||
// keep everything in stack.
|
||||
// * 2 * kSize elements inserted. The internal vector of
|
||||
// autovector must have been initialized.
|
||||
cout << "=====================================================" << endl;
|
||||
cout << "Creation and Insertion Test (value type: std::string)" << endl;
|
||||
cout << "=====================================================" << endl;
|
||||
|
||||
// pre-generated unique keys
|
||||
auto string_keys = GetTestKeys(kOps * 2 * kSize);
|
||||
for (auto insertions : { 0ul, 1ul, kSize / 2, kSize, 2 * kSize }) {
|
||||
BenchmarkVectorCreationAndInsertion<vector<string>>(
|
||||
"vector<string>", kOps, insertions, string_keys
|
||||
);
|
||||
BenchmarkVectorCreationAndInsertion<autovector<string, kSize>>(
|
||||
"autovector<string>", kOps, insertions, string_keys
|
||||
);
|
||||
cout << "-----------------------------------" << endl;
|
||||
}
|
||||
|
||||
cout << "=====================================================" << endl;
|
||||
cout << "Creation and Insertion Test (value type: uint64_t)" << endl;
|
||||
cout << "=====================================================" << endl;
|
||||
|
||||
// pre-generated unique keys
|
||||
vector<uint64_t> int_keys(kOps * 2 * kSize);
|
||||
for (size_t i = 0; i < kOps * 2 * kSize; ++i) {
|
||||
int_keys[i] = i;
|
||||
}
|
||||
for (auto insertions : { 0ul, 1ul, kSize / 2, kSize, 2 * kSize }) {
|
||||
BenchmarkVectorCreationAndInsertion<vector<uint64_t>>(
|
||||
"vector<uint64_t>", kOps, insertions, int_keys
|
||||
);
|
||||
BenchmarkVectorCreationAndInsertion<autovector<uint64_t, kSize>>(
|
||||
"autovector<uint64_t>", kOps, insertions, int_keys
|
||||
);
|
||||
cout << "-----------------------------------" << endl;
|
||||
}
|
||||
|
||||
// Sequence Access Test
|
||||
cout << "=====================================================" << endl;
|
||||
cout << "Sequence Access Test" << endl;
|
||||
cout << "=====================================================" << endl;
|
||||
for (auto elem_size : { kSize / 2, kSize, 2 * kSize }) {
|
||||
BenchmarkSequenceAccess<vector<string>>(
|
||||
"vector", kOps, elem_size
|
||||
);
|
||||
BenchmarkSequenceAccess<autovector<string, kSize>>(
|
||||
"autovector", kOps, elem_size
|
||||
);
|
||||
cout << "-----------------------------------" << endl;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
return rocksdb::test::RunAllTests();
|
||||
}
|
@ -10,7 +10,7 @@
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
|
||||
#include "rocksdb/cache.h"
|
||||
#include "port/port.h"
|
||||
@ -111,8 +111,8 @@ class HandleTable {
|
||||
}
|
||||
|
||||
void Resize() {
|
||||
uint32_t new_length = 4;
|
||||
while (new_length < elems_) {
|
||||
uint32_t new_length = 16;
|
||||
while (new_length < elems_ * 1.5) {
|
||||
new_length *= 2;
|
||||
}
|
||||
LRUHandle** new_list = new LRUHandle*[new_length];
|
||||
@ -264,18 +264,20 @@ Cache::Handle* LRUCache::Insert(
|
||||
|
||||
LRUHandle* e = reinterpret_cast<LRUHandle*>(
|
||||
malloc(sizeof(LRUHandle)-1 + key.size()));
|
||||
std::list<LRUHandle*> last_reference_list;
|
||||
std::vector<LRUHandle*> last_reference_list;
|
||||
last_reference_list.reserve(1);
|
||||
|
||||
e->value = value;
|
||||
e->deleter = deleter;
|
||||
e->charge = charge;
|
||||
e->key_length = key.size();
|
||||
e->hash = hash;
|
||||
e->refs = 2; // One from LRUCache, one for the returned handle
|
||||
memcpy(e->key_data, key.data(), key.size());
|
||||
|
||||
{
|
||||
MutexLock l(&mutex_);
|
||||
|
||||
e->value = value;
|
||||
e->deleter = deleter;
|
||||
e->charge = charge;
|
||||
e->key_length = key.size();
|
||||
e->hash = hash;
|
||||
e->refs = 2; // One from LRUCache, one for the returned handle
|
||||
memcpy(e->key_data, key.data(), key.size());
|
||||
LRU_Append(e);
|
||||
|
||||
LRUHandle* old = table_.Insert(e);
|
||||
|
@ -217,6 +217,17 @@ Slice GetLengthPrefixedSlice(const char* data) {
|
||||
return Slice(p, len);
|
||||
}
|
||||
|
||||
// Returns the leading part of `*slice` that precedes the first occurrence of
// `delimiter` (the delimiter itself is not included), and strips that part
// from `*slice` via remove_prefix(). When a delimiter was found it is
// stripped as well; when none exists the whole slice is returned and
// `*slice` has its entire contents removed.
Slice GetSliceUntil(Slice* slice, char delimiter) {
  const size_t total = slice->size();

  // The counter has the same width as Slice::size() so that it cannot wrap
  // around on very large inputs.
  size_t len = 0;
  while (len < total && slice->data()[len] != delimiter) {
    ++len;
  }

  Slice ret(slice->data(), len);
  // Skip the delimiter too, but only if the scan stopped on one.
  slice->remove_prefix(len + ((len < total) ? 1 : 0));
  return ret;
}
|
||||
|
||||
void BitStreamPutInt(char* dst, size_t dstlen, size_t offset,
|
||||
uint32_t bits, uint64_t value) {
|
||||
assert((offset + bits + 7)/8 <= dstlen);
|
||||
|
@ -40,6 +40,8 @@ extern bool GetVarint64(Slice* input, uint64_t* value);
|
||||
extern bool GetLengthPrefixedSlice(Slice* input, Slice* result);
|
||||
extern Slice GetLengthPrefixedSlice(const char* data);
|
||||
|
||||
extern Slice GetSliceUntil(Slice* slice, char delimiter);
|
||||
|
||||
// Pointer-based variants of GetVarint... These either store a value
|
||||
// in *v and return a pointer just past the parsed value, or return
|
||||
// nullptr on error. These routines only look at bytes in the range
|
||||
|
@ -395,7 +395,7 @@ class PosixMmapFile : public WritableFile {
|
||||
}
|
||||
|
||||
Status MapNewRegion() {
|
||||
#ifdef OS_LINUX
|
||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||
assert(base_ == nullptr);
|
||||
|
||||
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
||||
@ -581,7 +581,7 @@ class PosixMmapFile : public WritableFile {
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||
virtual Status Allocate(off_t offset, off_t len) {
|
||||
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
||||
if (!fallocate(fd_, FALLOC_FL_KEEP_SIZE, offset, len)) {
|
||||
@ -758,7 +758,7 @@ class PosixWritableFile : public WritableFile {
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||
virtual Status Allocate(off_t offset, off_t len) {
|
||||
TEST_KILL_RANDOM(rocksdb_kill_odds);
|
||||
if (!fallocate(fd_, FALLOC_FL_KEEP_SIZE, offset, len)) {
|
||||
@ -862,7 +862,7 @@ class PosixRandomRWFile : public RandomRWFile {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||
virtual Status Allocate(off_t offset, off_t len) {
|
||||
if (!fallocate(fd_, FALLOC_FL_KEEP_SIZE, offset, len)) {
|
||||
return Status::OK();
|
||||
@ -1303,7 +1303,7 @@ class PosixEnv : public Env {
|
||||
}
|
||||
|
||||
bool SupportsFastAllocate(const std::string& path) {
|
||||
#ifdef OS_LINUX
|
||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||
struct statfs s;
|
||||
if (statfs(path.c_str(), &s)){
|
||||
return false;
|
||||
|
@ -36,15 +36,19 @@ class PosixLogger : public Logger {
|
||||
const static uint64_t flush_every_seconds_ = 5;
|
||||
std::atomic_uint_fast64_t last_flush_micros_;
|
||||
Env* env_;
|
||||
bool flush_pending_;
|
||||
public:
|
||||
PosixLogger(FILE* f, uint64_t (*gettid)(), Env* env) :
|
||||
file_(f), gettid_(gettid), log_size_(0), fd_(fileno(f)),
|
||||
last_flush_micros_(0), env_(env) { }
|
||||
last_flush_micros_(0), env_(env), flush_pending_(false) { }
|
||||
virtual ~PosixLogger() {
|
||||
fclose(file_);
|
||||
}
|
||||
virtual void Flush() {
|
||||
fflush(file_);
|
||||
if (flush_pending_) {
|
||||
flush_pending_ = false;
|
||||
fflush(file_);
|
||||
}
|
||||
last_flush_micros_ = env_->NowMicros();
|
||||
}
|
||||
virtual void Logv(const char* format, va_list ap) {
|
||||
@ -107,7 +111,7 @@ class PosixLogger : public Logger {
|
||||
assert(p <= limit);
|
||||
const size_t write_size = p - base;
|
||||
|
||||
#ifdef OS_LINUX
|
||||
#ifdef ROCKSDB_FALLOCATE_PRESENT
|
||||
// If this write would cross a boundary of kDebugLogChunkSize
|
||||
// space, pre-allocate more space to avoid overly large
|
||||
// allocations from filesystem allocsize options.
|
||||
@ -124,6 +128,7 @@ class PosixLogger : public Logger {
|
||||
#endif
|
||||
|
||||
size_t sz = fwrite(base, 1, write_size, file_);
|
||||
flush_pending_ = true;
|
||||
assert(sz == write_size);
|
||||
if (sz > 0) {
|
||||
log_size_ += write_size;
|
||||
@ -131,6 +136,7 @@ class PosixLogger : public Logger {
|
||||
uint64_t now_micros = static_cast<uint64_t>(now_tv.tv_sec) * 1000000 +
|
||||
now_tv.tv_usec;
|
||||
if (now_micros - last_flush_micros_ >= flush_every_seconds_ * 1000000) {
|
||||
flush_pending_ = false;
|
||||
fflush(file_);
|
||||
last_flush_micros_ = now_micros;
|
||||
}
|
||||
|
@ -11,4 +11,7 @@ namespace rocksdb {
|
||||
// Currently supports linux only. No-op otherwise.
|
||||
void InstallStackTraceHandler();
|
||||
|
||||
// Prints stack, skips skip_first_frames frames
|
||||
void PrintStack(int first_frames_to_skip = 0);
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -16,68 +16,65 @@ namespace rocksdb {
|
||||
const char* Status::CopyState(const char* state) {
|
||||
uint32_t size;
|
||||
memcpy(&size, state, sizeof(size));
|
||||
char* result = new char[size + 5];
|
||||
memcpy(result, state, size + 5);
|
||||
char* result = new char[size + 4];
|
||||
memcpy(result, state, size + 4);
|
||||
return result;
|
||||
}
|
||||
|
||||
Status::Status(Code code, const Slice& msg, const Slice& msg2) {
|
||||
Status::Status(Code code, const Slice& msg, const Slice& msg2) :
|
||||
code_(code) {
|
||||
assert(code != kOk);
|
||||
const uint32_t len1 = msg.size();
|
||||
const uint32_t len2 = msg2.size();
|
||||
const uint32_t size = len1 + (len2 ? (2 + len2) : 0);
|
||||
char* result = new char[size + 5];
|
||||
char* result = new char[size + 4];
|
||||
memcpy(result, &size, sizeof(size));
|
||||
result[4] = static_cast<char>(code);
|
||||
memcpy(result + 5, msg.data(), len1);
|
||||
memcpy(result + 4, msg.data(), len1);
|
||||
if (len2) {
|
||||
result[5 + len1] = ':';
|
||||
result[6 + len1] = ' ';
|
||||
memcpy(result + 7 + len1, msg2.data(), len2);
|
||||
result[4 + len1] = ':';
|
||||
result[5 + len1] = ' ';
|
||||
memcpy(result + 6 + len1, msg2.data(), len2);
|
||||
}
|
||||
state_ = result;
|
||||
}
|
||||
|
||||
std::string Status::ToString() const {
|
||||
if (state_ == nullptr) {
|
||||
return "OK";
|
||||
} else {
|
||||
char tmp[30];
|
||||
const char* type;
|
||||
switch (code()) {
|
||||
case kOk:
|
||||
type = "OK";
|
||||
break;
|
||||
case kNotFound:
|
||||
type = "NotFound: ";
|
||||
break;
|
||||
case kCorruption:
|
||||
type = "Corruption: ";
|
||||
break;
|
||||
case kNotSupported:
|
||||
type = "Not implemented: ";
|
||||
break;
|
||||
case kInvalidArgument:
|
||||
type = "Invalid argument: ";
|
||||
break;
|
||||
case kIOError:
|
||||
type = "IO error: ";
|
||||
break;
|
||||
case kMergeInProgress:
|
||||
type = "Merge In Progress: ";
|
||||
break;
|
||||
default:
|
||||
snprintf(tmp, sizeof(tmp), "Unknown code(%d): ",
|
||||
static_cast<int>(code()));
|
||||
type = tmp;
|
||||
break;
|
||||
}
|
||||
std::string result(type);
|
||||
char tmp[30];
|
||||
const char* type;
|
||||
switch (code_) {
|
||||
case kOk:
|
||||
return "OK";
|
||||
case kNotFound:
|
||||
type = "NotFound: ";
|
||||
break;
|
||||
case kCorruption:
|
||||
type = "Corruption: ";
|
||||
break;
|
||||
case kNotSupported:
|
||||
type = "Not implemented: ";
|
||||
break;
|
||||
case kInvalidArgument:
|
||||
type = "Invalid argument: ";
|
||||
break;
|
||||
case kIOError:
|
||||
type = "IO error: ";
|
||||
break;
|
||||
case kMergeInProgress:
|
||||
type = "Merge In Progress: ";
|
||||
break;
|
||||
default:
|
||||
snprintf(tmp, sizeof(tmp), "Unknown code(%d): ",
|
||||
static_cast<int>(code()));
|
||||
type = tmp;
|
||||
break;
|
||||
}
|
||||
std::string result(type);
|
||||
if (state_ != nullptr) {
|
||||
uint32_t length;
|
||||
memcpy(&length, state_, sizeof(length));
|
||||
result.append(state_ + 5, length);
|
||||
return result;
|
||||
result.append(state_ + 4, length);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "rocksdb/env.h"
|
||||
#include "rocksdb/slice.h"
|
||||
#include "util/random.h"
|
||||
#include "util/stack_trace.h"
|
||||
|
||||
namespace rocksdb {
|
||||
namespace test {
|
||||
@ -58,6 +59,7 @@ class Tester {
|
||||
~Tester() {
|
||||
if (!ok_) {
|
||||
fprintf(stderr, "%s:%d:%s\n", fname_, line_, ss_.str().c_str());
|
||||
PrintStack(2);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
BIN
utilities/.DS_Store
vendored
BIN
utilities/.DS_Store
vendored
Binary file not shown.
874
utilities/backupable/backupable_db.cc
Normal file
874
utilities/backupable/backupable_db.cc
Normal file
@ -0,0 +1,874 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "utilities/backupable_db.h"
|
||||
#include "db/filename.h"
|
||||
#include "util/coding.h"
|
||||
#include "rocksdb/transaction_log.h"
|
||||
|
||||
#define __STDC_FORMAT_MACROS
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <limits>
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
// -------- BackupEngine class ---------
|
||||
class BackupEngine {
|
||||
public:
|
||||
BackupEngine(Env* db_env, const BackupableDBOptions& options);
|
||||
~BackupEngine();
|
||||
Status CreateNewBackup(DB* db, bool flush_before_backup = false);
|
||||
Status PurgeOldBackups(uint32_t num_backups_to_keep);
|
||||
Status DeleteBackup(BackupID backup_id);
|
||||
|
||||
void GetBackupInfo(std::vector<BackupInfo>* backup_info);
|
||||
Status RestoreDBFromBackup(BackupID backup_id, const std::string &db_dir,
|
||||
const std::string &wal_dir);
|
||||
Status RestoreDBFromLatestBackup(const std::string &db_dir,
|
||||
const std::string &wal_dir) {
|
||||
return RestoreDBFromBackup(latest_backup_id_, db_dir, wal_dir);
|
||||
}
|
||||
|
||||
void DeleteBackupsNewerThan(uint64_t sequence_number);
|
||||
|
||||
private:
|
||||
class BackupMeta {
|
||||
public:
|
||||
BackupMeta(const std::string& meta_filename,
|
||||
std::unordered_map<std::string, int>* file_refs, Env* env)
|
||||
: timestamp_(0), size_(0), meta_filename_(meta_filename),
|
||||
file_refs_(file_refs), env_(env) {}
|
||||
|
||||
~BackupMeta() {}
|
||||
|
||||
void RecordTimestamp() {
|
||||
env_->GetCurrentTime(×tamp_);
|
||||
}
|
||||
int64_t GetTimestamp() const {
|
||||
return timestamp_;
|
||||
}
|
||||
uint64_t GetSize() const {
|
||||
return size_;
|
||||
}
|
||||
void SetSequenceNumber(uint64_t sequence_number) {
|
||||
sequence_number_ = sequence_number;
|
||||
}
|
||||
uint64_t GetSequenceNumber() {
|
||||
return sequence_number_;
|
||||
}
|
||||
|
||||
void AddFile(const std::string& filename, uint64_t size);
|
||||
void Delete();
|
||||
|
||||
bool Empty() {
|
||||
return files_.empty();
|
||||
}
|
||||
|
||||
const std::vector<std::string>& GetFiles() {
|
||||
return files_;
|
||||
}
|
||||
|
||||
Status LoadFromFile(const std::string& backup_dir);
|
||||
Status StoreToFile(bool sync);
|
||||
|
||||
private:
|
||||
int64_t timestamp_;
|
||||
// sequence number is only approximate, should not be used
|
||||
// by clients
|
||||
uint64_t sequence_number_;
|
||||
uint64_t size_;
|
||||
std::string const meta_filename_;
|
||||
// files with relative paths (without "/" prefix!!)
|
||||
std::vector<std::string> files_;
|
||||
std::unordered_map<std::string, int>* file_refs_;
|
||||
Env* env_;
|
||||
|
||||
static const size_t max_backup_meta_file_size_ = 10 * 1024 * 1024; // 10MB
|
||||
}; // BackupMeta
|
||||
|
||||
inline std::string GetAbsolutePath(
|
||||
const std::string &relative_path = "") const {
|
||||
assert(relative_path.size() == 0 || relative_path[0] != '/');
|
||||
return options_.backup_dir + "/" + relative_path;
|
||||
}
|
||||
inline std::string GetPrivateDirRel() const {
|
||||
return "private";
|
||||
}
|
||||
inline std::string GetPrivateFileRel(BackupID backup_id,
|
||||
const std::string &file = "") const {
|
||||
assert(file.size() == 0 || file[0] != '/');
|
||||
return GetPrivateDirRel() + "/" + std::to_string(backup_id) + "/" + file;
|
||||
}
|
||||
inline std::string GetSharedFileRel(const std::string& file = "") const {
|
||||
assert(file.size() == 0 || file[0] != '/');
|
||||
return "shared/" + file;
|
||||
}
|
||||
inline std::string GetLatestBackupFile(bool tmp = false) const {
|
||||
return GetAbsolutePath(std::string("LATEST_BACKUP") + (tmp ? ".tmp" : ""));
|
||||
}
|
||||
inline std::string GetBackupMetaDir() const {
|
||||
return GetAbsolutePath("meta");
|
||||
}
|
||||
inline std::string GetBackupMetaFile(BackupID backup_id) const {
|
||||
return GetBackupMetaDir() + "/" + std::to_string(backup_id);
|
||||
}
|
||||
|
||||
Status GetLatestBackupFileContents(uint32_t* latest_backup);
|
||||
Status PutLatestBackupFileContents(uint32_t latest_backup);
|
||||
// if size_limit == 0, there is no size limit, copy everything
|
||||
Status CopyFile(const std::string& src,
|
||||
const std::string& dst,
|
||||
Env* src_env,
|
||||
Env* dst_env,
|
||||
bool sync,
|
||||
uint64_t* size = nullptr,
|
||||
uint64_t size_limit = 0);
|
||||
// if size_limit == 0, there is no size limit, copy everything
|
||||
Status BackupFile(BackupID backup_id,
|
||||
BackupMeta* backup,
|
||||
bool shared,
|
||||
const std::string& src_dir,
|
||||
const std::string& src_fname, // starts with "/"
|
||||
uint64_t size_limit = 0);
|
||||
// Will delete all the files we don't need anymore
|
||||
// If full_scan == true, it will do the full scan of files/ directory
|
||||
// and delete all the files that are not referenced from backuped_file_refs_
|
||||
void GarbageCollection(bool full_scan);
|
||||
|
||||
// backup state data
|
||||
BackupID latest_backup_id_;
|
||||
std::map<BackupID, BackupMeta> backups_;
|
||||
std::unordered_map<std::string, int> backuped_file_refs_;
|
||||
std::vector<BackupID> obsolete_backups_;
|
||||
|
||||
// options data
|
||||
BackupableDBOptions options_;
|
||||
Env* db_env_;
|
||||
Env* backup_env_;
|
||||
|
||||
static const size_t copy_file_buffer_size_ = 5 * 1024 * 1024LL; // 5MB
|
||||
};
|
||||
|
||||
// Prepares the backup directory tree, discovers the backups recorded in the
// meta directory and either discards them (options_.destroy_old_data) or
// loads them. Finishes by rewriting LATEST_BACKUP and doing a full garbage
// collection.
BackupEngine::BackupEngine(Env* db_env, const BackupableDBOptions& options)
    : options_(options),
      db_env_(db_env),
      backup_env_(options.backup_env != nullptr ? options.backup_env : db_env_) {

  // make sure every directory the engine works with exists
  backup_env_->CreateDirIfMissing(GetAbsolutePath());
  backup_env_->CreateDirIfMissing(GetAbsolutePath(GetSharedFileRel()));
  backup_env_->CreateDirIfMissing(GetAbsolutePath(GetPrivateDirRel()));
  backup_env_->CreateDirIfMissing(GetBackupMetaDir());

  // each well-formed entry of the meta directory names one backup
  const std::string meta_dir = GetBackupMetaDir();
  std::vector<std::string> meta_children;
  backup_env_->GetChildren(meta_dir, &meta_children);
  for (auto& child : meta_children) {
    BackupID id = 0;
    sscanf(child.c_str(), "%u", &id);
    const bool well_formed = (id != 0) && (child == std::to_string(id));
    if (!well_formed) {
      // not a canonical decimal backup id -- remove the stray entry
      backup_env_->DeleteFile(meta_dir + "/" + child);
      continue;
    }
    assert(backups_.find(id) == backups_.end());
    backups_.insert(std::make_pair(
        id,
        BackupMeta(GetBackupMetaFile(id), &backuped_file_refs_, backup_env_)));
  }

  if (!options_.destroy_old_data) {
    // Keep what is on storage: load every backup, dropping unreadable ones
    for (auto& entry : backups_) {
      Status load_status = entry.second.LoadFromFile(options_.backup_dir);
      if (load_status.ok()) {
        continue;
      }
      Log(options_.info_log, "Backup %u corrupted - deleting -- %s",
          entry.first, load_status.ToString().c_str());
      entry.second.Delete();
      obsolete_backups_.push_back(entry.first);
    }
    // forget the backups that failed to load
    for (auto dead_id : obsolete_backups_) {
      backups_.erase(dead_id);
    }

    Status latest_status = GetLatestBackupFileContents(&latest_backup_id_);
    // When LATEST_BACKUP is unusable or names a backup we do not have,
    // fall back to the biggest known id (0 when there are no backups)
    if (!latest_status.ok() ||
        backups_.find(latest_backup_id_) == backups_.end()) {
      latest_backup_id_ = backups_.empty() ? 0 : backups_.rbegin()->first;
    }
  } else {
    // Destroy old data: mark everything obsolete and start from scratch.
    // GarbageCollection() below performs the actual file deletion.
    for (auto& entry : backups_) {
      entry.second.Delete();
      obsolete_backups_.push_back(entry.first);
    }
    backups_.clear();
    latest_backup_id_ = 0;
  }

  // drop any backups that claim to be later than the latest one
  auto newer = backups_.upper_bound(latest_backup_id_);
  while (newer != backups_.end()) {
    newer->second.Delete();
    obsolete_backups_.push_back(newer->first);
    newer = backups_.erase(newer);
  }

  PutLatestBackupFileContents(latest_backup_id_);  // Ignore errors
  GarbageCollection(true);
  Log(options_.info_log,
      "Initialized BackupEngine, the latest backup is %u.",
      latest_backup_id_);
}
|
||||
|
||||
// The only teardown work is asking the info log to flush.
BackupEngine::~BackupEngine() { LogFlush(options_.info_log); }
|
||||
|
||||
void BackupEngine::DeleteBackupsNewerThan(uint64_t sequence_number) {
|
||||
for (auto backup : backups_) {
|
||||
if (backup.second.GetSequenceNumber() > sequence_number) {
|
||||
Log(options_.info_log,
|
||||
"Deleting backup %u because sequence number (%" PRIu64
|
||||
") is newer than %" PRIu64 "",
|
||||
backup.first, backup.second.GetSequenceNumber(), sequence_number);
|
||||
backup.second.Delete();
|
||||
obsolete_backups_.push_back(backup.first);
|
||||
}
|
||||
}
|
||||
for (auto ob : obsolete_backups_) {
|
||||
backups_.erase(backups_.find(ob));
|
||||
}
|
||||
auto itr = backups_.end();
|
||||
latest_backup_id_ = (itr == backups_.begin()) ? 0 : (--itr)->first;
|
||||
PutLatestBackupFileContents(latest_backup_id_); // Ignore errors
|
||||
GarbageCollection(false);
|
||||
}
|
||||
|
||||
// Creates backup number latest_backup_id_ + 1 from the live files of db.
// File deletions in db are disabled while files are being copied and are
// re-enabled on every exit path. If flush_before_backup is false, the live
// WAL files are backed up as well. On any failure the partially created
// backup is released and garbage collected, and the error is returned.
Status BackupEngine::CreateNewBackup(DB* db, bool flush_before_backup) {
  Status s;
  std::vector<std::string> live_files;
  VectorLogPtr live_wal_files;
  uint64_t manifest_file_size = 0;
  uint64_t sequence_number = db->GetLatestSequenceNumber();

  s = db->DisableFileDeletions();
  if (s.ok()) {
    // this will return live_files prefixed with "/"
    s = db->GetLiveFiles(live_files, &manifest_file_size, flush_before_backup);
  }
  // if we didn't flush before backup, we need to also get WAL files
  if (s.ok() && !flush_before_backup) {
    // returns file names prefixed with "/"
    s = db->GetSortedWalFiles(live_wal_files);
  }
  if (!s.ok()) {
    db->EnableFileDeletions();
    return s;
  }

  BackupID new_backup_id = latest_backup_id_ + 1;
  assert(backups_.find(new_backup_id) == backups_.end());
  auto ret = backups_.insert(std::make_pair(
      new_backup_id, BackupMeta(GetBackupMetaFile(new_backup_id),
                                &backuped_file_refs_, backup_env_)));
  assert(ret.second == true);
  auto& new_backup = ret.first->second;
  new_backup.RecordTimestamp();
  new_backup.SetSequenceNumber(sequence_number);

  Log(options_.info_log, "Started the backup process -- creating backup %u",
      new_backup_id);

  // create private dir
  s = backup_env_->CreateDir(GetAbsolutePath(GetPrivateFileRel(new_backup_id)));

  // copy live_files
  for (size_t i = 0; s.ok() && i < live_files.size(); ++i) {
    uint64_t number;
    FileType type;
    bool ok = ParseFileName(live_files[i], &number, &type);
    if (!ok) {
      assert(false);
      // Do not return from here: file deletions must be re-enabled and the
      // half-built backup cleaned up by the common failure path below.
      s = Status::Corruption("Can't parse file name. This is very bad");
      break;
    }
    // we should only get sst, manifest and current files here
    assert(type == kTableFile ||
             type == kDescriptorFile ||
             type == kCurrentFile);

    // rules:
    // * if it's kTableFile, than it's shared
    // * if it's kDescriptorFile, limit the size to manifest_file_size
    s = BackupFile(new_backup_id,
                   &new_backup,
                   type == kTableFile,       /* shared  */
                   db->GetName(),            /* src_dir */
                   live_files[i],            /* src_fname */
                   (type == kDescriptorFile) ? manifest_file_size : 0);
  }

  // copy WAL files
  for (size_t i = 0; s.ok() && i < live_wal_files.size(); ++i) {
    if (live_wal_files[i]->Type() == kAliveLogFile) {
      // we only care about live log files
      // copy the file into backup_dir/files/<new backup>/
      s = BackupFile(new_backup_id,
                     &new_backup,
                     false, /* not shared */
                     db->GetOptions().wal_dir,
                     live_wal_files[i]->PathName());
    }
  }

  // we copied all the files, enable file deletions
  db->EnableFileDeletions();

  if (s.ok()) {
    // persist the backup metadata on the disk
    s = new_backup.StoreToFile(options_.sync);
  }
  if (s.ok()) {
    // install the newly created backup meta! (atomic)
    s = PutLatestBackupFileContents(new_backup_id);
  }
  if (!s.ok()) {
    // clean all the files we might have created
    Log(options_.info_log, "Backup failed -- %s", s.ToString().c_str());
    // Release the file references taken by BackupFile() before dropping the
    // entry; otherwise the refcounts stay elevated and the files copied for
    // this failed backup are never garbage collected.
    new_backup.Delete();
    backups_.erase(new_backup_id);
    GarbageCollection(true);
    return s;
  }

  // here we know that we succeeded and installed the new backup
  // in the LATEST_BACKUP file
  latest_backup_id_ = new_backup_id;
  Log(options_.info_log, "Backup DONE. All is good");
  return s;
}
|
||||
|
||||
// Removes backups, smallest id first, until at most num_backups_to_keep
// remain, then garbage collects the files they released. Always returns OK.
Status BackupEngine::PurgeOldBackups(uint32_t num_backups_to_keep) {
  Log(options_.info_log, "Purging old backups, keeping %u",
      num_backups_to_keep);
  while (backups_.size() > num_backups_to_keep) {
    // std::map is ordered by key, so begin() is the smallest backup id
    auto oldest = backups_.begin();
    Log(options_.info_log, "Deleting backup %u", oldest->first);
    oldest->second.Delete();
    obsolete_backups_.push_back(oldest->first);
    backups_.erase(oldest);
  }
  GarbageCollection(false);
  return Status::OK();
}
|
||||
|
||||
// Deletes the backup with the given id and garbage collects its files.
// Returns NotFound when no such backup is known.
Status BackupEngine::DeleteBackup(BackupID backup_id) {
  Log(options_.info_log, "Deleting backup %u", backup_id);
  auto found = backups_.find(backup_id);
  if (found != backups_.end()) {
    found->second.Delete();
    obsolete_backups_.push_back(backup_id);
    backups_.erase(found);
    GarbageCollection(false);
    return Status::OK();
  }
  return Status::NotFound("Backup not found");
}
|
||||
|
||||
void BackupEngine::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
|
||||
backup_info->reserve(backups_.size());
|
||||
for (auto& backup : backups_) {
|
||||
if (!backup.second.Empty()) {
|
||||
backup_info->push_back(BackupInfo(
|
||||
backup.first, backup.second.GetTimestamp(), backup.second.GetSize()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Restores backup backup_id: clears the children of wal_dir and db_dir and
// then copies every file of the backup to its destination (log files go to
// wal_dir, everything else to db_dir). Returns NotFound for an unknown or
// empty backup and Corruption when a backed up file name cannot be parsed.
Status BackupEngine::RestoreDBFromBackup(BackupID backup_id,
                                         const std::string &db_dir,
                                         const std::string &wal_dir) {
  auto backup_itr = backups_.find(backup_id);
  if (backup_itr == backups_.end() || backup_itr->second.Empty()) {
    return Status::NotFound("Backup not found");
  }
  auto& backup = backup_itr->second;

  Log(options_.info_log, "Restoring backup id %u\n", backup_id);

  // just in case. Ignore errors
  db_env_->CreateDirIfMissing(db_dir);
  db_env_->CreateDirIfMissing(wal_dir);

  // Wipe wal_dir first. Stale log files are dangerous: they might get
  // replayed into the restored DB, which would then differ from the
  // backed up DB.
  std::vector<std::string> stale;
  db_env_->GetChildren(wal_dir, &stale);  // ignore errors
  for (const auto& name : stale) {
    db_env_->DeleteFile(wal_dir + "/" + name);  // ignore errors
  }
  // Wipe db_dir too. Less critical, because obsolete files will be deleted
  // by DBImpl::PurgeObsoleteFiles()
  stale.clear();
  db_env_->GetChildren(db_dir, &stale);  // ignore errors
  for (const auto& name : stale) {
    db_env_->DeleteFile(db_dir + "/" + name);  // ignore errors
  }

  Status s;
  for (auto& file : backup.GetFiles()) {
    // 1. strip the directory part; file is either shared/<file> or
    //    private/<number>/<file>
    const size_t slash = file.find_last_of('/');
    assert(slash != std::string::npos);
    std::string dst = file.substr(slash + 1);

    // 2. the file type decides the destination directory
    uint64_t number;
    FileType type;
    if (!ParseFileName(dst, &number, &type)) {
      return Status::Corruption("Backup corrupted");
    }

    // 3. kLogFile lives in wal_dir and all the rest live in db_dir
    const std::string& target_dir = (type == kLogFile) ? wal_dir : db_dir;
    dst = target_dir + "/" + dst;

    Log(options_.info_log, "Restoring %s to %s\n", file.c_str(), dst.c_str());
    s = CopyFile(GetAbsolutePath(file), dst, backup_env_, db_env_, false);
    if (!s.ok()) {
      break;
    }
  }

  Log(options_.info_log, "Restoring done -- %s\n", s.ToString().c_str());
  return s;
}
|
||||
|
||||
// latest backup id is an ASCII representation of latest backup id
|
||||
Status BackupEngine::GetLatestBackupFileContents(uint32_t* latest_backup) {
|
||||
Status s;
|
||||
unique_ptr<SequentialFile> file;
|
||||
s = backup_env_->NewSequentialFile(GetLatestBackupFile(),
|
||||
&file,
|
||||
EnvOptions());
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
char buf[11];
|
||||
Slice data;
|
||||
s = file->Read(10, &data, buf);
|
||||
if (!s.ok() || data.size() == 0) {
|
||||
return s.ok() ? Status::Corruption("Latest backup file corrupted") : s;
|
||||
}
|
||||
buf[data.size()] = 0;
|
||||
|
||||
*latest_backup = 0;
|
||||
sscanf(data.data(), "%u", latest_backup);
|
||||
if (backup_env_->FileExists(GetBackupMetaFile(*latest_backup)) == false) {
|
||||
s = Status::Corruption("Latest backup file corrupted");
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// this operation HAS to be atomic
|
||||
// writing 4 bytes to the file is atomic alright, but we should *never*
|
||||
// do something like 1. delete file, 2. write new file
|
||||
// We write to a tmp file and then atomically rename
|
||||
Status BackupEngine::PutLatestBackupFileContents(uint32_t latest_backup) {
|
||||
Status s;
|
||||
unique_ptr<WritableFile> file;
|
||||
EnvOptions env_options;
|
||||
env_options.use_mmap_writes = false;
|
||||
s = backup_env_->NewWritableFile(GetLatestBackupFile(true),
|
||||
&file,
|
||||
env_options);
|
||||
if (!s.ok()) {
|
||||
backup_env_->DeleteFile(GetLatestBackupFile(true));
|
||||
return s;
|
||||
}
|
||||
|
||||
char file_contents[10];
|
||||
int len = sprintf(file_contents, "%u\n", latest_backup);
|
||||
s = file->Append(Slice(file_contents, len));
|
||||
if (s.ok() && options_.sync) {
|
||||
file->Sync();
|
||||
}
|
||||
if (s.ok()) {
|
||||
s = file->Close();
|
||||
}
|
||||
if (s.ok()) {
|
||||
// atomically replace real file with new tmp
|
||||
s = backup_env_->RenameFile(GetLatestBackupFile(true),
|
||||
GetLatestBackupFile(false));
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
// Copies src (opened through src_env) to dst (created through dst_env) in
// chunks of at most copy_file_buffer_size_ bytes. At most size_limit bytes
// are copied; size_limit == 0 means no limit. If size is not nullptr it
// receives the number of bytes read. The destination is synced when sync is
// true and everything else succeeded.
Status BackupEngine::CopyFile(const std::string& src,
                              const std::string& dst,
                              Env* src_env,
                              Env* dst_env,
                              bool sync,
                              uint64_t* size,
                              uint64_t size_limit) {
  unique_ptr<WritableFile> dst_file;
  unique_ptr<SequentialFile> src_file;
  EnvOptions env_options;
  env_options.use_mmap_writes = false;

  if (size != nullptr) {
    *size = 0;
  }

  // bytes we are still allowed to copy; "no limit" becomes a huge budget
  uint64_t remaining = size_limit;
  if (remaining == 0) {
    remaining = std::numeric_limits<uint64_t>::max();
  }

  Status s = src_env->NewSequentialFile(src, &src_file, env_options);
  if (!s.ok()) {
    return s;
  }
  s = dst_env->NewWritableFile(dst, &dst_file, env_options);
  if (!s.ok()) {
    return s;
  }

  unique_ptr<char[]> buffer(new char[copy_file_buffer_size_]);
  Slice chunk;

  do {
    // never ask for more than the buffer holds or the budget allows
    size_t to_read = copy_file_buffer_size_;
    if (remaining <= to_read) {
      to_read = remaining;
    }
    s = src_file->Read(to_read, &chunk, buffer.get());
    remaining -= chunk.size();
    if (size != nullptr) {
      *size += chunk.size();
    }
    if (s.ok()) {
      s = dst_file->Append(chunk);
    }
    // an empty read ends the copy, as does exhausting the budget
  } while (s.ok() && chunk.size() > 0 && remaining > 0);

  if (s.ok() && sync) {
    s = dst_file->Sync();
  }

  return s;
}
|
||||
|
||||
// src_fname will always start with "/"
|
||||
Status BackupEngine::BackupFile(BackupID backup_id,
|
||||
BackupMeta* backup,
|
||||
bool shared,
|
||||
const std::string& src_dir,
|
||||
const std::string& src_fname,
|
||||
uint64_t size_limit) {
|
||||
|
||||
assert(src_fname.size() > 0 && src_fname[0] == '/');
|
||||
std::string dst_relative = src_fname.substr(1);
|
||||
if (shared) {
|
||||
dst_relative = GetSharedFileRel(dst_relative);
|
||||
} else {
|
||||
dst_relative = GetPrivateFileRel(backup_id, dst_relative);
|
||||
}
|
||||
std::string dst_path = GetAbsolutePath(dst_relative);
|
||||
Status s;
|
||||
uint64_t size;
|
||||
|
||||
// if it's shared, we also need to check if it exists -- if it does,
|
||||
// no need to copy it again
|
||||
if (shared && backup_env_->FileExists(dst_path)) {
|
||||
backup_env_->GetFileSize(dst_path, &size); // Ignore error
|
||||
Log(options_.info_log, "%s already present", src_fname.c_str());
|
||||
} else {
|
||||
Log(options_.info_log, "Copying %s", src_fname.c_str());
|
||||
s = CopyFile(src_dir + src_fname,
|
||||
dst_path,
|
||||
db_env_,
|
||||
backup_env_,
|
||||
options_.sync,
|
||||
&size,
|
||||
size_limit);
|
||||
}
|
||||
if (s.ok()) {
|
||||
backup->AddFile(dst_relative, size);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
void BackupEngine::GarbageCollection(bool full_scan) {
|
||||
Log(options_.info_log, "Starting garbage collection");
|
||||
std::vector<std::string> to_delete;
|
||||
for (auto& itr : backuped_file_refs_) {
|
||||
if (itr.second == 0) {
|
||||
Status s = backup_env_->DeleteFile(GetAbsolutePath(itr.first));
|
||||
Log(options_.info_log, "Deleting %s -- %s", itr.first.c_str(),
|
||||
s.ToString().c_str());
|
||||
to_delete.push_back(itr.first);
|
||||
}
|
||||
}
|
||||
for (auto& td : to_delete) {
|
||||
backuped_file_refs_.erase(td);
|
||||
}
|
||||
if (!full_scan) {
|
||||
// take care of private dirs -- if full_scan == true, then full_scan will
|
||||
// take care of them
|
||||
for (auto backup_id : obsolete_backups_) {
|
||||
std::string private_dir = GetPrivateFileRel(backup_id);
|
||||
Status s = backup_env_->DeleteDir(GetAbsolutePath(private_dir));
|
||||
Log(options_.info_log, "Deleting private dir %s -- %s",
|
||||
private_dir.c_str(), s.ToString().c_str());
|
||||
}
|
||||
}
|
||||
obsolete_backups_.clear();
|
||||
|
||||
if (full_scan) {
|
||||
Log(options_.info_log, "Starting full scan garbage collection");
|
||||
// delete obsolete shared files
|
||||
std::vector<std::string> shared_children;
|
||||
backup_env_->GetChildren(GetAbsolutePath(GetSharedFileRel()),
|
||||
&shared_children);
|
||||
for (auto& child : shared_children) {
|
||||
std::string rel_fname = GetSharedFileRel(child);
|
||||
// if it's not refcounted, delete it
|
||||
if (backuped_file_refs_.find(rel_fname) == backuped_file_refs_.end()) {
|
||||
// this might be a directory, but DeleteFile will just fail in that
|
||||
// case, so we're good
|
||||
Status s = backup_env_->DeleteFile(GetAbsolutePath(rel_fname));
|
||||
if (s.ok()) {
|
||||
Log(options_.info_log, "Deleted %s", rel_fname.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// delete obsolete private files
|
||||
std::vector<std::string> private_children;
|
||||
backup_env_->GetChildren(GetAbsolutePath(GetPrivateDirRel()),
|
||||
&private_children);
|
||||
for (auto& child : private_children) {
|
||||
BackupID backup_id = 0;
|
||||
sscanf(child.c_str(), "%u", &backup_id);
|
||||
if (backup_id == 0 || backups_.find(backup_id) != backups_.end()) {
|
||||
// it's either not a number or it's still alive. continue
|
||||
continue;
|
||||
}
|
||||
// here we have to delete the dir and all its children
|
||||
std::string full_private_path =
|
||||
GetAbsolutePath(GetPrivateFileRel(backup_id));
|
||||
std::vector<std::string> subchildren;
|
||||
backup_env_->GetChildren(full_private_path, &subchildren);
|
||||
for (auto& subchild : subchildren) {
|
||||
Status s = backup_env_->DeleteFile(full_private_path + subchild);
|
||||
if (s.ok()) {
|
||||
Log(options_.info_log, "Deleted %s",
|
||||
(full_private_path + subchild).c_str());
|
||||
}
|
||||
}
|
||||
// finally delete the private dir
|
||||
Status s = backup_env_->DeleteDir(full_private_path);
|
||||
Log(options_.info_log, "Deleted dir %s -- %s", full_private_path.c_str(),
|
||||
s.ToString().c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ------- BackupMeta class --------
|
||||
|
||||
void BackupEngine::BackupMeta::AddFile(const std::string& filename,
|
||||
uint64_t size) {
|
||||
size_ += size;
|
||||
files_.push_back(filename);
|
||||
auto itr = file_refs_->find(filename);
|
||||
if (itr == file_refs_->end()) {
|
||||
file_refs_->insert(std::make_pair(filename, 1));
|
||||
} else {
|
||||
++itr->second; // increase refcount if already present
|
||||
}
|
||||
}
|
||||
|
||||
void BackupEngine::BackupMeta::Delete() {
|
||||
for (auto& file : files_) {
|
||||
auto itr = file_refs_->find(file);
|
||||
assert(itr != file_refs_->end());
|
||||
--(itr->second); // decrease refcount
|
||||
}
|
||||
files_.clear();
|
||||
// delete meta file
|
||||
env_->DeleteFile(meta_filename_);
|
||||
timestamp_ = 0;
|
||||
}
|
||||
|
||||
// each backup meta file is of the format:
|
||||
// <timestamp>
|
||||
// <seq number>
|
||||
// <number of files>
|
||||
// <file1>
|
||||
// <file2>
|
||||
// ...
|
||||
// TODO: maybe add checksum?
|
||||
// Reads this backup's meta file (meta_filename_) and registers every file it
// lists via AddFile(). The expected layout is three header lines (timestamp,
// sequence number, number of files) followed by one file name per line.
//
// backup_dir: directory that the listed file names are relative to; it is
//             used to look up each file's size.
//
// Returns a non-OK status when the meta file cannot be opened or read, when
// it fills the whole read buffer ("File size too big"), when a header line
// cannot be parsed, or when the size of a listed file cannot be obtained.
// No file is registered with AddFile() unless everything succeeded.
Status BackupEngine::BackupMeta::LoadFromFile(const std::string& backup_dir) {
  assert(Empty());
  Status s;
  unique_ptr<SequentialFile> backup_meta_file;
  s = env_->NewSequentialFile(meta_filename_, &backup_meta_file, EnvOptions());
  if (!s.ok()) {
    return s;
  }

  // one extra byte so the contents can be NUL-terminated for sscanf
  unique_ptr<char[]> buf(new char[max_backup_meta_file_size_ + 1]);
  Slice data;
  s = backup_meta_file->Read(max_backup_meta_file_size_, &data, buf.get());

  if (!s.ok() || data.size() == max_backup_meta_file_size_) {
    return s.ok() ? Status::IOError("File size too big") : s;
  }
  buf[data.size()] = 0;

  // Drops `consumed` parsed bytes plus the '\n' that ends the line from the
  // front of `data`. Returns false instead of stepping past the end of the
  // buffer when the line is missing or truncated.
  auto skip_line = [&data](int consumed) {
    if (consumed <= 0 || static_cast<size_t>(consumed) + 1 > data.size()) {
      return false;
    }
    data.remove_prefix(static_cast<size_t>(consumed) + 1);
    return true;
  };

  uint32_t num_files = 0;
  int bytes_read = 0;
  if (sscanf(data.data(), "%" PRId64 "%n", &timestamp_, &bytes_read) != 1 ||
      !skip_line(bytes_read)) {
    return Status::Corruption("Malformed timestamp in backup meta file");
  }
  bytes_read = 0;
  if (sscanf(data.data(), "%" PRIu64 "%n", &sequence_number_, &bytes_read) !=
          1 ||
      !skip_line(bytes_read)) {
    return Status::Corruption("Malformed sequence number in backup meta file");
  }
  bytes_read = 0;
  if (sscanf(data.data(), "%u%n", &num_files, &bytes_read) != 1 ||
      !skip_line(bytes_read)) {
    return Status::Corruption("Malformed file count in backup meta file");
  }

  // Collect first and register afterwards, so that a failure half-way
  // through leaves the file list and reference counts untouched.
  std::vector<std::pair<std::string, uint64_t>> files;

  for (uint32_t i = 0; s.ok() && i < num_files; ++i) {
    std::string filename = GetSliceUntil(&data, '\n').ToString();
    uint64_t size = 0;
    s = env_->GetFileSize(backup_dir + "/" + filename, &size);
    files.push_back(std::make_pair(filename, size));
  }

  if (s.ok()) {
    for (const auto& file : files) {
      AddFile(file.first, file.second);
    }
  }

  return s;
}
|
||||
|
||||
// Writes this backup's metadata -- timestamp, sequence number, number of
// files, then one file name per line -- to "<meta_filename_>.tmp" and renames
// it over meta_filename_ once the write (and the optional sync) succeeded.
//
// sync: when true, Sync() is called on the temporary file before it is closed.
//
// Returns an IOError without creating any file when the serialized metadata
// would reach max_backup_meta_file_size_, since LoadFromFile() rejects meta
// files of that size; otherwise returns the first failing status of the
// create/append/sync/close/rename sequence.
Status BackupEngine::BackupMeta::StoreToFile(bool sync) {
  // Build the contents in a growable string. Accumulating snprintf() return
  // values into a fixed buffer would run past its end once the file list no
  // longer fits, because snprintf() reports the untruncated length.
  char header[96];  // three 64-bit decimal numbers and newlines fit easily
  snprintf(header, sizeof(header), "%" PRId64 "\n%" PRIu64 "\n%zu\n",
           timestamp_, sequence_number_, files_.size());
  std::string contents(header);
  for (const auto& file : files_) {
    contents.append(file.c_str());
    contents.push_back('\n');
  }
  if (contents.size() >= max_backup_meta_file_size_) {
    return Status::IOError("Backup meta file too big");
  }

  Status s;
  unique_ptr<WritableFile> backup_meta_file;
  EnvOptions env_options;
  env_options.use_mmap_writes = false;
  s = env_->NewWritableFile(meta_filename_ + ".tmp", &backup_meta_file,
                            env_options);
  if (!s.ok()) {
    return s;
  }

  s = backup_meta_file->Append(Slice(contents));
  if (s.ok() && sync) {
    s = backup_meta_file->Sync();
  }
  if (s.ok()) {
    s = backup_meta_file->Close();
  }
  if (s.ok()) {
    // only a completely written file replaces the real meta file
    s = env_->RenameFile(meta_filename_ + ".tmp", meta_filename_);
  }
  return s;
}
|
||||
|
||||
// --- BackupableDB methods --------
|
||||
|
||||
// Wraps `db` as a StackableDB and creates a BackupEngine on the db's Env with
// the given options, then asks the engine to delete every backup newer than
// the DB's latest sequence number.
BackupableDB::BackupableDB(DB* db, const BackupableDBOptions& options)
|
||||
: StackableDB(db), backup_engine_(new BackupEngine(db->GetEnv(), options)) {
|
||||
backup_engine_->DeleteBackupsNewerThan(GetLatestSequenceNumber());
|
||||
}
|
||||
|
||||
// Frees the BackupEngine allocated by the constructor.
BackupableDB::~BackupableDB() {
|
||||
delete backup_engine_;
|
||||
}
|
||||
|
||||
// Forwards to BackupEngine::CreateNewBackup() for this DB, passing
// flush_before_backup through unchanged, and returns the engine's status.
Status BackupableDB::CreateNewBackup(bool flush_before_backup) {
|
||||
return backup_engine_->CreateNewBackup(this, flush_before_backup);
|
||||
}
|
||||
|
||||
// Forwards to BackupEngine::GetBackupInfo(), which fills *backup_info.
void BackupableDB::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
|
||||
backup_engine_->GetBackupInfo(backup_info);
|
||||
}
|
||||
|
||||
// Forwards to BackupEngine::PurgeOldBackups() with num_backups_to_keep and
// returns the engine's status.
Status BackupableDB::PurgeOldBackups(uint32_t num_backups_to_keep) {
|
||||
return backup_engine_->PurgeOldBackups(num_backups_to_keep);
|
||||
}
|
||||
|
||||
// Forwards to BackupEngine::DeleteBackup() for backup_id and returns the
// engine's status.
Status BackupableDB::DeleteBackup(BackupID backup_id) {
|
||||
return backup_engine_->DeleteBackup(backup_id);
|
||||
}
|
||||
|
||||
// --- RestoreBackupableDB methods ------
|
||||
|
||||
// Creates the BackupEngine that serves all restore operations, using db_env
// and the given backup options. No database is opened here.
RestoreBackupableDB::RestoreBackupableDB(Env* db_env,
|
||||
const BackupableDBOptions& options)
|
||||
: backup_engine_(new BackupEngine(db_env, options)) {}
|
||||
|
||||
// Frees the BackupEngine allocated by the constructor.
RestoreBackupableDB::~RestoreBackupableDB() {
|
||||
delete backup_engine_;
|
||||
}
|
||||
|
||||
// Forwards to BackupEngine::GetBackupInfo(), which fills *backup_info.
void
|
||||
RestoreBackupableDB::GetBackupInfo(std::vector<BackupInfo>* backup_info) {
|
||||
backup_engine_->GetBackupInfo(backup_info);
|
||||
}
|
||||
|
||||
// Forwards to BackupEngine::RestoreDBFromBackup() with the backup id and the
// target db/wal directories, and returns the engine's status.
Status RestoreBackupableDB::RestoreDBFromBackup(BackupID backup_id,
|
||||
const std::string& db_dir,
|
||||
const std::string& wal_dir) {
|
||||
return backup_engine_->RestoreDBFromBackup(backup_id, db_dir, wal_dir);
|
||||
}
|
||||
|
||||
// Forwards to BackupEngine::RestoreDBFromLatestBackup() with the target
// db/wal directories, and returns the engine's status.
Status
|
||||
RestoreBackupableDB::RestoreDBFromLatestBackup(const std::string& db_dir,
|
||||
const std::string& wal_dir) {
|
||||
return backup_engine_->RestoreDBFromLatestBackup(db_dir, wal_dir);
|
||||
}
|
||||
|
||||
// Forwards to BackupEngine::PurgeOldBackups() with num_backups_to_keep and
// returns the engine's status.
Status RestoreBackupableDB::PurgeOldBackups(uint32_t num_backups_to_keep) {
|
||||
return backup_engine_->PurgeOldBackups(num_backups_to_keep);
|
||||
}
|
||||
|
||||
// Forwards to BackupEngine::DeleteBackup() for backup_id and returns the
// engine's status.
Status RestoreBackupableDB::DeleteBackup(BackupID backup_id) {
|
||||
return backup_engine_->DeleteBackup(backup_id);
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
668
utilities/backupable/backupable_db_test.cc
Normal file
668
utilities/backupable/backupable_db_test.cc
Normal file
@ -0,0 +1,668 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "rocksdb/types.h"
|
||||
#include "rocksdb/transaction_log.h"
|
||||
#include "utilities/utility_db.h"
|
||||
#include "utilities/backupable_db.h"
|
||||
#include "util/testharness.h"
|
||||
#include "util/random.h"
|
||||
#include "util/testutil.h"
|
||||
#include "util/auto_roll_logger.h"
|
||||
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
namespace {
|
||||
|
||||
using std::unique_ptr;
|
||||
|
||||
class DummyDB : public StackableDB {
|
||||
public:
|
||||
/* implicit */
|
||||
DummyDB(const Options& options, const std::string& dbname)
|
||||
: StackableDB(nullptr), options_(options), dbname_(dbname),
|
||||
deletions_enabled_(true), sequence_number_(0) {}
|
||||
|
||||
virtual SequenceNumber GetLatestSequenceNumber() const {
|
||||
return ++sequence_number_;
|
||||
}
|
||||
|
||||
virtual const std::string& GetName() const override {
|
||||
return dbname_;
|
||||
}
|
||||
|
||||
virtual Env* GetEnv() const override {
|
||||
return options_.env;
|
||||
}
|
||||
|
||||
virtual const Options& GetOptions() const override {
|
||||
return options_;
|
||||
}
|
||||
|
||||
virtual Status EnableFileDeletions() override {
|
||||
ASSERT_TRUE(!deletions_enabled_);
|
||||
deletions_enabled_ = true;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual Status DisableFileDeletions() override {
|
||||
ASSERT_TRUE(deletions_enabled_);
|
||||
deletions_enabled_ = false;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual Status GetLiveFiles(std::vector<std::string>& vec, uint64_t* mfs,
|
||||
bool flush_memtable = true) override {
|
||||
ASSERT_TRUE(!deletions_enabled_);
|
||||
vec = live_files_;
|
||||
*mfs = 100;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
class DummyLogFile : public LogFile {
|
||||
public:
|
||||
/* implicit */
|
||||
DummyLogFile(const std::string& path, bool alive = true)
|
||||
: path_(path), alive_(alive) {}
|
||||
|
||||
virtual std::string PathName() const override {
|
||||
return path_;
|
||||
}
|
||||
|
||||
virtual uint64_t LogNumber() const {
|
||||
// what business do you have calling this method?
|
||||
ASSERT_TRUE(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual WalFileType Type() const override {
|
||||
return alive_ ? kAliveLogFile : kArchivedLogFile;
|
||||
}
|
||||
|
||||
virtual SequenceNumber StartSequence() const {
|
||||
// backupabledb should not need this method
|
||||
ASSERT_TRUE(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual uint64_t SizeFileBytes() const {
|
||||
// backupabledb should not need this method
|
||||
ASSERT_TRUE(false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string path_;
|
||||
bool alive_;
|
||||
}; // DummyLogFile
|
||||
|
||||
virtual Status GetSortedWalFiles(VectorLogPtr& files) override {
|
||||
ASSERT_TRUE(!deletions_enabled_);
|
||||
files.resize(wal_files_.size());
|
||||
for (size_t i = 0; i < files.size(); ++i) {
|
||||
files[i].reset(
|
||||
new DummyLogFile(wal_files_[i].first, wal_files_[i].second));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
std::vector<std::string> live_files_;
|
||||
// pair<filename, alive?>
|
||||
std::vector<std::pair<std::string, bool>> wal_files_;
|
||||
private:
|
||||
Options options_;
|
||||
std::string dbname_;
|
||||
bool deletions_enabled_;
|
||||
mutable SequenceNumber sequence_number_;
|
||||
}; // DummyDB
|
||||
|
||||
class TestEnv : public EnvWrapper {
|
||||
public:
|
||||
explicit TestEnv(Env* t) : EnvWrapper(t) {}
|
||||
|
||||
class DummySequentialFile : public SequentialFile {
|
||||
public:
|
||||
DummySequentialFile() : SequentialFile(), rnd_(5) {}
|
||||
virtual Status Read(size_t n, Slice* result, char* scratch) {
|
||||
size_t read_size = (n > size_left) ? size_left : n;
|
||||
for (size_t i = 0; i < read_size; ++i) {
|
||||
scratch[i] = rnd_.Next() & 255;
|
||||
}
|
||||
*result = Slice(scratch, read_size);
|
||||
size_left -= read_size;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
virtual Status Skip(uint64_t n) {
|
||||
size_left = (n > size_left) ? size_left - n : 0;
|
||||
return Status::OK();
|
||||
}
|
||||
private:
|
||||
size_t size_left = 200;
|
||||
Random rnd_;
|
||||
};
|
||||
|
||||
Status NewSequentialFile(const std::string& f,
|
||||
unique_ptr<SequentialFile>* r,
|
||||
const EnvOptions& options) {
|
||||
opened_files_.push_back(f);
|
||||
if (dummy_sequential_file_) {
|
||||
r->reset(new TestEnv::DummySequentialFile());
|
||||
return Status::OK();
|
||||
} else {
|
||||
return EnvWrapper::NewSequentialFile(f, r, options);
|
||||
}
|
||||
}
|
||||
|
||||
Status NewWritableFile(const std::string& f, unique_ptr<WritableFile>* r,
|
||||
const EnvOptions& options) {
|
||||
if (limit_written_files_ <= 0) {
|
||||
return Status::IOError("Sorry, can't do this");
|
||||
}
|
||||
limit_written_files_--;
|
||||
return EnvWrapper::NewWritableFile(f, r, options);
|
||||
}
|
||||
|
||||
void AssertOpenedFiles(std::vector<std::string>& should_have_opened) {
|
||||
sort(should_have_opened.begin(), should_have_opened.end());
|
||||
sort(opened_files_.begin(), opened_files_.end());
|
||||
ASSERT_TRUE(opened_files_ == should_have_opened);
|
||||
}
|
||||
|
||||
void ClearOpenedFiles() {
|
||||
opened_files_.clear();
|
||||
}
|
||||
|
||||
void SetLimitWrittenFiles(uint64_t limit) {
|
||||
limit_written_files_ = limit;
|
||||
}
|
||||
|
||||
void SetDummySequentialFile(bool dummy_sequential_file) {
|
||||
dummy_sequential_file_ = dummy_sequential_file;
|
||||
}
|
||||
|
||||
private:
|
||||
bool dummy_sequential_file_ = false;
|
||||
std::vector<std::string> opened_files_;
|
||||
uint64_t limit_written_files_ = 1000000;
|
||||
}; // TestEnv
|
||||
|
||||
class FileManager : public EnvWrapper {
|
||||
public:
|
||||
explicit FileManager(Env* t) : EnvWrapper(t), rnd_(5) {}
|
||||
|
||||
Status DeleteRandomFileInDir(const std::string dir) {
|
||||
std::vector<std::string> children;
|
||||
GetChildren(dir, &children);
|
||||
if (children.size() <= 2) { // . and ..
|
||||
return Status::NotFound("");
|
||||
}
|
||||
while (true) {
|
||||
int i = rnd_.Next() % children.size();
|
||||
if (children[i] != "." && children[i] != "..") {
|
||||
return DeleteFile(dir + "/" + children[i]);
|
||||
}
|
||||
}
|
||||
// should never get here
|
||||
assert(false);
|
||||
return Status::NotFound("");
|
||||
}
|
||||
|
||||
Status CorruptFile(const std::string& fname, uint64_t bytes_to_corrupt) {
|
||||
uint64_t size;
|
||||
Status s = GetFileSize(fname, &size);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
unique_ptr<RandomRWFile> file;
|
||||
EnvOptions env_options;
|
||||
env_options.use_mmap_writes = false;
|
||||
s = NewRandomRWFile(fname, &file, env_options);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
|
||||
for (uint64_t i = 0; s.ok() && i < bytes_to_corrupt; ++i) {
|
||||
std::string tmp;
|
||||
// write one random byte to a random position
|
||||
s = file->Write(rnd_.Next() % size, test::RandomString(&rnd_, 1, &tmp));
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
Status WriteToFile(const std::string& fname, const std::string& data) {
|
||||
unique_ptr<WritableFile> file;
|
||||
EnvOptions env_options;
|
||||
env_options.use_mmap_writes = false;
|
||||
Status s = EnvWrapper::NewWritableFile(fname, &file, env_options);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
return file->Append(Slice(data));
|
||||
}
|
||||
private:
|
||||
Random rnd_;
|
||||
}; // FileManager
|
||||
|
||||
// utility functions
|
||||
// Writes the pairs "testkey<n>" -> "testvalue<n>" for every n in [from, to)
// into db, asserting that each Put succeeds.
static void FillDB(DB* db, int from, int to) {
  int n = from;
  while (n < to) {
    const std::string suffix = std::to_string(n);
    const std::string k = "testkey" + suffix;
    const std::string v = "testvalue" + suffix;
    ASSERT_OK(db->Put(WriteOptions(), Slice(k), Slice(v)));
    ++n;
  }
}
|
||||
|
||||
// Asserts that db holds "testkey<i>" -> "testvalue<i>" for every i in
// [from, to).
static void AssertExists(DB* db, int from, int to) {
  for (int i = from; i < to; ++i) {
    std::string key = "testkey" + std::to_string(i);
    std::string value;
    // check the status too, so a failed read is reported as such rather
    // than as a confusing value mismatch
    ASSERT_OK(db->Get(ReadOptions(), Slice(key), &value));
    ASSERT_EQ(value, "testvalue" + std::to_string(i));
  }
}
|
||||
|
||||
// Asserts that none of the keys "testkey<n>", n in [from, to), is present in
// db: every lookup has to come back as NotFound.
static void AssertEmpty(DB* db, int from, int to) {
  int n = from;
  while (n < to) {
    const std::string suffix = std::to_string(n);
    const std::string lookup_key = "testkey" + suffix;
    std::string found = "testvalue" + suffix;

    const Status result = db->Get(ReadOptions(), Slice(lookup_key), &found);
    ASSERT_TRUE(result.IsNotFound());
    ++n;
  }
}
|
||||
|
||||
class BackupableDBTest {
|
||||
public:
|
||||
BackupableDBTest() {
|
||||
// set up files
|
||||
dbname_ = test::TmpDir() + "/backupable_db";
|
||||
backupdir_ = test::TmpDir() + "/backupable_db_backup";
|
||||
|
||||
// set up envs
|
||||
env_ = Env::Default();
|
||||
test_db_env_.reset(new TestEnv(env_));
|
||||
test_backup_env_.reset(new TestEnv(env_));
|
||||
file_manager_.reset(new FileManager(env_));
|
||||
|
||||
// set up db options
|
||||
options_.create_if_missing = true;
|
||||
options_.paranoid_checks = true;
|
||||
options_.write_buffer_size = 1 << 17; // 128KB
|
||||
options_.env = test_db_env_.get();
|
||||
options_.wal_dir = dbname_;
|
||||
// set up backup db options
|
||||
CreateLoggerFromOptions(dbname_, backupdir_, env_,
|
||||
Options(), &logger_);
|
||||
backupable_options_.reset(new BackupableDBOptions(
|
||||
backupdir_, test_backup_env_.get(), logger_.get(), true));
|
||||
|
||||
// delete old files in db
|
||||
DestroyDB(dbname_, Options());
|
||||
}
|
||||
|
||||
DB* OpenDB() {
|
||||
DB* db;
|
||||
ASSERT_OK(DB::Open(options_, dbname_, &db));
|
||||
return db;
|
||||
}
|
||||
|
||||
void OpenBackupableDB(bool destroy_old_data = false, bool dummy = false) {
|
||||
// reset all the defaults
|
||||
test_backup_env_->SetLimitWrittenFiles(1000000);
|
||||
test_db_env_->SetLimitWrittenFiles(1000000);
|
||||
test_db_env_->SetDummySequentialFile(dummy);
|
||||
|
||||
DB* db;
|
||||
if (dummy) {
|
||||
dummy_db_ = new DummyDB(options_, dbname_);
|
||||
db = dummy_db_;
|
||||
} else {
|
||||
ASSERT_OK(DB::Open(options_, dbname_, &db));
|
||||
}
|
||||
backupable_options_->destroy_old_data = destroy_old_data;
|
||||
db_.reset(new BackupableDB(db, *backupable_options_));
|
||||
}
|
||||
|
||||
void CloseBackupableDB() {
|
||||
db_.reset(nullptr);
|
||||
}
|
||||
|
||||
void OpenRestoreDB() {
|
||||
backupable_options_->destroy_old_data = false;
|
||||
restore_db_.reset(
|
||||
new RestoreBackupableDB(test_db_env_.get(), *backupable_options_));
|
||||
}
|
||||
|
||||
void CloseRestoreDB() {
|
||||
restore_db_.reset(nullptr);
|
||||
}
|
||||
|
||||
// restores backup backup_id and asserts the existence of
|
||||
// [start_exist, end_exist> and not-existence of
|
||||
// [end_exist, end>
|
||||
//
|
||||
// if backup_id == 0, it means restore from latest
|
||||
// if end == 0, don't check AssertEmpty
|
||||
void AssertBackupConsistency(BackupID backup_id, uint32_t start_exist,
|
||||
uint32_t end_exist, uint32_t end = 0) {
|
||||
bool opened_restore = false;
|
||||
if (restore_db_.get() == nullptr) {
|
||||
opened_restore = true;
|
||||
OpenRestoreDB();
|
||||
}
|
||||
if (backup_id > 0) {
|
||||
ASSERT_OK(restore_db_->RestoreDBFromBackup(backup_id, dbname_, dbname_));
|
||||
} else {
|
||||
ASSERT_OK(restore_db_->RestoreDBFromLatestBackup(dbname_, dbname_));
|
||||
}
|
||||
DB* db = OpenDB();
|
||||
AssertExists(db, start_exist, end_exist);
|
||||
if (end != 0) {
|
||||
AssertEmpty(db, end_exist, end);
|
||||
}
|
||||
delete db;
|
||||
if (opened_restore) {
|
||||
CloseRestoreDB();
|
||||
}
|
||||
}
|
||||
|
||||
// files
|
||||
std::string dbname_;
|
||||
std::string backupdir_;
|
||||
|
||||
// envs
|
||||
Env* env_;
|
||||
unique_ptr<TestEnv> test_db_env_;
|
||||
unique_ptr<TestEnv> test_backup_env_;
|
||||
unique_ptr<FileManager> file_manager_;
|
||||
|
||||
// all the dbs!
|
||||
DummyDB* dummy_db_; // BackupableDB owns dummy_db_
|
||||
unique_ptr<BackupableDB> db_;
|
||||
unique_ptr<RestoreBackupableDB> restore_db_;
|
||||
|
||||
// options
|
||||
Options options_;
|
||||
unique_ptr<BackupableDBOptions> backupable_options_;
|
||||
std::shared_ptr<Logger> logger_;
|
||||
}; // BackupableDBTest
|
||||
|
||||
// Puts `path` in front of every string in v, modifying v in place
// (despite the name, the path is prepended, not appended).
void AppendPath(const std::string& path, std::vector<std::string>& v) {
  for (size_t idx = 0; idx < v.size(); ++idx) {
    v[idx] = path + v[idx];
  }
}
|
||||
|
||||
// this will make sure that backup does not copy the same file twice
|
||||
// Runs two backups against a DummyDB whose file lists overlap and checks,
// via the files the db Env was asked to open, that a file already present in
// the backup is not copied again; then deletes the first backup and checks
// which shared files survive.
TEST(BackupableDBTest, NoDoubleCopy) {
  OpenBackupableDB(true, true);

  // should write 5 DB files + LATEST_BACKUP + one meta file
  test_backup_env_->SetLimitWrittenFiles(7);
  test_db_env_->ClearOpenedFiles();
  test_db_env_->SetLimitWrittenFiles(0);
  dummy_db_->live_files_ = { "/00010.sst", "/00011.sst",
                             "/CURRENT",   "/MANIFEST-01" };
  dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}};
  ASSERT_OK(db_->CreateNewBackup(false));
  std::vector<std::string> should_have_opened = dummy_db_->live_files_;
  should_have_opened.push_back("/00011.log");
  AppendPath(dbname_, should_have_opened);
  test_db_env_->AssertOpenedFiles(should_have_opened);

  // should write 4 new DB files + LATEST_BACKUP + one meta file
  // should not write/copy 00010.sst, since it's already there!
  test_backup_env_->SetLimitWrittenFiles(6);
  test_db_env_->ClearOpenedFiles();
  dummy_db_->live_files_ = { "/00010.sst", "/00015.sst",
                             "/CURRENT",   "/MANIFEST-01" };
  dummy_db_->wal_files_ = {{"/00011.log", true}, {"/00012.log", false}};
  ASSERT_OK(db_->CreateNewBackup(false));
  // should not open 00010.sst - it's already there
  should_have_opened = { "/00015.sst", "/CURRENT",
                         "/MANIFEST-01", "/00011.log" };
  AppendPath(dbname_, should_have_opened);
  test_db_env_->AssertOpenedFiles(should_have_opened);

  ASSERT_OK(db_->DeleteBackup(1));
  ASSERT_EQ(true,
            test_backup_env_->FileExists(backupdir_ + "/shared/00010.sst"));
  // 00011.sst was only in backup 1, should be deleted
  ASSERT_EQ(false,
            test_backup_env_->FileExists(backupdir_ + "/shared/00011.sst"));
  ASSERT_EQ(true,
            test_backup_env_->FileExists(backupdir_ + "/shared/00015.sst"));

  // MANIFEST file size should be only 100
  // (check each lookup, so a missing file cannot leave `size` stale)
  uint64_t size = 0;
  ASSERT_OK(test_backup_env_->GetFileSize(
      backupdir_ + "/private/2/MANIFEST-01", &size));
  ASSERT_EQ(100UL, size);
  ASSERT_OK(test_backup_env_->GetFileSize(backupdir_ + "/shared/00015.sst",
                                          &size));
  ASSERT_EQ(200UL, size);

  CloseBackupableDB();
}
|
||||
|
||||
// test various kind of corruptions that may happen:
|
||||
// 1. Not able to write a file for backup - that backup should fail,
|
||||
// everything else should work
|
||||
// 2. Corrupted/deleted LATEST_BACKUP - everything should work fine
|
||||
// 3. Corrupted backup meta file or missing backuped file - we should
|
||||
// not be able to open that backup, but all other backups should be
|
||||
// fine
|
||||
// Walks through the three corruption scenarios described in the comment
// preceding this test, in order, on one set of backups.
TEST(BackupableDBTest, CorruptionsTest) {
  const int batch = 5000;  // keys added before each backup
  Random coin(6);          // decides flush-before-backup
  Status status;

  OpenBackupableDB(true);
  // backups 1 through 5, each holding one more batch of keys
  for (int round = 0; round < 5; ++round) {
    FillDB(db_.get(), batch * round, batch * (round + 1));
    ASSERT_OK(db_->CreateNewBackup(!!(coin.Next() % 2)));
  }

  // ---------- case 1. - fail a write -----------
  // backup 6 is attempted with room for only two written files
  FillDB(db_.get(), batch * 5, batch * 6);
  test_backup_env_->SetLimitWrittenFiles(2);
  // this one has to fail
  status = db_->CreateNewBackup(!!(coin.Next() % 2));
  ASSERT_TRUE(!status.ok());
  test_backup_env_->SetLimitWrittenFiles(1000000);
  // the latest usable backup still has all keys of the first five batches
  CloseBackupableDB();
  AssertBackupConsistency(0, 0, batch * 5, batch * 6);

  // ---------- case 2. - corrupt/delete latest backup -----------
  ASSERT_OK(file_manager_->CorruptFile(backupdir_ + "/LATEST_BACKUP", 2));
  AssertBackupConsistency(0, 0, batch * 5);
  ASSERT_OK(file_manager_->DeleteFile(backupdir_ + "/LATEST_BACKUP"));
  AssertBackupConsistency(0, 0, batch * 5);
  // make backup 6, then force LATEST_BACKUP back to 5
  OpenBackupableDB();
  FillDB(db_.get(), batch * 5, batch * 6);
  ASSERT_OK(db_->CreateNewBackup(false));
  CloseBackupableDB();
  ASSERT_OK(file_manager_->WriteToFile(backupdir_ + "/LATEST_BACKUP", "5"));
  AssertBackupConsistency(0, 0, batch * 5, batch * 6);
  // nothing of backup 6 may be left behind
  ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/meta/6") == false);
  ASSERT_TRUE(file_manager_->FileExists(backupdir_ + "/private/6") == false);

  // --------- case 3. corrupted backup meta or missing backuped file ----
  ASSERT_OK(file_manager_->CorruptFile(backupdir_ + "/meta/5", 3));
  // meta 5 is damaged, so the latest backup is now 4
  AssertBackupConsistency(0, 0, batch * 4, batch * 5);
  OpenRestoreDB();
  status = restore_db_->RestoreDBFromBackup(5, dbname_, dbname_);
  ASSERT_TRUE(!status.ok());
  CloseRestoreDB();
  ASSERT_OK(file_manager_->DeleteRandomFileInDir(backupdir_ + "/private/4"));
  // backup 4 lost a file, so the latest backup is now 3
  AssertBackupConsistency(0, 0, batch * 3, batch * 5);
  OpenRestoreDB();
  status = restore_db_->RestoreDBFromBackup(4, dbname_, dbname_);
  CloseRestoreDB();
  ASSERT_TRUE(!status.ok());

  // the next backup created has to get id 4 again
  OpenBackupableDB();
  FillDB(db_.get(), batch * 3, batch * 4);
  ASSERT_OK(db_->CreateNewBackup(!!(coin.Next() % 2)));
  CloseBackupableDB();
  AssertBackupConsistency(4, 0, batch * 4, batch * 5);
}
|
||||
|
||||
// open DB, write, close DB, backup, restore, repeat
|
||||
// Repeatedly fills the DB, backs it up, closes and destroys it, and then
// restores it from the latest backup, once with and once without flushing
// before each backup.
TEST(BackupableDBTest, OfflineIntegrationTest) {
  // large enough that filling the DB forces memtable flushes
  const int batch = 5000;
  const int key_limit = batch * 4 + 10;
  // pass 0 flushes before every backup, pass 1 never does
  for (int pass = 0; pass < 2; ++pass) {
    // start from an empty database
    DestroyDB(dbname_, Options());
    bool wipe_backups = true;

    // each round: add data, back up, destroy the db, restore and verify
    for (int round = 0; round < 5; ++round) {
      // the last round adds fewer keys than the others
      int fill_up_to = std::min(batch * (round + 1), key_limit);
      // ---- add data and back it up ----
      OpenBackupableDB(wipe_backups);
      wipe_backups = false;
      FillDB(db_.get(), batch * round, fill_up_to);
      ASSERT_OK(db_->CreateNewBackup(pass == 0));
      CloseBackupableDB();
      DestroyDB(dbname_, Options());

      // ---- the destroyed db must hold none of the keys ----
      DB* db = OpenDB();
      AssertEmpty(db, 0, fill_up_to);
      delete db;

      // ---- restore ----
      OpenRestoreDB();
      if (round >= 3) { // exercise purging as well
        // round 3 keeps 2 backups, round 4 keeps only 1
        ASSERT_OK(restore_db_->PurgeOldBackups(5 - round));
      }
      // ---- everything written so far must be back ----
      AssertBackupConsistency(0, 0, fill_up_to, key_limit);
      CloseRestoreDB();
    }
  }
}
|
||||
|
||||
// open DB, write, backup, write, backup, close, restore
|
||||
// Creates five backups while the DB stays open (deleting backup 2 along the
// way), then destroys the DB and verifies each remaining backup by restoring
// it, before and after deleting and purging further backups.
TEST(BackupableDBTest, OnlineIntegrationTest) {
  // large enough that filling the DB forces memtable flushes
  const int batch = 5000;
  const int key_limit = batch * 4 + 10;
  Random coin(7);
  // start from an empty database
  DestroyDB(dbname_, Options());

  OpenBackupableDB(true);
  // alternate between adding data and backing up
  for (int round = 0; round < 5; ++round) {
    if (round == 4) {
      // remove backup 2 while the BackupableDB is still open
      OpenRestoreDB();
      ASSERT_OK(restore_db_->DeleteBackup(2));
      CloseRestoreDB();
    }
    // the last round adds fewer keys,
    // so that backups can share sst files
    int fill_up_to = std::min(batch * (round + 1), key_limit);
    FillDB(db_.get(), batch * round, fill_up_to);
    // results have to be consistent whether or not
    // the memtable is flushed before the backup
    ASSERT_OK(db_->CreateNewBackup(!!(coin.Next() % 2)));
  }
  // close and destroy
  CloseBackupableDB();
  DestroyDB(dbname_, Options());

  // ---- the destroyed db must hold none of the keys ----
  DB* db = OpenDB();
  AssertEmpty(db, 0, key_limit);
  delete db;

  // ---- restore each backup in turn and check its contents ----
  OpenRestoreDB();
  for (int id = 1; id <= 5; ++id) {
    if (id == 2) {
      // backup 2 was deleted above
      Status gone = restore_db_->RestoreDBFromBackup(2, dbname_, dbname_);
      ASSERT_TRUE(!gone.ok());
    } else {
      int fill_up_to = std::min(batch * id, key_limit);
      AssertBackupConsistency(id, 0, fill_up_to, key_limit);
    }
  }

  // delete some backups -- only backups 3 and 5 should remain
  ASSERT_OK(restore_db_->DeleteBackup(4));
  ASSERT_OK(restore_db_->PurgeOldBackups(2));

  std::vector<BackupInfo> remaining;
  restore_db_->GetBackupInfo(&remaining);
  ASSERT_EQ(2UL, remaining.size());

  // backup 3
  AssertBackupConsistency(3, 0, 3 * batch, key_limit);
  // backup 5
  AssertBackupConsistency(5, 0, key_limit);

  CloseRestoreDB();
}
|
||||
|
||||
// Restores an older backup and checks that reopening the BackupableDB on the
// restored database removes the backups that are newer than it.
TEST(BackupableDBTest, DeleteNewerBackups) {
  // backups 1..5, each with 100 more keys than the one before
  OpenBackupableDB(true);
  for (int round = 0; round < 5; ++round) {
    FillDB(db_.get(), 100 * round, 100 * (round + 1));
    ASSERT_OK(db_->CreateNewBackup(!!(round % 2)));
  }
  CloseBackupableDB();

  // restoring backup 3 works and yields exactly its 300 keys
  AssertBackupConsistency(3, 0, 300, 500);
  // opening on top of the restored db is expected to drop backups 4 and 5
  OpenBackupableDB();
  CloseBackupableDB();
  // neither of them can be restored any more
  OpenRestoreDB();
  Status outcome = restore_db_->RestoreDBFromBackup(4, dbname_, dbname_);
  ASSERT_TRUE(outcome.IsNotFound());
  outcome = restore_db_->RestoreDBFromBackup(5, dbname_, dbname_);
  ASSERT_TRUE(outcome.IsNotFound());
  CloseRestoreDB();
}
|
||||
|
||||
} // anon namespace
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
// Test binary entry point: runs all tests registered with the test harness
// and returns its result. argc and argv are not used.
int main(int argc, char** argv) {
|
||||
return rocksdb::test::RunAllTests();
|
||||
}
|
BIN
utilities/merge_operators/.DS_Store
vendored
BIN
utilities/merge_operators/.DS_Store
vendored
Binary file not shown.
@ -41,9 +41,7 @@ std::shared_ptr<DB> OpenTtlDb(char delim_char) {
|
||||
Options options;
|
||||
options.create_if_missing = true;
|
||||
options.merge_operator.reset(new StringAppendTESTOperator(delim_char));
|
||||
Status s;
|
||||
db = new DBWithTTL(123456, options, kDbName, s, false);
|
||||
ASSERT_OK(s);
|
||||
ASSERT_OK(UtilityDB::OpenTtlDB(options, kDbName, &db, 123456));
|
||||
return std::shared_ptr<DB>(db);
|
||||
}
|
||||
|
||||
@ -53,6 +51,7 @@ class StringLists {
|
||||
public:
|
||||
|
||||
//Constructor: specifies the rocksdb db
|
||||
/* implicit */
|
||||
StringLists(std::shared_ptr<DB> db)
|
||||
: db_(db),
|
||||
merge_option_(),
|
||||
@ -75,7 +74,7 @@ class StringLists {
|
||||
|
||||
// Returns the list of strings associated with key (or "" if does not exist)
|
||||
bool Get(const std::string& key, std::string* const result){
|
||||
assert(result != NULL); // we should have a place to store the result
|
||||
assert(result != nullptr); // we should have a place to store the result
|
||||
auto s = db_->Get(get_option_, key, result);
|
||||
|
||||
if (s.ok()) {
|
||||
|
@ -10,40 +10,27 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
// Open the db inside DBWithTTL because options needs pointer to its ttl
|
||||
DBWithTTL::DBWithTTL(const int32_t ttl,
|
||||
const Options& options,
|
||||
const std::string& dbname,
|
||||
Status& st,
|
||||
bool read_only)
|
||||
: StackableDB(nullptr) {
|
||||
Options options_to_open = options;
|
||||
|
||||
if (options.compaction_filter) {
|
||||
ttl_comp_filter_.reset(
|
||||
new TtlCompactionFilter(ttl, options.compaction_filter));
|
||||
options_to_open.compaction_filter = ttl_comp_filter_.get();
|
||||
void DBWithTTL::SanitizeOptions(int32_t ttl, Options* options) {
|
||||
if (options->compaction_filter) {
|
||||
options->compaction_filter =
|
||||
new TtlCompactionFilter(ttl, options->compaction_filter);
|
||||
} else {
|
||||
options_to_open.compaction_filter_factory =
|
||||
std::shared_ptr<CompactionFilterFactory>(
|
||||
new TtlCompactionFilterFactory(
|
||||
ttl, options.compaction_filter_factory));
|
||||
options->compaction_filter_factory =
|
||||
std::shared_ptr<CompactionFilterFactory>(new TtlCompactionFilterFactory(
|
||||
ttl, options->compaction_filter_factory));
|
||||
}
|
||||
|
||||
if (options.merge_operator) {
|
||||
options_to_open.merge_operator.reset(
|
||||
new TtlMergeOperator(options.merge_operator));
|
||||
}
|
||||
|
||||
if (read_only) {
|
||||
st = DB::OpenForReadOnly(options_to_open, dbname, &db_);
|
||||
} else {
|
||||
st = DB::Open(options_to_open, dbname, &db_);
|
||||
if (options->merge_operator) {
|
||||
options->merge_operator.reset(
|
||||
new TtlMergeOperator(options->merge_operator));
|
||||
}
|
||||
}
|
||||
|
||||
// Open the db inside DBWithTTL because options needs pointer to its ttl
|
||||
DBWithTTL::DBWithTTL(DB* db) : StackableDB(db) {}
|
||||
|
||||
DBWithTTL::~DBWithTTL() {
|
||||
delete db_;
|
||||
delete GetOptions().compaction_filter;
|
||||
}
|
||||
|
||||
Status UtilityDB::OpenTtlDB(
|
||||
@ -53,9 +40,19 @@ Status UtilityDB::OpenTtlDB(
|
||||
int32_t ttl,
|
||||
bool read_only) {
|
||||
Status st;
|
||||
*dbptr = new DBWithTTL(ttl, options, dbname, st, read_only);
|
||||
if (!st.ok()) {
|
||||
delete *dbptr;
|
||||
Options options_to_open = options;
|
||||
DBWithTTL::SanitizeOptions(ttl, &options_to_open);
|
||||
DB* db;
|
||||
|
||||
if (read_only) {
|
||||
st = DB::OpenForReadOnly(options_to_open, dbname, &db);
|
||||
} else {
|
||||
st = DB::Open(options_to_open, dbname, &db);
|
||||
}
|
||||
if (st.ok()) {
|
||||
*dbptr = new DBWithTTL(db);
|
||||
} else {
|
||||
delete db;
|
||||
}
|
||||
return st;
|
||||
}
|
||||
@ -122,10 +119,8 @@ Status DBWithTTL::StripTS(std::string* str) {
|
||||
return st;
|
||||
}
|
||||
|
||||
Status DBWithTTL::Put(
|
||||
const WriteOptions& opt,
|
||||
const Slice& key,
|
||||
const Slice& val) {
|
||||
Status DBWithTTL::Put(const WriteOptions& opt, const Slice& key,
|
||||
const Slice& val) {
|
||||
WriteBatch batch;
|
||||
batch.Put(key, val);
|
||||
return Write(opt, &batch);
|
||||
@ -166,10 +161,6 @@ bool DBWithTTL::KeyMayExist(const ReadOptions& options,
|
||||
return ret;
|
||||
}
|
||||
|
||||
Status DBWithTTL::Delete(const WriteOptions& wopts, const Slice& key) {
|
||||
return db_->Delete(wopts, key);
|
||||
}
|
||||
|
||||
Status DBWithTTL::Merge(const WriteOptions& opt,
|
||||
const Slice& key,
|
||||
const Slice& value) {
|
||||
@ -221,86 +212,6 @@ Iterator* DBWithTTL::NewIterator(const ReadOptions& opts) {
|
||||
return new TtlIterator(db_->NewIterator(opts));
|
||||
}
|
||||
|
||||
const Snapshot* DBWithTTL::GetSnapshot() {
|
||||
return db_->GetSnapshot();
|
||||
}
|
||||
|
||||
void DBWithTTL::ReleaseSnapshot(const Snapshot* snapshot) {
|
||||
db_->ReleaseSnapshot(snapshot);
|
||||
}
|
||||
|
||||
bool DBWithTTL::GetProperty(const Slice& property, std::string* value) {
|
||||
return db_->GetProperty(property, value);
|
||||
}
|
||||
|
||||
void DBWithTTL::GetApproximateSizes(const Range* r, int n, uint64_t* sizes) {
|
||||
db_->GetApproximateSizes(r, n, sizes);
|
||||
}
|
||||
|
||||
void DBWithTTL::CompactRange(const Slice* begin, const Slice* end,
|
||||
bool reduce_level, int target_level) {
|
||||
db_->CompactRange(begin, end, reduce_level, target_level);
|
||||
}
|
||||
|
||||
int DBWithTTL::NumberLevels() {
|
||||
return db_->NumberLevels();
|
||||
}
|
||||
|
||||
int DBWithTTL::MaxMemCompactionLevel() {
|
||||
return db_->MaxMemCompactionLevel();
|
||||
}
|
||||
|
||||
int DBWithTTL::Level0StopWriteTrigger() {
|
||||
return db_->Level0StopWriteTrigger();
|
||||
}
|
||||
|
||||
Env* DBWithTTL::GetEnv() const {
|
||||
return db_->GetEnv();
|
||||
}
|
||||
|
||||
const Options& DBWithTTL::GetOptions() const {
|
||||
return db_->GetOptions();
|
||||
}
|
||||
|
||||
Status DBWithTTL::Flush(const FlushOptions& fopts) {
|
||||
return db_->Flush(fopts);
|
||||
}
|
||||
|
||||
Status DBWithTTL::DisableFileDeletions() {
|
||||
return db_->DisableFileDeletions();
|
||||
}
|
||||
|
||||
Status DBWithTTL::EnableFileDeletions() {
|
||||
return db_->EnableFileDeletions();
|
||||
}
|
||||
|
||||
Status DBWithTTL::GetLiveFiles(std::vector<std::string>& vec, uint64_t* mfs,
|
||||
bool flush_memtable) {
|
||||
return db_->GetLiveFiles(vec, mfs, flush_memtable);
|
||||
}
|
||||
|
||||
SequenceNumber DBWithTTL::GetLatestSequenceNumber() const {
|
||||
return db_->GetLatestSequenceNumber();
|
||||
}
|
||||
|
||||
Status DBWithTTL::GetSortedWalFiles(VectorLogPtr& files) {
|
||||
return db_->GetSortedWalFiles(files);
|
||||
}
|
||||
|
||||
Status DBWithTTL::DeleteFile(std::string name) {
|
||||
return db_->DeleteFile(name);
|
||||
}
|
||||
|
||||
Status DBWithTTL::GetDbIdentity(std::string& identity) {
|
||||
return db_->GetDbIdentity(identity);
|
||||
}
|
||||
|
||||
Status DBWithTTL::GetUpdatesSince(
|
||||
SequenceNumber seq_number,
|
||||
unique_ptr<TransactionLogIterator>* iter) {
|
||||
return db_->GetUpdatesSince(seq_number, iter);
|
||||
}
|
||||
|
||||
void DBWithTTL::TEST_Destroy_DBWithTtl() {
|
||||
((DBImpl*) db_)->TEST_Destroy_DBImpl();
|
||||
}
|
||||
|
@ -14,82 +14,33 @@ namespace rocksdb {
|
||||
|
||||
class DBWithTTL : public StackableDB {
|
||||
public:
|
||||
DBWithTTL(const int32_t ttl,
|
||||
const Options& options,
|
||||
const std::string& dbname,
|
||||
Status& st,
|
||||
bool read_only);
|
||||
static void SanitizeOptions(int32_t ttl, Options* options);
|
||||
|
||||
explicit DBWithTTL(DB* db);
|
||||
|
||||
virtual ~DBWithTTL();
|
||||
|
||||
virtual Status Put(const WriteOptions& o,
|
||||
const Slice& key,
|
||||
const Slice& val);
|
||||
virtual Status Put(const WriteOptions& o, const Slice& key,
|
||||
const Slice& val) override;
|
||||
|
||||
virtual Status Get(const ReadOptions& options,
|
||||
const Slice& key,
|
||||
std::string* value);
|
||||
virtual Status Get(const ReadOptions& options, const Slice& key,
|
||||
std::string* value) override;
|
||||
|
||||
virtual std::vector<Status> MultiGet(const ReadOptions& options,
|
||||
const std::vector<Slice>& keys,
|
||||
std::vector<std::string>* values);
|
||||
virtual std::vector<Status> MultiGet(
|
||||
const ReadOptions& options, const std::vector<Slice>& keys,
|
||||
std::vector<std::string>* values) override;
|
||||
|
||||
virtual bool KeyMayExist(const ReadOptions& options,
|
||||
const Slice& key,
|
||||
std::string* value,
|
||||
bool* value_found = nullptr) override;
|
||||
|
||||
virtual Status Delete(const WriteOptions& wopts, const Slice& key);
|
||||
virtual Status Merge(const WriteOptions& options, const Slice& key,
|
||||
const Slice& value) override;
|
||||
|
||||
virtual Status Merge(const WriteOptions& options,
|
||||
const Slice& key,
|
||||
const Slice& value);
|
||||
virtual Status Write(const WriteOptions& opts, WriteBatch* updates) override;
|
||||
|
||||
|
||||
virtual Status Write(const WriteOptions& opts, WriteBatch* updates);
|
||||
|
||||
virtual Iterator* NewIterator(const ReadOptions& opts);
|
||||
|
||||
virtual const Snapshot* GetSnapshot();
|
||||
|
||||
virtual void ReleaseSnapshot(const Snapshot* snapshot);
|
||||
|
||||
virtual bool GetProperty(const Slice& property, std::string* value);
|
||||
|
||||
virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes);
|
||||
|
||||
virtual void CompactRange(const Slice* begin, const Slice* end,
|
||||
bool reduce_level = false, int target_level = -1);
|
||||
|
||||
virtual int NumberLevels();
|
||||
|
||||
virtual int MaxMemCompactionLevel();
|
||||
|
||||
virtual int Level0StopWriteTrigger();
|
||||
|
||||
virtual Env* GetEnv() const;
|
||||
|
||||
virtual const Options& GetOptions() const;
|
||||
|
||||
virtual Status Flush(const FlushOptions& fopts);
|
||||
|
||||
virtual Status DisableFileDeletions();
|
||||
|
||||
virtual Status EnableFileDeletions();
|
||||
|
||||
virtual Status GetLiveFiles(std::vector<std::string>& vec, uint64_t* mfs,
|
||||
bool flush_memtable = true);
|
||||
|
||||
virtual Status GetSortedWalFiles(VectorLogPtr& files);
|
||||
|
||||
virtual Status DeleteFile(std::string name);
|
||||
|
||||
virtual Status GetDbIdentity(std::string& identity);
|
||||
|
||||
virtual SequenceNumber GetLatestSequenceNumber() const;
|
||||
|
||||
virtual Status GetUpdatesSince(SequenceNumber seq_number,
|
||||
unique_ptr<TransactionLogIterator>* iter);
|
||||
virtual Iterator* NewIterator(const ReadOptions& opts) override;
|
||||
|
||||
// Simulate a db crash, no elegant closing of database.
|
||||
void TEST_Destroy_DBWithTtl();
|
||||
@ -113,10 +64,6 @@ class DBWithTTL : public StackableDB {
|
||||
static const int32_t kMinTimestamp = 1368146402; // 05/09/2013:5:40PM GMT-8
|
||||
|
||||
static const int32_t kMaxTimestamp = 2147483647; // 01/18/2038:7:14PM GMT-8
|
||||
|
||||
private:
|
||||
DB* db_;
|
||||
unique_ptr<CompactionFilter> ttl_comp_filter_;
|
||||
};
|
||||
|
||||
class TtlIterator : public Iterator {
|
||||
|
Loading…
Reference in New Issue
Block a user