[RocksDB][Performance Branch] Make height and branching factor configurable for skiplist implementation

Summary: As title. Especially, HashSkipListRepFactory will be able to specify a relatively small height, to reduce the memory overhead of one skiplist per bucket.

Test Plan: make check and test it on leaf4

Reviewers: dhruba, sdong, kailiu

CC: reconnect.grayhat, leveldb

Differential Revision: https://reviews.facebook.net/D14307
This commit is contained in:
Haobo Xu 2013-11-22 13:31:00 -08:00
parent 8aac46d686
commit 4e6463ea44
4 changed files with 64 additions and 25 deletions

View File

@ -22,6 +22,7 @@ DEFINE_uint64(items_per_prefix, 10, "total number of values per prefix");
DEFINE_int64(write_buffer_size, 1000000000, "");
DEFINE_int64(max_write_buffer_number, 8, "");
DEFINE_int64(min_write_buffer_number_to_merge, 7, "");
DEFINE_int32(skiplist_height, 4, "");
// Path to the database on file system
const std::string kDbName = rocksdb::test::TmpDir() + "/prefix_test";
@ -111,7 +112,8 @@ class PrefixTest {
options.prefix_extractor = prefix_extractor;
if (FLAGS_use_nolock_version) {
options.memtable_factory.reset(NewHashSkipListRepFactory(
prefix_extractor, FLAGS_bucket_count));
prefix_extractor, FLAGS_bucket_count,
FLAGS_skiplist_height));
} else {
options.memtable_factory =
std::make_shared<rocksdb::PrefixHashRepFactory>(
@ -152,7 +154,7 @@ TEST(PrefixTest, DynamicPrefixIterator) {
TestKey test_key(prefix, sorted);
Slice key = TestKeyToSlice(test_key);
std::string value = "v" + std::to_string(sorted);
std::string value(40, 0);
ASSERT_OK(db->Put(write_options, key, value));
}

View File

@ -47,7 +47,8 @@ class SkipList {
// Create a new SkipList object that will use "cmp" for comparing keys,
// and will allocate memory using "*arena". Objects allocated in the arena
// must remain allocated for the lifetime of the skiplist object.
explicit SkipList(Comparator cmp, Arena* arena);
explicit SkipList(Comparator cmp, Arena* arena,
int32_t max_height = 12, int32_t branching_factor = 4);
// Insert key into the list.
// REQUIRES: nothing that compares equal to key is currently in the list.
@ -101,7 +102,8 @@ class SkipList {
};
private:
enum { kMaxHeight = 12 };
const int32_t kMaxHeight_;
const int32_t kBranching_;
// Immutable after construction
Comparator const compare_;
@ -114,8 +116,8 @@ class SkipList {
port::AtomicPointer max_height_; // Height of the entire list
// Used for optimizing sequential insert patterns
Node* prev_[kMaxHeight];
int prev_height_;
Node** prev_;
int32_t prev_height_;
inline int GetMaxHeight() const {
return static_cast<int>(
@ -257,13 +259,12 @@ inline void SkipList<Key,Comparator>::Iterator::SeekToLast() {
template<typename Key, class Comparator>
int SkipList<Key,Comparator>::RandomHeight() {
// Increase height with probability 1 in kBranching
static const unsigned int kBranching = 4;
int height = 1;
while (height < kMaxHeight && ((rnd_.Next() % kBranching) == 0)) {
while (height < kMaxHeight_ && ((rnd_.Next() % kBranching_) == 0)) {
height++;
}
assert(height > 0);
assert(height <= kMaxHeight);
assert(height <= kMaxHeight_);
return height;
}
@ -353,14 +354,24 @@ typename SkipList<Key,Comparator>::Node* SkipList<Key,Comparator>::FindLast()
}
template<typename Key, class Comparator>
SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena)
: compare_(cmp),
SkipList<Key,Comparator>::SkipList(Comparator cmp, Arena* arena,
int32_t max_height,
int32_t branching_factor)
: kMaxHeight_(max_height),
kBranching_(branching_factor),
compare_(cmp),
arena_(arena),
head_(NewNode(0 /* any key will do */, kMaxHeight)),
head_(NewNode(0 /* any key will do */, max_height)),
max_height_(reinterpret_cast<void*>(1)),
prev_height_(1),
rnd_(0xdeadbeef) {
for (int i = 0; i < kMaxHeight; i++) {
assert(kMaxHeight_ > 0);
assert(kBranching_ > 0);
// Allocate the prev_ Node* array, directly from the passed-in arena.
// prev_ does not need to be freed, as its life cycle is tied up with
// the arena as a whole.
prev_ = (Node**) arena_->AllocateAligned(sizeof(Node*) * kMaxHeight_);
for (int i = 0; i < kMaxHeight_; i++) {
head_->SetNext(i, nullptr);
prev_[i] = head_;
}

View File

@ -267,9 +267,16 @@ public:
// The same as TransformRepFactory except it doesn't use locks.
// Experimental, will replace TransformRepFactory once we are sure
// it performs better
// it performs better. It contains a fixed array of buckets, each
// pointing to a skiplist (null if the bucket is empty).
// bucket_count: number of fixed array buckets
// skiplist_height: the max height of the skiplist
// skiplist_branching_factor: probabilistic size ratio between adjacent
// link lists in the skiplist
extern MemTableRepFactory* NewHashSkipListRepFactory(
const SliceTransform* transform, size_t bucket_count = 1000000);
const SliceTransform* transform, size_t bucket_count = 1000000,
int32_t skiplist_height = 4, int32_t skiplist_branching_factor = 4
);
}

View File

@ -20,7 +20,8 @@ namespace {
class HashSkipListRep : public MemTableRep {
public:
HashSkipListRep(MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform* transform, size_t bucket_size);
const SliceTransform* transform, size_t bucket_size,
int32_t skiplist_height, int32_t skiplist_branching_factor);
virtual void Insert(const char* key) override;
@ -47,6 +48,9 @@ class HashSkipListRep : public MemTableRep {
size_t bucket_size_;
const int32_t skiplist_height_;
const int32_t skiplist_branching_factor_;
// Maps slices (which are transformed user keys) to buckets of keys sharing
// the same transform.
port::AtomicPointer* buckets_;
@ -215,8 +219,12 @@ class HashSkipListRep : public MemTableRep {
};
HashSkipListRep::HashSkipListRep(MemTableRep::KeyComparator& compare,
Arena* arena, const SliceTransform* transform, size_t bucket_size)
Arena* arena, const SliceTransform* transform,
size_t bucket_size, int32_t skiplist_height,
int32_t skiplist_branching_factor)
: bucket_size_(bucket_size),
skiplist_height_(skiplist_height),
skiplist_branching_factor_(skiplist_branching_factor),
transform_(transform),
compare_(compare),
arena_(arena),
@ -239,7 +247,8 @@ HashSkipListRep::Bucket* HashSkipListRep::GetInitializedBucket(
auto bucket = GetBucket(hash);
if (bucket == nullptr) {
auto addr = arena_->AllocateAligned(sizeof(Bucket));
bucket = new (addr) Bucket(compare_, arena_);
bucket = new (addr) Bucket(compare_, arena_, skiplist_height_,
skiplist_branching_factor_);
buckets_[hash].Release_Store(static_cast<void*>(bucket));
}
return bucket;
@ -302,17 +311,23 @@ std::shared_ptr<MemTableRep::Iterator>
class HashSkipListRepFactory : public MemTableRepFactory {
public:
explicit HashSkipListRepFactory(const SliceTransform* transform,
size_t bucket_count = 1000000)
: transform_(transform),
bucket_count_(bucket_count) { }
explicit HashSkipListRepFactory(
const SliceTransform* transform,
size_t bucket_count,
int32_t skiplist_height,
int32_t skiplist_branching_factor)
: transform_(transform),
bucket_count_(bucket_count),
skiplist_height_(skiplist_height),
skiplist_branching_factor_(skiplist_branching_factor) { }
virtual ~HashSkipListRepFactory() { delete transform_; }
virtual std::shared_ptr<MemTableRep> CreateMemTableRep(
MemTableRep::KeyComparator& compare, Arena* arena) override {
return std::make_shared<HashSkipListRep>(compare, arena, transform_,
bucket_count_);
bucket_count_, skiplist_height_,
skiplist_branching_factor_);
}
virtual const char* Name() const override {
@ -324,11 +339,15 @@ class HashSkipListRepFactory : public MemTableRepFactory {
private:
const SliceTransform* transform_;
const size_t bucket_count_;
const int32_t skiplist_height_;
const int32_t skiplist_branching_factor_;
};
MemTableRepFactory* NewHashSkipListRepFactory(
const SliceTransform* transform, size_t bucket_count) {
return new HashSkipListRepFactory(transform, bucket_count);
const SliceTransform* transform, size_t bucket_count,
int32_t skiplist_height, int32_t skiplist_branching_factor) {
return new HashSkipListRepFactory(transform, bucket_count,
skiplist_height, skiplist_branching_factor);
}
} // namespace rocksdb