TransformRep - use array instead of unordered_map
Summary: I'm sending this diff together with https://reviews.facebook.net/D13881 because it didn't allow me to send only the array one. Here I also replaced unordered_map with just an array of shared_ptrs. This eliminated all the locks. I will run the new benchmark and post the results here. Test Plan: db_test Reviewers: dhruba, haobo Reviewed By: haobo CC: leveldb Differential Revision: https://reviews.facebook.net/D13893
This commit is contained in:
parent
fe4a449472
commit
be96f2498e
159
db/db_test.cc
159
db/db_test.cc
@ -4649,85 +4649,90 @@ void PrefixScanInit(DBTest *dbtest) {
|
||||
}
|
||||
|
||||
TEST(DBTest, PrefixScan) {
|
||||
ReadOptions ro = ReadOptions();
|
||||
int count;
|
||||
Slice prefix;
|
||||
Slice key;
|
||||
char buf[100];
|
||||
Iterator* iter;
|
||||
snprintf(buf, sizeof(buf), "03______:");
|
||||
prefix = Slice(buf, 8);
|
||||
key = Slice(buf, 9);
|
||||
auto prefix_extractor = NewFixedPrefixTransform(8);
|
||||
auto memtable_factory =
|
||||
std::make_shared<PrefixHashRepFactory>(prefix_extractor);
|
||||
|
||||
// db configs
|
||||
env_->count_random_reads_ = true;
|
||||
Options options = CurrentOptions();
|
||||
options.env = env_;
|
||||
options.block_cache = NewLRUCache(0); // Prevent cache hits
|
||||
options.filter_policy = NewBloomFilterPolicy(10);
|
||||
options.prefix_extractor = prefix_extractor;
|
||||
options.whole_key_filtering = false;
|
||||
options.disable_auto_compactions = true;
|
||||
options.max_background_compactions = 2;
|
||||
options.create_if_missing = true;
|
||||
options.disable_seek_compaction = true;
|
||||
options.memtable_factory = memtable_factory;
|
||||
|
||||
// prefix specified, with blooms: 2 RAND I/Os
|
||||
// SeekToFirst
|
||||
DestroyAndReopen(&options);
|
||||
PrefixScanInit(this);
|
||||
count = 0;
|
||||
env_->random_read_counter_.Reset();
|
||||
ro.prefix = &prefix;
|
||||
iter = db_->NewIterator(ro);
|
||||
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||
assert(iter->key().starts_with(prefix));
|
||||
count++;
|
||||
}
|
||||
ASSERT_OK(iter->status());
|
||||
delete iter;
|
||||
ASSERT_EQ(count, 2);
|
||||
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
|
||||
|
||||
// prefix specified, with blooms: 2 RAND I/Os
|
||||
// Seek
|
||||
DestroyAndReopen(&options);
|
||||
PrefixScanInit(this);
|
||||
count = 0;
|
||||
env_->random_read_counter_.Reset();
|
||||
ro.prefix = &prefix;
|
||||
iter = db_->NewIterator(ro);
|
||||
for (iter->Seek(key); iter->Valid(); iter->Next()) {
|
||||
assert(iter->key().starts_with(prefix));
|
||||
count++;
|
||||
}
|
||||
ASSERT_OK(iter->status());
|
||||
delete iter;
|
||||
ASSERT_EQ(count, 2);
|
||||
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
|
||||
|
||||
// no prefix specified: 11 RAND I/Os
|
||||
DestroyAndReopen(&options);
|
||||
PrefixScanInit(this);
|
||||
count = 0;
|
||||
env_->random_read_counter_.Reset();
|
||||
iter = db_->NewIterator(ReadOptions());
|
||||
for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
|
||||
if (! iter->key().starts_with(prefix)) {
|
||||
break;
|
||||
for (int it = 0; it < 2; ++it) {
|
||||
ReadOptions ro = ReadOptions();
|
||||
int count;
|
||||
Slice prefix;
|
||||
Slice key;
|
||||
char buf[100];
|
||||
Iterator* iter;
|
||||
snprintf(buf, sizeof(buf), "03______:");
|
||||
prefix = Slice(buf, 8);
|
||||
key = Slice(buf, 9);
|
||||
auto prefix_extractor = NewFixedPrefixTransform(8);
|
||||
// db configs
|
||||
env_->count_random_reads_ = true;
|
||||
Options options = CurrentOptions();
|
||||
options.env = env_;
|
||||
options.block_cache = NewLRUCache(0); // Prevent cache hits
|
||||
options.filter_policy = NewBloomFilterPolicy(10);
|
||||
options.prefix_extractor = prefix_extractor;
|
||||
options.whole_key_filtering = false;
|
||||
options.disable_auto_compactions = true;
|
||||
options.max_background_compactions = 2;
|
||||
options.create_if_missing = true;
|
||||
options.disable_seek_compaction = true;
|
||||
if (it == 0) {
|
||||
options.memtable_factory = std::make_shared<PrefixHashRepNoLockFactory>(
|
||||
prefix_extractor);
|
||||
} else {
|
||||
options.memtable_factory = std::make_shared<PrefixHashRepFactory>(
|
||||
prefix_extractor);
|
||||
}
|
||||
count++;
|
||||
|
||||
// prefix specified, with blooms: 2 RAND I/Os
|
||||
// SeekToFirst
|
||||
DestroyAndReopen(&options);
|
||||
PrefixScanInit(this);
|
||||
count = 0;
|
||||
env_->random_read_counter_.Reset();
|
||||
ro.prefix = &prefix;
|
||||
iter = db_->NewIterator(ro);
|
||||
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
||||
assert(iter->key().starts_with(prefix));
|
||||
count++;
|
||||
}
|
||||
ASSERT_OK(iter->status());
|
||||
delete iter;
|
||||
ASSERT_EQ(count, 2);
|
||||
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
|
||||
|
||||
// prefix specified, with blooms: 2 RAND I/Os
|
||||
// Seek
|
||||
DestroyAndReopen(&options);
|
||||
PrefixScanInit(this);
|
||||
count = 0;
|
||||
env_->random_read_counter_.Reset();
|
||||
ro.prefix = &prefix;
|
||||
iter = db_->NewIterator(ro);
|
||||
for (iter->Seek(key); iter->Valid(); iter->Next()) {
|
||||
assert(iter->key().starts_with(prefix));
|
||||
count++;
|
||||
}
|
||||
ASSERT_OK(iter->status());
|
||||
delete iter;
|
||||
ASSERT_EQ(count, 2);
|
||||
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
|
||||
|
||||
// no prefix specified: 11 RAND I/Os
|
||||
DestroyAndReopen(&options);
|
||||
PrefixScanInit(this);
|
||||
count = 0;
|
||||
env_->random_read_counter_.Reset();
|
||||
iter = db_->NewIterator(ReadOptions());
|
||||
for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
|
||||
if (! iter->key().starts_with(prefix)) {
|
||||
break;
|
||||
}
|
||||
count++;
|
||||
}
|
||||
ASSERT_OK(iter->status());
|
||||
delete iter;
|
||||
ASSERT_EQ(count, 2);
|
||||
ASSERT_EQ(env_->random_read_counter_.Read(), 11);
|
||||
Close();
|
||||
delete options.filter_policy;
|
||||
}
|
||||
ASSERT_OK(iter->status());
|
||||
delete iter;
|
||||
ASSERT_EQ(count, 2);
|
||||
ASSERT_EQ(env_->random_read_counter_.Read(), 11);
|
||||
Close();
|
||||
delete options.filter_policy;
|
||||
}
|
||||
|
||||
std::string MakeKey(unsigned int num) {
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "util/testharness.h"
|
||||
|
||||
DEFINE_bool(use_prefix_hash_memtable, true, "");
|
||||
DEFINE_bool(use_nolock_version, true, "");
|
||||
DEFINE_bool(trigger_deadlock, false,
|
||||
"issue delete in range scan to trigger PrefixHashMap deadlock");
|
||||
DEFINE_uint64(bucket_count, 100000, "number of buckets");
|
||||
@ -93,15 +94,24 @@ class PrefixTest {
|
||||
if (FLAGS_use_prefix_hash_memtable) {
|
||||
auto prefix_extractor = NewFixedPrefixTransform(8);
|
||||
options.prefix_extractor = prefix_extractor;
|
||||
options.memtable_factory =
|
||||
std::make_shared<rocksdb::PrefixHashRepFactory>(
|
||||
prefix_extractor, FLAGS_bucket_count, FLAGS_num_locks);
|
||||
if (FLAGS_use_nolock_version) {
|
||||
options.memtable_factory =
|
||||
std::make_shared<rocksdb::PrefixHashRepNoLockFactory>(
|
||||
prefix_extractor, FLAGS_bucket_count);
|
||||
} else {
|
||||
options.memtable_factory =
|
||||
std::make_shared<rocksdb::PrefixHashRepFactory>(
|
||||
prefix_extractor, FLAGS_bucket_count, FLAGS_num_locks);
|
||||
}
|
||||
}
|
||||
|
||||
Status s = DB::Open(options, kDbName, &db);
|
||||
ASSERT_OK(s);
|
||||
return std::shared_ptr<DB>(db);
|
||||
}
|
||||
~PrefixTest() {
|
||||
delete options.comparator;
|
||||
}
|
||||
protected:
|
||||
Options options;
|
||||
};
|
||||
|
@ -15,11 +15,11 @@
|
||||
// Users can implement their own memtable representations. We include four
|
||||
// types built in:
|
||||
// - SkipListRep: This is the default; it is backed by a skip list.
|
||||
// - TransformRep: This is backed by an std::unordered_map<Slice,
|
||||
// std::set>. On construction, they are given a SliceTransform object. This
|
||||
// - TransformRep: This is backed by a custom hash map.
|
||||
// On construction, they are given a SliceTransform object. This
|
||||
// object is applied to the user key of stored items which indexes into the
|
||||
// unordered map to yield a set containing all records that share the same user
|
||||
// key under the transform function.
|
||||
// hash map to yield a skiplist containing all records that share the same
|
||||
// user key under the transform function.
|
||||
// - UnsortedRep: A subclass of TransformRep where the transform function is
|
||||
// the identity function. Optimized for point lookups.
|
||||
// - PrefixHashRep: A subclass of TransformRep where the transform function is
|
||||
@ -254,6 +254,68 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// NO LOCKS VERSION
|
||||
|
||||
// The same as TransformRepFactory except it doesn't use locks.
|
||||
// Experimental, will replace TransformRepFactory once we are sure
|
||||
// it performs better
|
||||
class TransformRepNoLockFactory : public MemTableRepFactory {
|
||||
public:
|
||||
explicit TransformRepNoLockFactory(const SliceTransform* transform,
|
||||
size_t bucket_count)
|
||||
: transform_(transform),
|
||||
bucket_count_(bucket_count) { }
|
||||
|
||||
virtual ~TransformRepNoLockFactory() { delete transform_; }
|
||||
|
||||
virtual std::shared_ptr<MemTableRep> CreateMemTableRep(
|
||||
MemTableRep::KeyComparator&, Arena*) override;
|
||||
|
||||
virtual const char* Name() const override {
|
||||
return "TransformRepNoLockFactory";
|
||||
}
|
||||
|
||||
const SliceTransform* GetTransform() { return transform_; }
|
||||
|
||||
protected:
|
||||
const SliceTransform* transform_;
|
||||
const size_t bucket_count_;
|
||||
};
|
||||
|
||||
// UnsortedReps bin user keys based on an identity function transform -- that
|
||||
// is, transform(key) = key. This optimizes for point look-ups.
|
||||
//
|
||||
// Parameters: See TransformRepNoLockFactory.
|
||||
class UnsortedRepNoLockFactory : public TransformRepNoLockFactory {
|
||||
public:
|
||||
explicit UnsortedRepNoLockFactory(size_t bucket_count = 1000000)
|
||||
: TransformRepNoLockFactory(NewNoopTransform(),
|
||||
bucket_count) { }
|
||||
virtual const char* Name() const override {
|
||||
return "UnsortedRepNoLockFactory";
|
||||
}
|
||||
};
|
||||
|
||||
// PrefixHashReps bin user keys based on a fixed-size prefix. This optimizes for
|
||||
// short ranged scans over a given prefix.
|
||||
//
|
||||
// Parameters: See TransformRepNoLockFactory.
|
||||
class PrefixHashRepNoLockFactory : public TransformRepNoLockFactory {
|
||||
public:
|
||||
explicit PrefixHashRepNoLockFactory(const SliceTransform* prefix_extractor,
|
||||
size_t bucket_count = 1000000)
|
||||
: TransformRepNoLockFactory(prefix_extractor, bucket_count)
|
||||
{ }
|
||||
|
||||
virtual std::shared_ptr<MemTableRep> CreateMemTableRep(
|
||||
MemTableRep::KeyComparator&, Arena*) override;
|
||||
|
||||
virtual const char* Name() const override {
|
||||
return "PrefixHashRepNoLockFactory";
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif // STORAGE_ROCKSDB_DB_MEMTABLEREP_H_
|
||||
|
262
util/transformrepnolock.cc
Normal file
262
util/transformrepnolock.cc
Normal file
@ -0,0 +1,262 @@
|
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
|
||||
#include "rocksdb/memtablerep.h"
|
||||
#include "rocksdb/arena.h"
|
||||
#include "rocksdb/slice.h"
|
||||
#include "rocksdb/slice_transform.h"
|
||||
#include "port/port.h"
|
||||
#include "port/atomic_pointer.h"
|
||||
#include "util/murmurhash.h"
|
||||
#include "db/skiplist.h"
|
||||
|
||||
namespace rocksdb {
|
||||
namespace {
|
||||
|
||||
class TransformRepNoLock : public MemTableRep {
|
||||
public:
|
||||
TransformRepNoLock(MemTableRep::KeyComparator& compare, Arena* arena,
|
||||
const SliceTransform* transform, size_t bucket_size);
|
||||
|
||||
virtual void Insert(const char* key) override;
|
||||
|
||||
virtual bool Contains(const char* key) const override;
|
||||
|
||||
virtual size_t ApproximateMemoryUsage() override;
|
||||
|
||||
virtual ~TransformRepNoLock();
|
||||
|
||||
virtual std::shared_ptr<MemTableRep::Iterator> GetIterator() override;
|
||||
|
||||
virtual std::shared_ptr<MemTableRep::Iterator> GetIterator(
|
||||
const Slice& slice) override;
|
||||
|
||||
std::shared_ptr<MemTableRep::Iterator> GetTransformIterator(
|
||||
const Slice& transformed);
|
||||
|
||||
private:
|
||||
typedef SkipList<const char*, MemTableRep::KeyComparator&> Bucket;
|
||||
|
||||
size_t bucket_size_;
|
||||
|
||||
// Maps slices (which are transformed user keys) to buckets of keys sharing
|
||||
// the same transform.
|
||||
port::AtomicPointer* buckets_;
|
||||
|
||||
// The user-supplied transform whose domain is the user keys.
|
||||
const SliceTransform* transform_;
|
||||
|
||||
MemTableRep::KeyComparator& compare_;
|
||||
// immutable after construction
|
||||
Arena* const arena_;
|
||||
|
||||
inline size_t GetHash(const Slice& slice) const {
|
||||
return MurmurHash(slice.data(), slice.size(), 0) % bucket_size_;
|
||||
}
|
||||
inline Bucket* GetBucket(size_t i) const {
|
||||
return static_cast<Bucket*>(buckets_[i].Acquire_Load());
|
||||
}
|
||||
inline Bucket* GetBucket(const Slice& slice) const {
|
||||
return GetBucket(GetHash(slice));
|
||||
}
|
||||
// Get a bucket from buckets_. If the bucket hasn't been initialized yet,
|
||||
// initialize it before returning.
|
||||
Bucket* GetInitializedBucket(const Slice& transformed);
|
||||
|
||||
class Iterator : public MemTableRep::Iterator {
|
||||
public:
|
||||
explicit Iterator(Bucket* list, bool own_list = true)
|
||||
: list_(list),
|
||||
iter_(list),
|
||||
own_list_(own_list) {}
|
||||
|
||||
virtual ~Iterator() {
|
||||
// if we own the list, we should also delete it
|
||||
if (own_list_) {
|
||||
delete list_;
|
||||
}
|
||||
};
|
||||
|
||||
// Returns true iff the iterator is positioned at a valid node.
|
||||
virtual bool Valid() const {
|
||||
return iter_.Valid();
|
||||
}
|
||||
|
||||
// Returns the key at the current position.
|
||||
// REQUIRES: Valid()
|
||||
virtual const char* key() const {
|
||||
return iter_.key();
|
||||
}
|
||||
|
||||
// Advances to the next position.
|
||||
// REQUIRES: Valid()
|
||||
virtual void Next() {
|
||||
iter_.Next();
|
||||
}
|
||||
|
||||
// Advances to the previous position.
|
||||
// REQUIRES: Valid()
|
||||
virtual void Prev() {
|
||||
iter_.Prev();
|
||||
}
|
||||
|
||||
// Advance to the first entry with a key >= target
|
||||
virtual void Seek(const char* target) {
|
||||
iter_.Seek(target);
|
||||
}
|
||||
|
||||
// Position at the first entry in collection.
|
||||
// Final state of iterator is Valid() iff collection is not empty.
|
||||
virtual void SeekToFirst() {
|
||||
iter_.SeekToFirst();
|
||||
}
|
||||
|
||||
// Position at the last entry in collection.
|
||||
// Final state of iterator is Valid() iff collection is not empty.
|
||||
virtual void SeekToLast() {
|
||||
iter_.SeekToLast();
|
||||
}
|
||||
private:
|
||||
Bucket* list_;
|
||||
Bucket::Iterator iter_;
|
||||
// here we track if we own list_. If we own it, we are also
|
||||
// responsible for it's cleaning. This is a poor man's shared_ptr
|
||||
bool own_list_;
|
||||
};
|
||||
|
||||
class EmptyIterator : public MemTableRep::Iterator {
|
||||
// This is used when there wasn't a bucket. It is cheaper than
|
||||
// instantiating an empty bucket over which to iterate.
|
||||
public:
|
||||
EmptyIterator() { }
|
||||
virtual bool Valid() const {
|
||||
return false;
|
||||
}
|
||||
virtual const char* key() const {
|
||||
assert(false);
|
||||
return nullptr;
|
||||
}
|
||||
virtual void Next() { }
|
||||
virtual void Prev() { }
|
||||
virtual void Seek(const char* target) { }
|
||||
virtual void SeekToFirst() { }
|
||||
virtual void SeekToLast() { }
|
||||
private:
|
||||
};
|
||||
|
||||
std::shared_ptr<EmptyIterator> empty_iterator_;
|
||||
};
|
||||
|
||||
class PrefixHashRepNoLock : public TransformRepNoLock {
|
||||
public:
|
||||
PrefixHashRepNoLock(MemTableRep::KeyComparator& compare, Arena* arena,
|
||||
const SliceTransform* transform, size_t bucket_size)
|
||||
: TransformRepNoLock(compare, arena, transform, bucket_size) { }
|
||||
|
||||
virtual std::shared_ptr<MemTableRep::Iterator> GetPrefixIterator(
|
||||
const Slice& prefix) override;
|
||||
};
|
||||
|
||||
// Stores the collaborators, allocates the bucket array with new[] and marks
// every slot as empty (nullptr). Buckets themselves are created lazily.
TransformRepNoLock::TransformRepNoLock(MemTableRep::KeyComparator& compare,
                                       Arena* arena,
                                       const SliceTransform* transform,
                                       size_t bucket_size)
    : bucket_size_(bucket_size),
      transform_(transform),
      compare_(compare),
      arena_(arena),
      empty_iterator_(std::make_shared<EmptyIterator>()) {
  buckets_ = new port::AtomicPointer[bucket_size];

  // Walk the array by pointer and clear each slot.
  port::AtomicPointer* const last = buckets_ + bucket_size_;
  for (port::AtomicPointer* slot = buckets_; slot != last; ++slot) {
    slot->NoBarrier_Store(nullptr);
  }
}
|
||||
|
||||
// Frees the bucket pointer array that the constructor allocated with new[].
// The buckets themselves are placement-constructed in memory obtained from
// arena_ (see GetInitializedBucket), so they are not deleted here.
TransformRepNoLock::~TransformRepNoLock() {
|
||||
delete[] buckets_;
|
||||
}
|
||||
|
||||
// Returns the bucket for an already-transformed key, creating it on first
// use. A new bucket is placement-constructed in arena memory and then
// published into its slot with Release_Store.
TransformRepNoLock::Bucket* TransformRepNoLock::GetInitializedBucket(
    const Slice& transformed) {
  const size_t slot = GetHash(transformed);
  Bucket* existing = GetBucket(slot);
  if (existing != nullptr) {
    // Fast path: the slot was already populated.
    return existing;
  }

  auto storage = arena_->AllocateAligned(sizeof(Bucket));
  Bucket* created = new (storage) Bucket(compare_, arena_);
  buckets_[slot].Release_Store(static_cast<void*>(created));
  return created;
}
|
||||
|
||||
void TransformRepNoLock::Insert(const char* key) {
|
||||
assert(!Contains(key));
|
||||
auto transformed = transform_->Transform(UserKey(key));
|
||||
auto bucket = GetInitializedBucket(transformed);
|
||||
bucket->Insert(key);
|
||||
}
|
||||
|
||||
bool TransformRepNoLock::Contains(const char* key) const {
|
||||
auto transformed = transform_->Transform(UserKey(key));
|
||||
auto bucket = GetBucket(transformed);
|
||||
if (bucket == nullptr) {
|
||||
return false;
|
||||
}
|
||||
return bucket->Contains(key);
|
||||
}
|
||||
|
||||
// Reports the memory attributed to this representation.
// NOTE(review): buckets_ is declared as port::AtomicPointer*, so
// sizeof(buckets_) is the size of that pointer member, not of the
// bucket_size_-element array the constructor allocates with new[]. Confirm
// whether the array (bucket_size_ * sizeof(port::AtomicPointer)) is meant to
// be accounted for here.
size_t TransformRepNoLock::ApproximateMemoryUsage() {
|
||||
return sizeof(buckets_);
|
||||
}
|
||||
|
||||
std::shared_ptr<MemTableRep::Iterator> TransformRepNoLock::GetIterator() {
|
||||
auto list = new Bucket(compare_, arena_);
|
||||
for (size_t i = 0; i < bucket_size_; ++i) {
|
||||
auto bucket = GetBucket(i);
|
||||
if (bucket != nullptr) {
|
||||
Bucket::Iterator itr(bucket);
|
||||
for (itr.SeekToFirst(); itr.Valid(); itr.Next()) {
|
||||
list->Insert(itr.key());
|
||||
}
|
||||
}
|
||||
}
|
||||
return std::make_shared<Iterator>(list);
|
||||
}
|
||||
|
||||
std::shared_ptr<MemTableRep::Iterator> TransformRepNoLock::GetTransformIterator(
|
||||
const Slice& transformed) {
|
||||
auto bucket = GetBucket(transformed);
|
||||
if (bucket == nullptr) {
|
||||
return empty_iterator_;
|
||||
}
|
||||
return std::make_shared<Iterator>(bucket, false);
|
||||
}
|
||||
|
||||
std::shared_ptr<MemTableRep::Iterator> TransformRepNoLock::GetIterator(
|
||||
const Slice& slice) {
|
||||
auto transformed = transform_->Transform(slice);
|
||||
return GetTransformIterator(transformed);
|
||||
}
|
||||
|
||||
} // anon namespace
|
||||
|
||||
// Creates a TransformRepNoLock that uses this factory's transform and bucket
// count.
std::shared_ptr<MemTableRep> TransformRepNoLockFactory::CreateMemTableRep(
    MemTableRep::KeyComparator& compare, Arena* arena) {
  std::shared_ptr<MemTableRep> rep = std::make_shared<TransformRepNoLock>(
      compare, arena, transform_, bucket_count_);
  return rep;
}
|
||||
|
||||
// Creates a PrefixHashRepNoLock that uses this factory's transform and bucket
// count.
std::shared_ptr<MemTableRep> PrefixHashRepNoLockFactory::CreateMemTableRep(
    MemTableRep::KeyComparator& compare, Arena* arena) {
  std::shared_ptr<MemTableRep> rep = std::make_shared<PrefixHashRepNoLock>(
      compare, arena, transform_, bucket_count_);
  return rep;
}
|
||||
|
||||
std::shared_ptr<MemTableRep::Iterator> PrefixHashRepNoLock::GetPrefixIterator(
|
||||
const Slice& prefix) {
|
||||
return TransformRepNoLock::GetTransformIterator(prefix);
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
Loading…
Reference in New Issue
Block a user