// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#ifndef ROCKSDB_LITE
#include "rocksdb/memtablerep.h"

#include <unordered_set>
#include <set>
#include <memory>
#include <algorithm>
#include <type_traits>

#include "util/arena.h"
#include "db/memtable.h"
#include "port/port.h"
#include "util/mutexlock.h"
#include "util/stl_wrappers.h"

namespace rocksdb {
namespace {

using namespace stl_wrappers;

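// A MemTableRep backed by a plain vector: inserts append under a write lock,
// and the bucket is only sorted lazily, the first time an iterator needs to
// read it in order. This trades read/point-lookup cost for cheap appends,
// which suits bulk-load style, write-heavy workloads.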
class VectorRep : public MemTableRep {
 public:
  VectorRep(const KeyComparator& compare, MemTableAllocator* allocator,
            size_t count);

  // Insert key into the collection. (The caller will pack key and value into a
  // single buffer and pass that in as the parameter to Insert)
  // REQUIRES: nothing that compares equal to key is currently in the
  // collection.
  virtual void Insert(KeyHandle handle) override;

  // Returns true iff an entry that compares equal to key is in the collection.
  virtual bool Contains(const char* key) const override;

  virtual void MarkReadOnly() override;

  virtual size_t ApproximateMemoryUsage() override;

  virtual void Get(const LookupKey& k, void* callback_args,
                   bool (*callback_func)(void* arg,
                                         const char* entry)) override;

  virtual ~VectorRep() override { }

  class Iterator : public MemTableRep::Iterator {
    class VectorRep* vrep_;
    std::shared_ptr<std::vector<const char*>> bucket_;
    typename std::vector<const char*>::const_iterator mutable cit_;
    const KeyComparator& compare_;
    std::string tmp_;       // For passing to EncodeKey
    bool mutable sorted_;
    void DoSort() const;

   public:
    explicit Iterator(class VectorRep* vrep,
                      std::shared_ptr<std::vector<const char*>> bucket,
                      const KeyComparator& compare);

    // Initialize an iterator over the specified collection.
    // The returned iterator is not valid.
    // explicit Iterator(const MemTableRep* collection);
    virtual ~Iterator() override { }

    // Returns true iff the iterator is positioned at a valid node.
    virtual bool Valid() const override;

    // Returns the key at the current position.
    // REQUIRES: Valid()
    virtual const char* key() const override;

    // Advances to the next position.
    // REQUIRES: Valid()
    virtual void Next() override;

    // Advances to the previous position.
    // REQUIRES: Valid()
    virtual void Prev() override;

    // Advance to the first entry with a key >= target
    virtual void Seek(const Slice& user_key, const char* memtable_key) override;

    // Position at the first entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    virtual void SeekToFirst() override;

    // Position at the last entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    virtual void SeekToLast() override;
  };

  // Return an iterator over the keys in this representation.
  virtual MemTableRep::Iterator* GetIterator(Arena* arena) override;

 private:
  friend class Iterator;
  typedef std::vector<const char*> Bucket;
  std::shared_ptr<Bucket> bucket_;
  mutable port::RWMutex rwlock_;
  bool immutable_;
  bool sorted_;
  const KeyComparator& compare_;
};

void VectorRep::Insert(KeyHandle handle) {
  auto* key = static_cast<char*>(handle);
  WriteLock l(&rwlock_);
  assert(!immutable_);
  bucket_->push_back(key);
}

// Returns true iff an entry that compares equal to key is in the collection.
bool VectorRep::Contains(const char* key) const {
  ReadLock l(&rwlock_);
  return std::find(bucket_->begin(), bucket_->end(), key) != bucket_->end();
}

void VectorRep::MarkReadOnly() {
  WriteLock l(&rwlock_);
  immutable_ = true;
}
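// Note: this charges only the vector of entry pointers. The key/value
// payloads themselves live in memory obtained from the MemTableAllocator
// and are accounted for by the allocator's arena, not here.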
size_t VectorRep::ApproximateMemoryUsage() {
  return
    sizeof(bucket_) + sizeof(*bucket_) +
    bucket_->size() *
    sizeof(
      std::remove_reference<decltype(*bucket_)>::type::value_type
    );
}

VectorRep::VectorRep(const KeyComparator& compare, MemTableAllocator* allocator,
                     size_t count)
    : MemTableRep(allocator),
      bucket_(new Bucket()),
      immutable_(false),
      sorted_(false),
      compare_(compare) { bucket_->reserve(count); }

VectorRep::Iterator::Iterator(class VectorRep* vrep,
                              std::shared_ptr<std::vector<const char*>> bucket,
                              const KeyComparator& compare)
    : vrep_(vrep),
      bucket_(bucket),
      cit_(bucket_->end()),
      compare_(compare),
      sorted_(false) { }
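// Sort the bucket at most once per iterator. For an immutable memtable
// (vrep_ != nullptr) the shared bucket is sorted in place under the rep's
// write lock, so every later iterator can skip the sort; for a mutable
// memtable the iterator owns a private copy of the bucket and sorts that.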
void VectorRep::Iterator::DoSort() const {
  // A non-null vrep_ means that we are working on an immutable memtable.
  if (!sorted_ && vrep_ != nullptr) {
    WriteLock l(&vrep_->rwlock_);
    if (!vrep_->sorted_) {
      std::sort(bucket_->begin(), bucket_->end(), Compare(compare_));
      cit_ = bucket_->begin();
      vrep_->sorted_ = true;
    }
    sorted_ = true;
  }
  if (!sorted_) {
    std::sort(bucket_->begin(), bucket_->end(), Compare(compare_));
    cit_ = bucket_->begin();
    sorted_ = true;
  }
  assert(sorted_);
  assert(vrep_ == nullptr || vrep_->sorted_);
}

// Returns true iff the iterator is positioned at a valid node.
bool VectorRep::Iterator::Valid() const {
  DoSort();
  return cit_ != bucket_->end();
}

// Returns the key at the current position.
// REQUIRES: Valid()
const char* VectorRep::Iterator::key() const {
  assert(Valid());
  return *cit_;
}

// Advances to the next position.
// REQUIRES: Valid()
void VectorRep::Iterator::Next() {
  assert(Valid());
  if (cit_ == bucket_->end()) {
    return;
  }
  ++cit_;
}

// Advances to the previous position.
// REQUIRES: Valid()
void VectorRep::Iterator::Prev() {
  assert(Valid());
  if (cit_ == bucket_->begin()) {
    // If you try to go back from the first element, the iterator should be
    // invalidated. So we set it to past-the-end. This means that you can
    // treat the container circularly.
    cit_ = bucket_->end();
  } else {
    --cit_;
  }
}

// Advance to the first entry with a key >= target
void VectorRep::Iterator::Seek(const Slice& user_key,
                               const char* memtable_key) {
  DoSort();
  // Do binary search to find first value not less than the target
  const char* encoded_key =
      (memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, user_key);
  cit_ = std::equal_range(bucket_->begin(),
                          bucket_->end(),
                          encoded_key,
                          [this] (const char* a, const char* b) {
                            return compare_(a, b) < 0;
                          }).first;
}
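
// Note: only the lower bound (.first) of the equal_range result is used
// above, so the seek is equivalent to a std::lower_bound over the sorted
// bucket with the same comparator.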

// Position at the first entry in collection.
// Final state of iterator is Valid() iff collection is not empty.
void VectorRep::Iterator::SeekToFirst() {
  DoSort();
  cit_ = bucket_->begin();
}

// Position at the last entry in collection.
// Final state of iterator is Valid() iff collection is not empty.
void VectorRep::Iterator::SeekToLast() {
  DoSort();
  cit_ = bucket_->end();
  if (bucket_->size() != 0) {
    --cit_;
  }
}
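// Invoke callback_func on each entry, starting from the first entry that
// sorts at or after k, until the callback returns false or the bucket is
// exhausted.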
void VectorRep::Get(const LookupKey& k, void* callback_args,
                    bool (*callback_func)(void* arg, const char* entry)) {
  rwlock_.ReadLock();
  VectorRep* vector_rep;
  std::shared_ptr<Bucket> bucket;
  if (immutable_) {
    vector_rep = this;
  } else {
    vector_rep = nullptr;
    bucket.reset(new Bucket(*bucket_));  // make a copy
  }
  VectorRep::Iterator iter(vector_rep, immutable_ ? bucket_ : bucket, compare_);
  rwlock_.ReadUnlock();

  for (iter.Seek(k.user_key(), k.memtable_key().data());
       iter.Valid() && callback_func(callback_args, iter.key()); iter.Next()) {
  }
}
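
// For example, a caller would typically pass a decoding callback of the form
//   bool SaveValue(void* arg, const char* entry);
// that inspects `entry` and returns false once the lookup can stop (this
// mirrors how MemTable::Get drives MemTableRep::Get).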
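// If an arena is supplied, the iterator is placement-new'ed into memory from
// that arena and is reclaimed in bulk with it; since the arena only frees raw
// memory, the owner must invoke the destructor explicitly if needed.
// Otherwise the iterator is heap-allocated and owned by the caller.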
MemTableRep::Iterator* VectorRep::GetIterator(Arena* arena) {
  char* mem = nullptr;
  if (arena != nullptr) {
    mem = arena->AllocateAligned(sizeof(Iterator));
  }
  ReadLock l(&rwlock_);
  // Do not sort here. The sorting would be done the first time
  // a Seek is performed on the iterator.
  if (immutable_) {
    if (arena == nullptr) {
      return new Iterator(this, bucket_, compare_);
    } else {
      return new (mem) Iterator(this, bucket_, compare_);
    }
  } else {
    std::shared_ptr<Bucket> tmp;
    tmp.reset(new Bucket(*bucket_));  // make a copy
    if (arena == nullptr) {
      return new Iterator(nullptr, tmp, compare_);
    } else {
      return new (mem) Iterator(nullptr, tmp, compare_);
    }
  }
}

} // anon namespace

MemTableRep* VectorRepFactory::CreateMemTableRep(
    const MemTableRep::KeyComparator& compare, MemTableAllocator* allocator,
    const SliceTransform*, Logger* logger) {
  return new VectorRep(compare, allocator, count_);
}
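
// Usage sketch (illustrative, not part of this translation unit): a
// vector-backed memtable is selected by installing a VectorRepFactory on the
// DB options, e.g.
//
//   rocksdb::Options options;
//   options.memtable_factory.reset(
//       new rocksdb::VectorRepFactory(/*count=*/10000));
//
// where `count` is forwarded to this factory and used above to reserve()
// initial capacity in the vector.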

} // namespace rocksdb
#endif // ROCKSDB_LITE