rocksdb/table/iterator.cc
Maysam Yabandeh 11526252cc Pinnableslice (2nd attempt)
Summary:
PinnableSlice

    Summary:
    Currently the point lookup values are copied to a string provided by the
    user. This incures an extra memcpy cost. This patch allows doing point lookup
    via a PinnableSlice which pins the source memory location (instead of
    copying their content) and releases them after the content is consumed
    by the user. The old API of Get(string) is translated to the new API
    underneath.

    Here is the summary for improvements:

    value 100 byte: 1.8% regular, 1.2% merge values
    value 1k byte: 11.5% regular, 7.5% merge values
    value 10k byte: 26% regular, 29.9% merge values
    The improvement for merge could be more if we extend this approach to
    pin the merge output and delay the full merge operation until the user
    actually needs it. We have put that for future work.

    PS:
    Sometimes we observe a small decrease in performance when switching from
    t5452014 to this patch but with the old Get(string) API. The d
Closes https://github.com/facebook/rocksdb/pull/1756

Differential Revision: D4391738

Pulled By: maysamyabandeh

fbshipit-source-id: 6f3edd3
2017-03-13 11:54:10 -07:00

181 lines
5.5 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "rocksdb/iterator.h"
#include "table/internal_iterator.h"
#include "table/iterator_wrapper.h"
#include "util/arena.h"
namespace rocksdb {
Cleanable::Cleanable() {
cleanup_.function = nullptr;
cleanup_.next = nullptr;
}
Cleanable::~Cleanable() { DoCleanup(); }
// If the entire linked list was on heap we could have simply add attach one
// link list to another. However the head is an embeded object to avoid the cost
// of creating objects for most of the use cases when the Cleanable has only one
// Cleanup to do. We could put evernything on heap if benchmarks show no
// negative impact on performance.
// Also we need to iterate on the linked list since there is no pointer to the
// tail. We can add the tail pointer but maintainin it might negatively impact
// the perforamnce for the common case of one cleanup where tail pointer is not
// needed. Again benchmarks could clarify that.
// Even without a tail pointer we could iterate on the list, find the tail, and
// have only that node updated without the need to insert the Cleanups one by
// one. This however would be redundant when the source Cleanable has one or a
// few Cleanups which is the case most of the time.
// TODO(myabandeh): if the list is too long we should maintain a tail pointer
// and have the entire list (minus the head that has to be inserted separately)
// merged with the target linked list at once.
void Cleanable::DelegateCleanupsTo(Cleanable* other) {
assert(other != nullptr);
if (cleanup_.function == nullptr) {
return;
}
Cleanup* c = &cleanup_;
other->RegisterCleanup(c->function, c->arg1, c->arg2);
c = c->next;
while (c != nullptr) {
Cleanup* next = c->next;
other->RegisterCleanup(c);
c = next;
}
cleanup_.function = nullptr;
cleanup_.next = nullptr;
}
void Cleanable::RegisterCleanup(Cleanable::Cleanup* c) {
assert(c != nullptr);
if (cleanup_.function == nullptr) {
cleanup_.function = c->function;
cleanup_.arg1 = c->arg1;
cleanup_.arg2 = c->arg2;
delete c;
} else {
c->next = cleanup_.next;
cleanup_.next = c;
}
}
void Cleanable::RegisterCleanup(CleanupFunction func, void* arg1, void* arg2) {
assert(func != nullptr);
Cleanup* c;
if (cleanup_.function == nullptr) {
c = &cleanup_;
} else {
c = new Cleanup;
c->next = cleanup_.next;
cleanup_.next = c;
}
c->function = func;
c->arg1 = arg1;
c->arg2 = arg2;
}
Status Iterator::GetProperty(std::string prop_name, std::string* prop) {
if (prop == nullptr) {
return Status::InvalidArgument("prop is nullptr");
}
if (prop_name == "rocksdb.iterator.is-key-pinned") {
*prop = "0";
return Status::OK();
}
return Status::InvalidArgument("Undentified property.");
}
namespace {
class EmptyIterator : public Iterator {
public:
explicit EmptyIterator(const Status& s) : status_(s) { }
virtual bool Valid() const override { return false; }
virtual void Seek(const Slice& target) override {}
virtual void SeekForPrev(const Slice& target) override {}
virtual void SeekToFirst() override {}
virtual void SeekToLast() override {}
virtual void Next() override { assert(false); }
virtual void Prev() override { assert(false); }
Slice key() const override {
assert(false);
return Slice();
}
Slice value() const override {
assert(false);
return Slice();
}
virtual Status status() const override { return status_; }
private:
Status status_;
};
class EmptyInternalIterator : public InternalIterator {
public:
explicit EmptyInternalIterator(const Status& s) : status_(s) {}
virtual bool Valid() const override { return false; }
virtual void Seek(const Slice& target) override {}
virtual void SeekForPrev(const Slice& target) override {}
virtual void SeekToFirst() override {}
virtual void SeekToLast() override {}
virtual void Next() override { assert(false); }
virtual void Prev() override { assert(false); }
Slice key() const override {
assert(false);
return Slice();
}
Slice value() const override {
assert(false);
return Slice();
}
virtual Status status() const override { return status_; }
private:
Status status_;
};
} // namespace
Iterator* NewEmptyIterator() {
return new EmptyIterator(Status::OK());
}
Iterator* NewErrorIterator(const Status& status) {
return new EmptyIterator(status);
}
InternalIterator* NewEmptyInternalIterator() {
return new EmptyInternalIterator(Status::OK());
}
InternalIterator* NewEmptyInternalIterator(Arena* arena) {
if (arena == nullptr) {
return NewEmptyInternalIterator();
} else {
auto mem = arena->AllocateAligned(sizeof(EmptyIterator));
return new (mem) EmptyInternalIterator(Status::OK());
}
}
InternalIterator* NewErrorInternalIterator(const Status& status) {
return new EmptyInternalIterator(status);
}
InternalIterator* NewErrorInternalIterator(const Status& status, Arena* arena) {
if (arena == nullptr) {
return NewErrorInternalIterator(status);
} else {
auto mem = arena->AllocateAligned(sizeof(EmptyIterator));
return new (mem) EmptyInternalIterator(status);
}
}
} // namespace rocksdb