Maysam Yabandeh 11526252cc Pinnableslice (2nd attempt)
Summary:
PinnableSlice

    Summary:
    Currently the point lookup values are copied to a string provided by the
    user. This incures an extra memcpy cost. This patch allows doing point lookup
    via a PinnableSlice which pins the source memory location (instead of
    copying their content) and releases them after the content is consumed
    by the user. The old API of Get(string) is translated to the new API
    underneath.

    Here is the summary for improvements:

    value 100 byte: 1.8% regular, 1.2% merge values
    value 1k byte: 11.5% regular, 7.5% merge values
    value 10k byte: 26% regular, 29.9% merge values
    The improvement for merge could be more if we extend this approach to
    pin the merge output and delay the full merge operation until the user
    actually needs it. We have put that for future work.

    PS:
    Sometimes we observe a small decrease in performance when switching from
    t5452014 to this patch but with the old Get(string) API. The d
Closes https://github.com/facebook/rocksdb/pull/1756

Differential Revision: D4391738

Pulled By: maysamyabandeh

fbshipit-source-id: 6f3edd3
2017-03-13 11:54:10 -07:00

238 lines
6.8 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// Slice is a simple structure containing a pointer into some external
// storage and a size. The user of a Slice must ensure that the slice
// is not used after the corresponding external storage has been
// deallocated.
//
// Multiple threads can invoke const methods on a Slice without
// external synchronization, but if any of the threads may call a
// non-const method, all threads accessing the same Slice must use
// external synchronization.
#ifndef STORAGE_ROCKSDB_INCLUDE_SLICE_H_
#define STORAGE_ROCKSDB_INCLUDE_SLICE_H_
#include <assert.h>
#include <cstdio>
#include <stddef.h>
#include <string.h>
#include <string>
#include "rocksdb/cleanable.h"
namespace rocksdb {
class Slice {
public:
// Create an empty slice.
Slice() : data_(""), size_(0) { }
// Create a slice that refers to d[0,n-1].
Slice(const char* d, size_t n) : data_(d), size_(n) { }
// Create a slice that refers to the contents of "s"
/* implicit */
Slice(const std::string& s) : data_(s.data()), size_(s.size()) { }
// Create a slice that refers to s[0,strlen(s)-1]
/* implicit */
Slice(const char* s) : data_(s), size_(strlen(s)) { }
// Create a single slice from SliceParts using buf as storage.
// buf must exist as long as the returned Slice exists.
Slice(const struct SliceParts& parts, std::string* buf);
// Return a pointer to the beginning of the referenced data
const char* data() const { return data_; }
// Return the length (in bytes) of the referenced data
size_t size() const { return size_; }
// Return true iff the length of the referenced data is zero
bool empty() const { return size_ == 0; }
// Return the ith byte in the referenced data.
// REQUIRES: n < size()
char operator[](size_t n) const {
assert(n < size());
return data_[n];
}
// Change this slice to refer to an empty array
void clear() { data_ = ""; size_ = 0; }
// Drop the first "n" bytes from this slice.
void remove_prefix(size_t n) {
assert(n <= size());
data_ += n;
size_ -= n;
}
void remove_suffix(size_t n) {
assert(n <= size());
size_ -= n;
}
// Return a string that contains the copy of the referenced data.
// when hex is true, returns a string of twice the length hex encoded (0-9A-F)
std::string ToString(bool hex = false) const;
// Decodes the current slice interpreted as an hexadecimal string into result,
// if successful returns true, if this isn't a valid hex string
// (e.g not coming from Slice::ToString(true)) DecodeHex returns false.
// This slice is expected to have an even number of 0-9A-F characters
// also accepts lowercase (a-f)
bool DecodeHex(std::string* result) const;
// Three-way comparison. Returns value:
// < 0 iff "*this" < "b",
// == 0 iff "*this" == "b",
// > 0 iff "*this" > "b"
int compare(const Slice& b) const;
// Return true iff "x" is a prefix of "*this"
bool starts_with(const Slice& x) const {
return ((size_ >= x.size_) &&
(memcmp(data_, x.data_, x.size_) == 0));
}
bool ends_with(const Slice& x) const {
return ((size_ >= x.size_) &&
(memcmp(data_ + size_ - x.size_, x.data_, x.size_) == 0));
}
// Compare two slices and returns the first byte where they differ
size_t difference_offset(const Slice& b) const;
// private: make these public for rocksdbjni access
const char* data_;
size_t size_;
// Intentionally copyable
};
/**
* A Slice that can be pinned with some cleanup tasks, which will be run upon
* ::Reset() or object destruction, whichever is invoked first. This can be used
* to avoid memcpy by having the PinnsableSlice object referring to the data
* that is locked in the memory and release them after the data is consuned.
*/
class PinnableSlice : public Slice, public Cleanable {
public:
PinnableSlice() { buf_ = &self_space_; }
explicit PinnableSlice(std::string* buf) { buf_ = buf; }
inline void PinSlice(const Slice& s, CleanupFunction f, void* arg1,
void* arg2) {
assert(!pinned_);
pinned_ = true;
data_ = s.data();
size_ = s.size();
RegisterCleanup(f, arg1, arg2);
assert(pinned_);
}
inline void PinSlice(const Slice& s, Cleanable* cleanable) {
assert(!pinned_);
pinned_ = true;
data_ = s.data();
size_ = s.size();
cleanable->DelegateCleanupsTo(this);
assert(pinned_);
}
inline void PinSelf(const Slice& slice) {
assert(!pinned_);
buf_->assign(slice.data(), slice.size());
data_ = buf_->data();
size_ = buf_->size();
assert(!pinned_);
}
inline void PinSelf() {
assert(!pinned_);
data_ = buf_->data();
size_ = buf_->size();
assert(!pinned_);
}
void remove_suffix(size_t n) {
assert(n <= size());
if (pinned_) {
size_ -= n;
} else {
buf_->erase(size() - n, n);
PinSelf();
}
}
void remove_prefix(size_t n) {
assert(0); // Not implemented
}
void Reset() {
Cleanable::Reset();
pinned_ = false;
}
inline std::string* GetSelf() { return buf_; }
inline bool IsPinned() { return pinned_; }
private:
friend class PinnableSlice4Test;
std::string self_space_;
std::string* buf_;
bool pinned_ = false;
};
// A set of Slices that are virtually concatenated together. 'parts' points
// to an array of Slices. The number of elements in the array is 'num_parts'.
struct SliceParts {
SliceParts(const Slice* _parts, int _num_parts) :
parts(_parts), num_parts(_num_parts) { }
SliceParts() : parts(nullptr), num_parts(0) {}
const Slice* parts;
int num_parts;
};
inline bool operator==(const Slice& x, const Slice& y) {
return ((x.size() == y.size()) &&
(memcmp(x.data(), y.data(), x.size()) == 0));
}
inline bool operator!=(const Slice& x, const Slice& y) {
return !(x == y);
}
inline int Slice::compare(const Slice& b) const {
const size_t min_len = (size_ < b.size_) ? size_ : b.size_;
int r = memcmp(data_, b.data_, min_len);
if (r == 0) {
if (size_ < b.size_) r = -1;
else if (size_ > b.size_) r = +1;
}
return r;
}
inline size_t Slice::difference_offset(const Slice& b) const {
size_t off = 0;
const size_t len = (size_ < b.size_) ? size_ : b.size_;
for (; off < len; off++) {
if (data_[off] != b.data_[off]) break;
}
return off;
}
} // namespace rocksdb
#endif // STORAGE_ROCKSDB_INCLUDE_SLICE_H_