rocksdb/db/db_impl_readonly.cc
Maysam Yabandeh 11526252cc Pinnableslice (2nd attempt)
Summary:
PinnableSlice

    Summary:
    Currently the point lookup values are copied to a string provided by the
    user. This incures an extra memcpy cost. This patch allows doing point lookup
    via a PinnableSlice which pins the source memory location (instead of
    copying their content) and releases them after the content is consumed
    by the user. The old API of Get(string) is translated to the new API
    underneath.

    Here is the summary for improvements:

    value 100 byte: 1.8% regular, 1.2% merge values
    value 1k byte: 11.5% regular, 7.5% merge values
    value 10k byte: 26% regular, 29.9% merge values
    The improvement for merge could be more if we extend this approach to
    pin the merge output and delay the full merge operation until the user
    actually needs it. We have put that for future work.

    PS:
    Sometimes we observe a small decrease in performance when switching from
    t5452014 to this patch but with the old Get(string) API. The d
Closes https://github.com/facebook/rocksdb/pull/1756

Differential Revision: D4391738

Pulled By: maysamyabandeh

fbshipit-source-id: 6f3edd3
2017-03-13 11:54:10 -07:00

199 lines
6.8 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
#include "db/db_impl_readonly.h"
#include "db/compacted_db_impl.h"
#include "db/db_impl.h"
#include "db/db_iter.h"
#include "db/merge_context.h"
#include "db/range_del_aggregator.h"
#include "util/perf_context_imp.h"
namespace rocksdb {
#ifndef ROCKSDB_LITE
DBImplReadOnly::DBImplReadOnly(const DBOptions& db_options,
const std::string& dbname)
: DBImpl(db_options, dbname) {
Log(INFO_LEVEL, immutable_db_options_.info_log,
"Opening the db in read only mode");
LogFlush(immutable_db_options_.info_log);
}
DBImplReadOnly::~DBImplReadOnly() {
}
// Implementations of the DB interface
Status DBImplReadOnly::Get(const ReadOptions& read_options,
ColumnFamilyHandle* column_family, const Slice& key,
PinnableSlice* pinnable_val) {
assert(pinnable_val != nullptr);
Status s;
SequenceNumber snapshot = versions_->LastSequence();
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
auto cfd = cfh->cfd();
SuperVersion* super_version = cfd->GetSuperVersion();
MergeContext merge_context;
RangeDelAggregator range_del_agg(cfd->internal_comparator(), snapshot);
LookupKey lkey(key, snapshot);
if (super_version->mem->Get(lkey, pinnable_val->GetSelf(), &s, &merge_context,
&range_del_agg, read_options)) {
pinnable_val->PinSelf();
} else {
PERF_TIMER_GUARD(get_from_output_files_time);
super_version->current->Get(read_options, lkey, pinnable_val, &s,
&merge_context, &range_del_agg);
}
return s;
}
Iterator* DBImplReadOnly::NewIterator(const ReadOptions& read_options,
ColumnFamilyHandle* column_family) {
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
auto cfd = cfh->cfd();
SuperVersion* super_version = cfd->GetSuperVersion()->Ref();
SequenceNumber latest_snapshot = versions_->LastSequence();
auto db_iter = NewArenaWrappedDbIterator(
env_, *cfd->ioptions(), cfd->user_comparator(),
(read_options.snapshot != nullptr
? reinterpret_cast<const SnapshotImpl*>(read_options.snapshot)
->number_
: latest_snapshot),
super_version->mutable_cf_options.max_sequential_skip_in_iterations,
super_version->version_number);
auto internal_iter =
NewInternalIterator(read_options, cfd, super_version, db_iter->GetArena(),
db_iter->GetRangeDelAggregator());
db_iter->SetIterUnderDBIter(internal_iter);
return db_iter;
}
Status DBImplReadOnly::NewIterators(
const ReadOptions& read_options,
const std::vector<ColumnFamilyHandle*>& column_families,
std::vector<Iterator*>* iterators) {
if (iterators == nullptr) {
return Status::InvalidArgument("iterators not allowed to be nullptr");
}
iterators->clear();
iterators->reserve(column_families.size());
SequenceNumber latest_snapshot = versions_->LastSequence();
for (auto cfh : column_families) {
auto* cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(cfh)->cfd();
auto* sv = cfd->GetSuperVersion()->Ref();
auto* db_iter = NewArenaWrappedDbIterator(
env_, *cfd->ioptions(), cfd->user_comparator(),
(read_options.snapshot != nullptr
? reinterpret_cast<const SnapshotImpl*>(read_options.snapshot)
->number_
: latest_snapshot),
sv->mutable_cf_options.max_sequential_skip_in_iterations,
sv->version_number);
auto* internal_iter =
NewInternalIterator(read_options, cfd, sv, db_iter->GetArena(),
db_iter->GetRangeDelAggregator());
db_iter->SetIterUnderDBIter(internal_iter);
iterators->push_back(db_iter);
}
return Status::OK();
}
Status DB::OpenForReadOnly(const Options& options, const std::string& dbname,
DB** dbptr, bool error_if_log_file_exist) {
*dbptr = nullptr;
// Try to first open DB as fully compacted DB
Status s;
s = CompactedDBImpl::Open(options, dbname, dbptr);
if (s.ok()) {
return s;
}
DBOptions db_options(options);
ColumnFamilyOptions cf_options(options);
std::vector<ColumnFamilyDescriptor> column_families;
column_families.push_back(
ColumnFamilyDescriptor(kDefaultColumnFamilyName, cf_options));
std::vector<ColumnFamilyHandle*> handles;
s = DB::OpenForReadOnly(db_options, dbname, column_families, &handles, dbptr);
if (s.ok()) {
assert(handles.size() == 1);
// i can delete the handle since DBImpl is always holding a
// reference to default column family
delete handles[0];
}
return s;
}
Status DB::OpenForReadOnly(
const DBOptions& db_options, const std::string& dbname,
const std::vector<ColumnFamilyDescriptor>& column_families,
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
bool error_if_log_file_exist) {
*dbptr = nullptr;
handles->clear();
DBImplReadOnly* impl = new DBImplReadOnly(db_options, dbname);
impl->mutex_.Lock();
Status s = impl->Recover(column_families, true /* read only */,
error_if_log_file_exist);
if (s.ok()) {
// set column family handles
for (auto cf : column_families) {
auto cfd =
impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name);
if (cfd == nullptr) {
s = Status::InvalidArgument("Column family not found: ", cf.name);
break;
}
handles->push_back(new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_));
}
}
if (s.ok()) {
for (auto cfd : *impl->versions_->GetColumnFamilySet()) {
delete cfd->InstallSuperVersion(new SuperVersion(), &impl->mutex_);
}
}
impl->mutex_.Unlock();
if (s.ok()) {
*dbptr = impl;
for (auto* h : *handles) {
impl->NewThreadStatusCfInfo(
reinterpret_cast<ColumnFamilyHandleImpl*>(h)->cfd());
}
} else {
for (auto h : *handles) {
delete h;
}
handles->clear();
delete impl;
}
return s;
}
#else // !ROCKSDB_LITE
Status DB::OpenForReadOnly(const Options& options, const std::string& dbname,
DB** dbptr, bool error_if_log_file_exist) {
return Status::NotSupported("Not supported in ROCKSDB_LITE.");
}
Status DB::OpenForReadOnly(
const DBOptions& db_options, const std::string& dbname,
const std::vector<ColumnFamilyDescriptor>& column_families,
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
bool error_if_log_file_exist) {
return Status::NotSupported("Not supported in ROCKSDB_LITE.");
}
#endif // !ROCKSDB_LITE
} // namespace rocksdb