rocksdb/db/db_iter.h
Islam AbdelRahman aececc209e Introduce ReadOptions::pin_data (support zero copy for keys)
Summary:
This patch update the Iterator API to introduce new functions that allow users to keep the Slices returned by key() valid as long as the Iterator is not deleted

ReadOptions::pin_data : If true keep loaded blocks in memory as long as the iterator is not deleted
Iterator::IsKeyPinned() : If true, this mean that the Slice returned by key() is valid as long as the iterator is not deleted

Also add a new option BlockBasedTableOptions::use_delta_encoding to allow users to disable delta_encoding if needed.

Benchmark results (using https://phabricator.fb.com/P20083553)

```
// $ du -h /home/tec/local/normal.4K.Snappy/db10077
// 6.1G    /home/tec/local/normal.4K.Snappy/db10077

// $ du -h /home/tec/local/zero.8K.LZ4/db10077
// 6.4G    /home/tec/local/zero.8K.LZ4/db10077

// Benchmarks for shard db10077
// _build/opt/rocks/benchmark/rocks_copy_benchmark \
//      --normal_db_path="/home/tec/local/normal.4K.Snappy/db10077" \
//      --zero_db_path="/home/tec/local/zero.8K.LZ4/db10077"

// First run
// ============================================================================
// rocks/benchmark/RocksCopyBenchmark.cpp          relative  time/iter  iters/s
// ============================================================================
// BM_StringCopy                                                 1.73s  576.97m
// BM_StringPiece                                   103.74%      1.67s  598.55m
// ============================================================================
// Match rate : 1000000 / 1000000

// Second run
// ============================================================================
// rocks/benchmark/RocksCopyBenchmark.cpp          relative  time/iter  iters/s
// ============================================================================
// BM_StringCopy                                              611.99ms     1.63
// BM_StringPiece                                   203.76%   300.35ms     3.33
// ============================================================================
// Match rate : 1000000 / 1000000
```

Test Plan: Unit tests

Reviewers: sdong, igor, anthony, yhchiang, rven

Reviewed By: rven

Subscribers: dhruba, lovro, adsharma

Differential Revision: https://reviews.facebook.net/D48999
2015-12-16 12:08:30 -08:00

85 lines
3.2 KiB
C++

// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <stdint.h>
#include "rocksdb/db.h"
#include "rocksdb/iterator.h"
#include "db/dbformat.h"
#include "util/arena.h"
#include "util/autovector.h"
namespace rocksdb {
class Arena;
class DBIter;
class InternalIterator;
// Return a new iterator that converts internal keys (yielded by
// "*internal_iter") that were live at the specified "sequence" number
// into appropriate user keys.
extern Iterator* NewDBIterator(Env* env, const ImmutableCFOptions& options,
const Comparator* user_key_comparator,
InternalIterator* internal_iter,
const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations,
const Slice* iterate_upper_bound = nullptr,
bool prefix_same_as_start = false,
bool pin_data = false);
// A wrapper iterator which wraps DB Iterator and the arena, with which the DB
// iterator is supposed be allocated. This class is used as an entry point of
// a iterator hierarchy whose memory can be allocated inline. In that way,
// accessing the iterator tree can be more cache friendly. It is also faster
// to allocate.
class ArenaWrappedDBIter : public Iterator {
public:
virtual ~ArenaWrappedDBIter();
// Get the arena to be used to allocate memory for DBIter to be wrapped,
// as well as child iterators in it.
virtual Arena* GetArena() { return &arena_; }
// Set the DB Iterator to be wrapped
virtual void SetDBIter(DBIter* iter);
// Set the internal iterator wrapped inside the DB Iterator. Usually it is
// a merging iterator.
virtual void SetIterUnderDBIter(InternalIterator* iter);
virtual bool Valid() const override;
virtual void SeekToFirst() override;
virtual void SeekToLast() override;
virtual void Seek(const Slice& target) override;
virtual void Next() override;
virtual void Prev() override;
virtual Slice key() const override;
virtual Slice value() const override;
virtual Status status() const override;
void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2);
virtual Status PinData();
virtual Status ReleasePinnedData();
virtual bool IsKeyPinned() const override;
private:
DBIter* db_iter_;
Arena arena_;
};
// Generate the arena wrapped iterator class.
extern ArenaWrappedDBIter* NewArenaWrappedDbIterator(
Env* env, const ImmutableCFOptions& options,
const Comparator* user_key_comparator, const SequenceNumber& sequence,
uint64_t max_sequential_skip_in_iterations,
const Slice* iterate_upper_bound = nullptr,
bool prefix_same_as_start = false, bool pin_data = false);
} // namespace rocksdb