rocksdb/table/internal_iterator.h
Maysam Yabandeh caf0f53a74 Index value delta encoding (#3983)
Summary:
Given that index value is a BlockHandle, which is basically an <offset, size> pair we can apply delta encoding on the values. The first value at each index restart interval encoded the full BlockHandle but the rest encode only the size. Refer to IndexBlockIter::DecodeCurrentValue for the detail of the encoding. This reduces the index size which helps using the  block cache more efficiently. The feature is enabled with using format_version 4.

The feature comes with a bit of cpu overhead which should be paid back by the higher cache hits due to smaller index block size.
Results with sysbench read-only using 4k blocks and using 16 index restart interval:
Format 2:
19585   rocksdb read-only range=100
Format 3:
19569   rocksdb read-only range=100
Format 4:
19352   rocksdb read-only range=100
Pull Request resolved: https://github.com/facebook/rocksdb/pull/3983

Differential Revision: D8361343

Pulled By: maysamyabandeh

fbshipit-source-id: f882ee082322acac32b0072e2bdbb0b5f854e651
2018-08-09 16:58:40 -07:00

143 lines
5.2 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#pragma once
#include <string>
#include "rocksdb/comparator.h"
#include "rocksdb/iterator.h"
#include "rocksdb/status.h"
#include "table/format.h"
namespace rocksdb {
class PinnedIteratorsManager;
template <class TValue>
class InternalIteratorBase : public Cleanable {
public:
InternalIteratorBase() {}
virtual ~InternalIteratorBase() {}
// An iterator is either positioned at a key/value pair, or
// not valid. This method returns true iff the iterator is valid.
// Always returns false if !status().ok().
virtual bool Valid() const = 0;
// Position at the first key in the source. The iterator is Valid()
// after this call iff the source is not empty.
virtual void SeekToFirst() = 0;
// Position at the last key in the source. The iterator is
// Valid() after this call iff the source is not empty.
virtual void SeekToLast() = 0;
// Position at the first key in the source that at or past target
// The iterator is Valid() after this call iff the source contains
// an entry that comes at or past target.
// All Seek*() methods clear any error status() that the iterator had prior to
// the call; after the seek, status() indicates only the error (if any) that
// happened during the seek, not any past errors.
virtual void Seek(const Slice& target) = 0;
// Position at the first key in the source that at or before target
// The iterator is Valid() after this call iff the source contains
// an entry that comes at or before target.
virtual void SeekForPrev(const Slice& target) = 0;
// Moves to the next entry in the source. After this call, Valid() is
// true iff the iterator was not positioned at the last entry in the source.
// REQUIRES: Valid()
virtual void Next() = 0;
// Moves to the previous entry in the source. After this call, Valid() is
// true iff the iterator was not positioned at the first entry in source.
// REQUIRES: Valid()
virtual void Prev() = 0;
// Return the key for the current entry. The underlying storage for
// the returned slice is valid only until the next modification of
// the iterator.
// REQUIRES: Valid()
virtual Slice key() const = 0;
// Return the value for the current entry. The underlying storage for
// the returned slice is valid only until the next modification of
// the iterator.
// REQUIRES: Valid()
virtual TValue value() const = 0;
// If an error has occurred, return it. Else return an ok status.
// If non-blocking IO is requested and this operation cannot be
// satisfied without doing some IO, then this returns Status::Incomplete().
virtual Status status() const = 0;
// True if the iterator is invalidated because it is out of the iterator
// upper bound
virtual bool IsOutOfBound() { return false; }
// Pass the PinnedIteratorsManager to the Iterator, most Iterators dont
// communicate with PinnedIteratorsManager so default implementation is no-op
// but for Iterators that need to communicate with PinnedIteratorsManager
// they will implement this function and use the passed pointer to communicate
// with PinnedIteratorsManager.
virtual void SetPinnedItersMgr(PinnedIteratorsManager* /*pinned_iters_mgr*/) {
}
// If true, this means that the Slice returned by key() is valid as long as
// PinnedIteratorsManager::ReleasePinnedData is not called and the
// Iterator is not deleted.
//
// IsKeyPinned() is guaranteed to always return true if
// - Iterator is created with ReadOptions::pin_data = true
// - DB tables were created with BlockBasedTableOptions::use_delta_encoding
// set to false.
virtual bool IsKeyPinned() const { return false; }
// If true, this means that the Slice returned by value() is valid as long as
// PinnedIteratorsManager::ReleasePinnedData is not called and the
// Iterator is not deleted.
virtual bool IsValuePinned() const { return false; }
virtual Status GetProperty(std::string /*prop_name*/, std::string* /*prop*/) {
return Status::NotSupported("");
}
protected:
void SeekForPrevImpl(const Slice& target, const Comparator* cmp) {
Seek(target);
if (!Valid()) {
SeekToLast();
}
while (Valid() && cmp->Compare(target, key()) < 0) {
Prev();
}
}
private:
// No copying allowed
InternalIteratorBase(const InternalIteratorBase&) = delete;
InternalIteratorBase& operator=(const InternalIteratorBase&) = delete;
};
using InternalIterator = InternalIteratorBase<Slice>;
// Return an empty iterator (yields nothing).
template <class TValue = Slice>
extern InternalIteratorBase<TValue>* NewEmptyInternalIterator();
// Return an empty iterator with the specified status.
template <class TValue = Slice>
extern InternalIteratorBase<TValue>* NewErrorInternalIterator(
const Status& status);
// Return an empty iterator with the specified status, allocated arena.
template <class TValue = Slice>
extern InternalIteratorBase<TValue>* NewErrorInternalIterator(
const Status& status, Arena* arena);
} // namespace rocksdb