Remove malloc when creating data and index iterators in Get
Summary:
Define Block::Iter as an independent class (BlockIter) so that it can be used by block_based_table_reader.
When creating data and index iterators, update an existing iterator rather than allocating a new one.
This reduces the number of malloc and free calls.
Benchmark:
Base:
commit 76286ee67e
commands:
--db=/dev/shm/rocksdb --num_levels=6 --key_size=20 --prefix_size=20 --keys_per_prefix=0 --value_size=100 --write_buffer_size=134217728 --max_write_buffer_number=2 --target_file_size_base=33554432 --max_bytes_for_level_base=1073741824 --verify_checksum=false --max_background_compactions=4 --use_plain_table=0 --memtablerep=prefix_hash --open_files=-1 --mmap_read=1 --mmap_write=0 --bloom_bits=10 --bloom_locality=1 --memtable_bloom_bits=500000 --compression_type=lz4 --num=2621440 --use_hash_search=1 --block_size=1024 --block_restart_interval=1 --use_existing_db=1 --threads=1 --benchmarks=readrandom --disable_auto_compactions=1
malloc: 3.30% -> 1.42%
free: 3.59%->1.61%
Test Plan:
make all check
run db_stress
valgrind ./db_test ./table_test
Reviewers: ljin, yhchiang, dhruba, igor, sdong
Reviewed By: sdong
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D20655
This commit is contained in:
parent
76286ee67e
commit
8f09d53fd1
187
table/block.cc
187
table/block.cc
@ -17,44 +17,14 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "rocksdb/comparator.h"
|
#include "rocksdb/comparator.h"
|
||||||
|
#include "table/format.h"
|
||||||
#include "table/block_hash_index.h"
|
#include "table/block_hash_index.h"
|
||||||
#include "table/block_prefix_index.h"
|
#include "table/block_prefix_index.h"
|
||||||
#include "table/format.h"
|
|
||||||
#include "util/coding.h"
|
#include "util/coding.h"
|
||||||
#include "util/logging.h"
|
#include "util/logging.h"
|
||||||
#include "db/dbformat.h"
|
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
uint32_t Block::NumRestarts() const {
|
|
||||||
assert(size_ >= 2*sizeof(uint32_t));
|
|
||||||
return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
Block::Block(const BlockContents& contents)
|
|
||||||
: data_(contents.data.data()),
|
|
||||||
size_(contents.data.size()),
|
|
||||||
owned_(contents.heap_allocated),
|
|
||||||
cachable_(contents.cachable),
|
|
||||||
compression_type_(contents.compression_type) {
|
|
||||||
if (size_ < sizeof(uint32_t)) {
|
|
||||||
size_ = 0; // Error marker
|
|
||||||
} else {
|
|
||||||
restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t);
|
|
||||||
if (restart_offset_ > size_ - sizeof(uint32_t)) {
|
|
||||||
// The size is too small for NumRestarts() and therefore
|
|
||||||
// restart_offset_ wrapped around.
|
|
||||||
size_ = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Block::~Block() {
|
|
||||||
if (owned_) {
|
|
||||||
delete[] data_;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper routine: decode the next block entry starting at "p",
|
// Helper routine: decode the next block entry starting at "p",
|
||||||
// storing the number of shared key bytes, non_shared key bytes,
|
// storing the number of shared key bytes, non_shared key bytes,
|
||||||
// and the length of the value in "*shared", "*non_shared", and
|
// and the length of the value in "*shared", "*non_shared", and
|
||||||
@ -85,78 +55,12 @@ static inline const char* DecodeEntry(const char* p, const char* limit,
|
|||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
class Block::Iter : public Iterator {
|
void BlockIter::Next() {
|
||||||
private:
|
|
||||||
const Comparator* const comparator_;
|
|
||||||
const char* const data_; // underlying block contents
|
|
||||||
uint32_t const restarts_; // Offset of restart array (list of fixed32)
|
|
||||||
uint32_t const num_restarts_; // Number of uint32_t entries in restart array
|
|
||||||
|
|
||||||
// current_ is offset in data_ of current entry. >= restarts_ if !Valid
|
|
||||||
uint32_t current_;
|
|
||||||
uint32_t restart_index_; // Index of restart block in which current_ falls
|
|
||||||
IterKey key_;
|
|
||||||
Slice value_;
|
|
||||||
Status status_;
|
|
||||||
BlockHashIndex* hash_index_;
|
|
||||||
BlockPrefixIndex* prefix_index_;
|
|
||||||
|
|
||||||
inline int Compare(const Slice& a, const Slice& b) const {
|
|
||||||
return comparator_->Compare(a, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return the offset in data_ just past the end of the current entry.
|
|
||||||
inline uint32_t NextEntryOffset() const {
|
|
||||||
return (value_.data() + value_.size()) - data_;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t GetRestartPoint(uint32_t index) {
|
|
||||||
assert(index < num_restarts_);
|
|
||||||
return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
void SeekToRestartPoint(uint32_t index) {
|
|
||||||
key_.Clear();
|
|
||||||
restart_index_ = index;
|
|
||||||
// current_ will be fixed by ParseNextKey();
|
|
||||||
|
|
||||||
// ParseNextKey() starts at the end of value_, so set value_ accordingly
|
|
||||||
uint32_t offset = GetRestartPoint(index);
|
|
||||||
value_ = Slice(data_ + offset, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
Iter(const Comparator* comparator, const char* data, uint32_t restarts,
|
|
||||||
uint32_t num_restarts, BlockHashIndex* hash_index,
|
|
||||||
BlockPrefixIndex* prefix_index)
|
|
||||||
: comparator_(comparator),
|
|
||||||
data_(data),
|
|
||||||
restarts_(restarts),
|
|
||||||
num_restarts_(num_restarts),
|
|
||||||
current_(restarts_),
|
|
||||||
restart_index_(num_restarts_),
|
|
||||||
hash_index_(hash_index),
|
|
||||||
prefix_index_(prefix_index) {
|
|
||||||
assert(num_restarts_ > 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual bool Valid() const { return current_ < restarts_; }
|
|
||||||
virtual Status status() const { return status_; }
|
|
||||||
virtual Slice key() const {
|
|
||||||
assert(Valid());
|
|
||||||
return key_.GetKey();
|
|
||||||
}
|
|
||||||
virtual Slice value() const {
|
|
||||||
assert(Valid());
|
|
||||||
return value_;
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void Next() {
|
|
||||||
assert(Valid());
|
assert(Valid());
|
||||||
ParseNextKey();
|
ParseNextKey();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void Prev() {
|
void BlockIter::Prev() {
|
||||||
assert(Valid());
|
assert(Valid());
|
||||||
|
|
||||||
// Scan backwards to a restart point before current_
|
// Scan backwards to a restart point before current_
|
||||||
@ -177,7 +81,10 @@ class Block::Iter : public Iterator {
|
|||||||
} while (ParseNextKey() && NextEntryOffset() < original);
|
} while (ParseNextKey() && NextEntryOffset() < original);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void Seek(const Slice& target) {
|
void BlockIter::Seek(const Slice& target) {
|
||||||
|
if (data_ == nullptr) { // Not init yet
|
||||||
|
return;
|
||||||
|
}
|
||||||
uint32_t index = 0;
|
uint32_t index = 0;
|
||||||
bool ok = false;
|
bool ok = false;
|
||||||
if (prefix_index_) {
|
if (prefix_index_) {
|
||||||
@ -199,20 +106,26 @@ class Block::Iter : public Iterator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
virtual void SeekToFirst() {
|
|
||||||
|
void BlockIter::SeekToFirst() {
|
||||||
|
if (data_ == nullptr) { // Not init yet
|
||||||
|
return;
|
||||||
|
}
|
||||||
SeekToRestartPoint(0);
|
SeekToRestartPoint(0);
|
||||||
ParseNextKey();
|
ParseNextKey();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual void SeekToLast() {
|
void BlockIter::SeekToLast() {
|
||||||
|
if (data_ == nullptr) { // Not init yet
|
||||||
|
return;
|
||||||
|
}
|
||||||
SeekToRestartPoint(num_restarts_ - 1);
|
SeekToRestartPoint(num_restarts_ - 1);
|
||||||
while (ParseNextKey() && NextEntryOffset() < restarts_) {
|
while (ParseNextKey() && NextEntryOffset() < restarts_) {
|
||||||
// Keep skipping
|
// Keep skipping
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
void BlockIter::CorruptionError() {
|
||||||
void CorruptionError() {
|
|
||||||
current_ = restarts_;
|
current_ = restarts_;
|
||||||
restart_index_ = num_restarts_;
|
restart_index_ = num_restarts_;
|
||||||
status_ = Status::Corruption("bad entry in block");
|
status_ = Status::Corruption("bad entry in block");
|
||||||
@ -220,7 +133,7 @@ class Block::Iter : public Iterator {
|
|||||||
value_.clear();
|
value_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ParseNextKey() {
|
bool BlockIter::ParseNextKey() {
|
||||||
current_ = NextEntryOffset();
|
current_ = NextEntryOffset();
|
||||||
const char* p = data_ + current_;
|
const char* p = data_ + current_;
|
||||||
const char* limit = data_ + restarts_; // Restarts come right after data
|
const char* limit = data_ + restarts_; // Restarts come right after data
|
||||||
@ -250,7 +163,7 @@ class Block::Iter : public Iterator {
|
|||||||
|
|
||||||
// Binary search in restart array to find the first restart point
|
// Binary search in restart array to find the first restart point
|
||||||
// with a key >= target (TODO: this comment is inaccurate)
|
// with a key >= target (TODO: this comment is inaccurate)
|
||||||
bool BinarySeek(const Slice& target, uint32_t left, uint32_t right,
|
bool BlockIter::BinarySeek(const Slice& target, uint32_t left, uint32_t right,
|
||||||
uint32_t* index) {
|
uint32_t* index) {
|
||||||
assert(left <= right);
|
assert(left <= right);
|
||||||
|
|
||||||
@ -286,7 +199,7 @@ class Block::Iter : public Iterator {
|
|||||||
|
|
||||||
// Compare target key and the block key of the block of `block_index`.
|
// Compare target key and the block key of the block of `block_index`.
|
||||||
// Return -1 if error.
|
// Return -1 if error.
|
||||||
int CompareBlockKey(uint32_t block_index, const Slice& target) {
|
int BlockIter::CompareBlockKey(uint32_t block_index, const Slice& target) {
|
||||||
uint32_t region_offset = GetRestartPoint(block_index);
|
uint32_t region_offset = GetRestartPoint(block_index);
|
||||||
uint32_t shared, non_shared, value_length;
|
uint32_t shared, non_shared, value_length;
|
||||||
const char* key_ptr = DecodeEntry(data_ + region_offset, data_ + restarts_,
|
const char* key_ptr = DecodeEntry(data_ + region_offset, data_ + restarts_,
|
||||||
@ -301,7 +214,7 @@ class Block::Iter : public Iterator {
|
|||||||
|
|
||||||
// Binary search in block_ids to find the first block
|
// Binary search in block_ids to find the first block
|
||||||
// with a key >= target
|
// with a key >= target
|
||||||
bool BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids,
|
bool BlockIter::BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids,
|
||||||
uint32_t left, uint32_t right,
|
uint32_t left, uint32_t right,
|
||||||
uint32_t* index) {
|
uint32_t* index) {
|
||||||
assert(left <= right);
|
assert(left <= right);
|
||||||
@ -351,7 +264,7 @@ class Block::Iter : public Iterator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HashSeek(const Slice& target, uint32_t* index) {
|
bool BlockIter::HashSeek(const Slice& target, uint32_t* index) {
|
||||||
assert(hash_index_);
|
assert(hash_index_);
|
||||||
auto restart_index = hash_index_->GetRestartIndex(target);
|
auto restart_index = hash_index_->GetRestartIndex(target);
|
||||||
if (restart_index == nullptr) {
|
if (restart_index == nullptr) {
|
||||||
@ -366,12 +279,11 @@ class Block::Iter : public Iterator {
|
|||||||
return BinarySeek(target, left, right, index);
|
return BinarySeek(target, left, right, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PrefixSeek(const Slice& target, uint32_t* index) {
|
bool BlockIter::PrefixSeek(const Slice& target, uint32_t* index) {
|
||||||
assert(prefix_index_);
|
assert(prefix_index_);
|
||||||
uint32_t* block_ids = nullptr;
|
uint32_t* block_ids = nullptr;
|
||||||
uint32_t num_blocks = prefix_index_->GetBlocks(target, &block_ids);
|
uint32_t num_blocks = prefix_index_->GetBlocks(target, &block_ids);
|
||||||
|
|
||||||
|
|
||||||
if (num_blocks == 0) {
|
if (num_blocks == 0) {
|
||||||
current_ = restarts_;
|
current_ = restarts_;
|
||||||
return false;
|
return false;
|
||||||
@ -379,21 +291,66 @@ class Block::Iter : public Iterator {
|
|||||||
return BinaryBlockIndexSeek(target, block_ids, 0, num_blocks - 1, index);
|
return BinaryBlockIndexSeek(target, block_ids, 0, num_blocks - 1, index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
Iterator* Block::NewIterator(const Comparator* cmp) {
|
uint32_t Block::NumRestarts() const {
|
||||||
|
assert(size_ >= 2*sizeof(uint32_t));
|
||||||
|
return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
Block::Block(const BlockContents& contents)
|
||||||
|
: data_(contents.data.data()),
|
||||||
|
size_(contents.data.size()),
|
||||||
|
owned_(contents.heap_allocated),
|
||||||
|
cachable_(contents.cachable),
|
||||||
|
compression_type_(contents.compression_type) {
|
||||||
|
if (size_ < sizeof(uint32_t)) {
|
||||||
|
size_ = 0; // Error marker
|
||||||
|
} else {
|
||||||
|
restart_offset_ = size_ - (1 + NumRestarts()) * sizeof(uint32_t);
|
||||||
|
if (restart_offset_ > size_ - sizeof(uint32_t)) {
|
||||||
|
// The size is too small for NumRestarts() and therefore
|
||||||
|
// restart_offset_ wrapped around.
|
||||||
|
size_ = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Block::~Block() {
|
||||||
|
if (owned_) {
|
||||||
|
delete[] data_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Iterator* Block::NewIterator(const Comparator* cmp, BlockIter* iter) {
|
||||||
if (size_ < 2*sizeof(uint32_t)) {
|
if (size_ < 2*sizeof(uint32_t)) {
|
||||||
|
if (iter != nullptr) {
|
||||||
|
iter->SetStatus(Status::Corruption("bad block contents"));
|
||||||
|
return iter;
|
||||||
|
} else {
|
||||||
return NewErrorIterator(Status::Corruption("bad block contents"));
|
return NewErrorIterator(Status::Corruption("bad block contents"));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
const uint32_t num_restarts = NumRestarts();
|
const uint32_t num_restarts = NumRestarts();
|
||||||
if (num_restarts == 0) {
|
if (num_restarts == 0) {
|
||||||
return NewEmptyIterator();
|
if (iter != nullptr) {
|
||||||
|
iter->SetStatus(Status::OK());
|
||||||
|
return iter;
|
||||||
} else {
|
} else {
|
||||||
return new Iter(cmp, data_, restart_offset_, num_restarts,
|
return NewEmptyIterator();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (iter != nullptr) {
|
||||||
|
iter->Initialize(cmp, data_, restart_offset_, num_restarts,
|
||||||
|
hash_index_.get(), prefix_index_.get());
|
||||||
|
} else {
|
||||||
|
iter = new BlockIter(cmp, data_, restart_offset_, num_restarts,
|
||||||
hash_index_.get(), prefix_index_.get());
|
hash_index_.get(), prefix_index_.get());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return iter;
|
||||||
|
}
|
||||||
|
|
||||||
void Block::SetBlockHashIndex(BlockHashIndex* hash_index) {
|
void Block::SetBlockHashIndex(BlockHashIndex* hash_index) {
|
||||||
hash_index_.reset(hash_index);
|
hash_index_.reset(hash_index);
|
||||||
}
|
}
|
||||||
|
128
table/block.h
128
table/block.h
@ -13,11 +13,13 @@
|
|||||||
|
|
||||||
#include "rocksdb/iterator.h"
|
#include "rocksdb/iterator.h"
|
||||||
#include "rocksdb/options.h"
|
#include "rocksdb/options.h"
|
||||||
|
#include "db/dbformat.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
struct BlockContents;
|
struct BlockContents;
|
||||||
class Comparator;
|
class Comparator;
|
||||||
|
class BlockIter;
|
||||||
class BlockHashIndex;
|
class BlockHashIndex;
|
||||||
class BlockPrefixIndex;
|
class BlockPrefixIndex;
|
||||||
|
|
||||||
@ -40,7 +42,11 @@ class Block {
|
|||||||
// NOTE: for the hash based lookup, if a key prefix doesn't match any key,
|
// NOTE: for the hash based lookup, if a key prefix doesn't match any key,
|
||||||
// the iterator will simply be set as "invalid", rather than returning
|
// the iterator will simply be set as "invalid", rather than returning
|
||||||
// the key that is just past the target key.
|
// the key that is just past the target key.
|
||||||
Iterator* NewIterator(const Comparator* comparator);
|
//
|
||||||
|
// If iter is null, return new Iterator
|
||||||
|
// If iter is not null, update this one and return it as Iterator*
|
||||||
|
Iterator* NewIterator(const Comparator* comparator,
|
||||||
|
BlockIter* iter = nullptr);
|
||||||
void SetBlockHashIndex(BlockHashIndex* hash_index);
|
void SetBlockHashIndex(BlockHashIndex* hash_index);
|
||||||
void SetBlockPrefixIndex(BlockPrefixIndex* prefix_index);
|
void SetBlockPrefixIndex(BlockPrefixIndex* prefix_index);
|
||||||
|
|
||||||
@ -57,8 +63,126 @@ class Block {
|
|||||||
// No copying allowed
|
// No copying allowed
|
||||||
Block(const Block&);
|
Block(const Block&);
|
||||||
void operator=(const Block&);
|
void operator=(const Block&);
|
||||||
|
};
|
||||||
|
|
||||||
|
class BlockIter : public Iterator {
|
||||||
|
public:
|
||||||
|
BlockIter()
|
||||||
|
: comparator_(nullptr),
|
||||||
|
data_(nullptr),
|
||||||
|
restarts_(0),
|
||||||
|
num_restarts_(0),
|
||||||
|
current_(0),
|
||||||
|
restart_index_(0),
|
||||||
|
status_(Status::OK()),
|
||||||
|
hash_index_(nullptr),
|
||||||
|
prefix_index_(nullptr) {}
|
||||||
|
|
||||||
|
BlockIter(const Comparator* comparator, const char* data, uint32_t restarts,
|
||||||
|
uint32_t num_restarts, BlockHashIndex* hash_index,
|
||||||
|
BlockPrefixIndex* prefix_index)
|
||||||
|
: BlockIter() {
|
||||||
|
Initialize(comparator, data, restarts, num_restarts,
|
||||||
|
hash_index, prefix_index);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Initialize(const Comparator* comparator, const char* data,
|
||||||
|
uint32_t restarts, uint32_t num_restarts, BlockHashIndex* hash_index,
|
||||||
|
BlockPrefixIndex* prefix_index) {
|
||||||
|
assert(data_ == nullptr); // Ensure it is called only once
|
||||||
|
assert(num_restarts > 0); // Ensure the param is valid
|
||||||
|
|
||||||
|
comparator_ = comparator;
|
||||||
|
data_ = data;
|
||||||
|
restarts_ = restarts;
|
||||||
|
num_restarts_ = num_restarts;
|
||||||
|
current_ = restarts_;
|
||||||
|
restart_index_ = num_restarts_;
|
||||||
|
hash_index_ = hash_index;
|
||||||
|
prefix_index_ = prefix_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetStatus(Status s) {
|
||||||
|
status_ = s;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool Valid() const override { return current_ < restarts_; }
|
||||||
|
virtual Status status() const override { return status_; }
|
||||||
|
virtual Slice key() const override {
|
||||||
|
assert(Valid());
|
||||||
|
return key_.GetKey();
|
||||||
|
}
|
||||||
|
virtual Slice value() const override {
|
||||||
|
assert(Valid());
|
||||||
|
return value_;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void Next() override;
|
||||||
|
|
||||||
|
virtual void Prev() override;
|
||||||
|
|
||||||
|
virtual void Seek(const Slice& target) override;
|
||||||
|
|
||||||
|
virtual void SeekToFirst() override;
|
||||||
|
|
||||||
|
virtual void SeekToLast() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const Comparator* comparator_;
|
||||||
|
const char* data_; // underlying block contents
|
||||||
|
uint32_t restarts_; // Offset of restart array (list of fixed32)
|
||||||
|
uint32_t num_restarts_; // Number of uint32_t entries in restart array
|
||||||
|
|
||||||
|
// current_ is offset in data_ of current entry. >= restarts_ if !Valid
|
||||||
|
uint32_t current_;
|
||||||
|
uint32_t restart_index_; // Index of restart block in which current_ falls
|
||||||
|
IterKey key_;
|
||||||
|
Slice value_;
|
||||||
|
Status status_;
|
||||||
|
BlockHashIndex* hash_index_;
|
||||||
|
BlockPrefixIndex* prefix_index_;
|
||||||
|
|
||||||
|
inline int Compare(const Slice& a, const Slice& b) const {
|
||||||
|
return comparator_->Compare(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the offset in data_ just past the end of the current entry.
|
||||||
|
inline uint32_t NextEntryOffset() const {
|
||||||
|
return (value_.data() + value_.size()) - data_;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t GetRestartPoint(uint32_t index) {
|
||||||
|
assert(index < num_restarts_);
|
||||||
|
return DecodeFixed32(data_ + restarts_ + index * sizeof(uint32_t));
|
||||||
|
}
|
||||||
|
|
||||||
|
void SeekToRestartPoint(uint32_t index) {
|
||||||
|
key_.Clear();
|
||||||
|
restart_index_ = index;
|
||||||
|
// current_ will be fixed by ParseNextKey();
|
||||||
|
|
||||||
|
// ParseNextKey() starts at the end of value_, so set value_ accordingly
|
||||||
|
uint32_t offset = GetRestartPoint(index);
|
||||||
|
value_ = Slice(data_ + offset, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CorruptionError();
|
||||||
|
|
||||||
|
bool ParseNextKey();
|
||||||
|
|
||||||
|
bool BinarySeek(const Slice& target, uint32_t left, uint32_t right,
|
||||||
|
uint32_t* index);
|
||||||
|
|
||||||
|
int CompareBlockKey(uint32_t block_index, const Slice& target);
|
||||||
|
|
||||||
|
bool BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids,
|
||||||
|
uint32_t left, uint32_t right,
|
||||||
|
uint32_t* index);
|
||||||
|
|
||||||
|
bool HashSeek(const Slice& target, uint32_t* index);
|
||||||
|
|
||||||
|
bool PrefixSeek(const Slice& target, uint32_t* index);
|
||||||
|
|
||||||
class Iter;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -135,7 +135,9 @@ class BlockBasedTable::IndexReader {
|
|||||||
virtual ~IndexReader() {}
|
virtual ~IndexReader() {}
|
||||||
|
|
||||||
// Create an iterator for index access.
|
// Create an iterator for index access.
|
||||||
virtual Iterator* NewIterator() = 0;
|
// An iter is passed in, if it is not null, update this one and return it
|
||||||
|
// If it is null, create a new Iterator
|
||||||
|
virtual Iterator* NewIterator(BlockIter* iter = nullptr) = 0;
|
||||||
|
|
||||||
// The size of the index.
|
// The size of the index.
|
||||||
virtual size_t size() const = 0;
|
virtual size_t size() const = 0;
|
||||||
@ -168,8 +170,8 @@ class BinarySearchIndexReader : public IndexReader {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual Iterator* NewIterator() override {
|
virtual Iterator* NewIterator(BlockIter* iter = nullptr) override {
|
||||||
return index_block_->NewIterator(comparator_);
|
return index_block_->NewIterator(comparator_, iter);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual size_t size() const override { return index_block_->size(); }
|
virtual size_t size() const override { return index_block_->size(); }
|
||||||
@ -284,8 +286,8 @@ class HashIndexReader : public IndexReader {
|
|||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual Iterator* NewIterator() override {
|
virtual Iterator* NewIterator(BlockIter* iter = nullptr) override {
|
||||||
return index_block_->NewIterator(comparator_);
|
return index_block_->NewIterator(comparator_, iter);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual size_t size() const override { return index_block_->size(); }
|
virtual size_t size() const override { return index_block_->size(); }
|
||||||
@ -779,10 +781,11 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
|||||||
return { filter, cache_handle };
|
return { filter, cache_handle };
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) {
|
Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options,
|
||||||
|
BlockIter* input_iter) {
|
||||||
// index reader has already been pre-populated.
|
// index reader has already been pre-populated.
|
||||||
if (rep_->index_reader) {
|
if (rep_->index_reader) {
|
||||||
return rep_->index_reader->NewIterator();
|
return rep_->index_reader->NewIterator(input_iter);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool no_io = read_options.read_tier == kBlockCacheTier;
|
bool no_io = read_options.read_tier == kBlockCacheTier;
|
||||||
@ -796,8 +799,13 @@ Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) {
|
|||||||
BLOCK_CACHE_INDEX_HIT, statistics);
|
BLOCK_CACHE_INDEX_HIT, statistics);
|
||||||
|
|
||||||
if (cache_handle == nullptr && no_io) {
|
if (cache_handle == nullptr && no_io) {
|
||||||
|
if (input_iter != nullptr) {
|
||||||
|
input_iter->SetStatus(Status::Incomplete("no blocking io"));
|
||||||
|
return input_iter;
|
||||||
|
} else {
|
||||||
return NewErrorIterator(Status::Incomplete("no blocking io"));
|
return NewErrorIterator(Status::Incomplete("no blocking io"));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
IndexReader* index_reader = nullptr;
|
IndexReader* index_reader = nullptr;
|
||||||
if (cache_handle != nullptr) {
|
if (cache_handle != nullptr) {
|
||||||
@ -811,8 +819,13 @@ Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) {
|
|||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
// make sure if something goes wrong, index_reader shall remain intact.
|
// make sure if something goes wrong, index_reader shall remain intact.
|
||||||
assert(index_reader == nullptr);
|
assert(index_reader == nullptr);
|
||||||
|
if (input_iter != nullptr) {
|
||||||
|
input_iter->SetStatus(s);
|
||||||
|
return input_iter;
|
||||||
|
} else {
|
||||||
return NewErrorIterator(s);
|
return NewErrorIterator(s);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
cache_handle = block_cache->Insert(key, index_reader, index_reader->size(),
|
cache_handle = block_cache->Insert(key, index_reader, index_reader->size(),
|
||||||
&DeleteCachedEntry<IndexReader>);
|
&DeleteCachedEntry<IndexReader>);
|
||||||
@ -820,7 +833,8 @@ Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
assert(cache_handle);
|
assert(cache_handle);
|
||||||
auto iter = index_reader->NewIterator();
|
Iterator* iter;
|
||||||
|
iter = index_reader->NewIterator(input_iter);
|
||||||
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, cache_handle);
|
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache, cache_handle);
|
||||||
|
|
||||||
return iter;
|
return iter;
|
||||||
@ -828,8 +842,11 @@ Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) {
|
|||||||
|
|
||||||
// Convert an index iterator value (i.e., an encoded BlockHandle)
|
// Convert an index iterator value (i.e., an encoded BlockHandle)
|
||||||
// into an iterator over the contents of the corresponding block.
|
// into an iterator over the contents of the corresponding block.
|
||||||
|
// If input_iter is null, create a new iterator
|
||||||
|
// If input_iter is not null, update this iter and return it
|
||||||
Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep,
|
Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep,
|
||||||
const ReadOptions& ro, const Slice& index_value) {
|
const ReadOptions& ro, const Slice& index_value,
|
||||||
|
BlockIter* input_iter) {
|
||||||
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
||||||
Cache* block_cache = rep->options.block_cache.get();
|
Cache* block_cache = rep->options.block_cache.get();
|
||||||
Cache* block_cache_compressed = rep->options.
|
Cache* block_cache_compressed = rep->options.
|
||||||
@ -843,8 +860,13 @@ Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep,
|
|||||||
Status s = handle.DecodeFrom(&input);
|
Status s = handle.DecodeFrom(&input);
|
||||||
|
|
||||||
if (!s.ok()) {
|
if (!s.ok()) {
|
||||||
|
if (input_iter != nullptr) {
|
||||||
|
input_iter->SetStatus(s);
|
||||||
|
return input_iter;
|
||||||
|
} else {
|
||||||
return NewErrorIterator(s);
|
return NewErrorIterator(s);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If either block cache is enabled, we'll try to read from it.
|
// If either block cache is enabled, we'll try to read from it.
|
||||||
if (block_cache != nullptr || block_cache_compressed != nullptr) {
|
if (block_cache != nullptr || block_cache_compressed != nullptr) {
|
||||||
@ -889,24 +911,34 @@ Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep,
|
|||||||
if (block.value == nullptr) {
|
if (block.value == nullptr) {
|
||||||
if (no_io) {
|
if (no_io) {
|
||||||
// Could not read from block_cache and can't do IO
|
// Could not read from block_cache and can't do IO
|
||||||
|
if (input_iter != nullptr) {
|
||||||
|
input_iter->SetStatus(Status::Incomplete("no blocking io"));
|
||||||
|
return input_iter;
|
||||||
|
} else {
|
||||||
return NewErrorIterator(Status::Incomplete("no blocking io"));
|
return NewErrorIterator(Status::Incomplete("no blocking io"));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
s = ReadBlockFromFile(rep->file.get(), rep->footer, ro, handle,
|
s = ReadBlockFromFile(rep->file.get(), rep->footer, ro, handle,
|
||||||
&block.value, rep->options.env);
|
&block.value, rep->options.env);
|
||||||
}
|
}
|
||||||
|
|
||||||
Iterator* iter;
|
Iterator* iter;
|
||||||
if (block.value != nullptr) {
|
if (block.value != nullptr) {
|
||||||
iter = block.value->NewIterator(&rep->internal_comparator);
|
iter = block.value->NewIterator(&rep->internal_comparator, input_iter);
|
||||||
if (block.cache_handle != nullptr) {
|
if (block.cache_handle != nullptr) {
|
||||||
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache,
|
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache,
|
||||||
block.cache_handle);
|
block.cache_handle);
|
||||||
} else {
|
} else {
|
||||||
iter->RegisterCleanup(&DeleteHeldResource<Block>, block.value, nullptr);
|
iter->RegisterCleanup(&DeleteHeldResource<Block>, block.value, nullptr);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
if (input_iter != nullptr) {
|
||||||
|
input_iter->SetStatus(s);
|
||||||
|
iter = input_iter;
|
||||||
} else {
|
} else {
|
||||||
iter = NewErrorIterator(s);
|
iter = NewErrorIterator(s);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return iter;
|
return iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1023,12 +1055,14 @@ Status BlockBasedTable::Get(
|
|||||||
const Slice& v),
|
const Slice& v),
|
||||||
void (*mark_key_may_exist_handler)(void* handle_context)) {
|
void (*mark_key_may_exist_handler)(void* handle_context)) {
|
||||||
Status s;
|
Status s;
|
||||||
Iterator* iiter = NewIndexIterator(read_options);
|
BlockIter iiter;
|
||||||
|
NewIndexIterator(read_options, &iiter);
|
||||||
|
|
||||||
auto filter_entry = GetFilter(read_options.read_tier == kBlockCacheTier);
|
auto filter_entry = GetFilter(read_options.read_tier == kBlockCacheTier);
|
||||||
FilterBlockReader* filter = filter_entry.value;
|
FilterBlockReader* filter = filter_entry.value;
|
||||||
bool done = false;
|
bool done = false;
|
||||||
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
|
for (iiter.Seek(key); iiter.Valid() && !done; iiter.Next()) {
|
||||||
Slice handle_value = iiter->value();
|
Slice handle_value = iiter.value();
|
||||||
|
|
||||||
BlockHandle handle;
|
BlockHandle handle;
|
||||||
bool may_not_exist_in_filter =
|
bool may_not_exist_in_filter =
|
||||||
@ -1043,39 +1077,43 @@ Status BlockBasedTable::Get(
|
|||||||
RecordTick(rep_->options.statistics.get(), BLOOM_FILTER_USEFUL);
|
RecordTick(rep_->options.statistics.get(), BLOOM_FILTER_USEFUL);
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
unique_ptr<Iterator> block_iter(
|
BlockIter biter;
|
||||||
NewDataBlockIterator(rep_, read_options, iiter->value()));
|
NewDataBlockIterator(rep_, read_options, iiter.value(), &biter);
|
||||||
|
|
||||||
if (read_options.read_tier && block_iter->status().IsIncomplete()) {
|
if (read_options.read_tier && biter.status().IsIncomplete()) {
|
||||||
// couldn't get block from block_cache
|
// couldn't get block from block_cache
|
||||||
// Update Saver.state to Found because we are only looking for whether
|
// Update Saver.state to Found because we are only looking for whether
|
||||||
// we can guarantee the key is not there when "no_io" is set
|
// we can guarantee the key is not there when "no_io" is set
|
||||||
(*mark_key_may_exist_handler)(handle_context);
|
(*mark_key_may_exist_handler)(handle_context);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (!biter.status().ok()) {
|
||||||
|
s = biter.status();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Call the *saver function on each entry/block until it returns false
|
// Call the *saver function on each entry/block until it returns false
|
||||||
for (block_iter->Seek(key); block_iter->Valid(); block_iter->Next()) {
|
for (biter.Seek(key); biter.Valid(); biter.Next()) {
|
||||||
ParsedInternalKey parsed_key;
|
ParsedInternalKey parsed_key;
|
||||||
if (!ParseInternalKey(block_iter->key(), &parsed_key)) {
|
if (!ParseInternalKey(biter.key(), &parsed_key)) {
|
||||||
s = Status::Corruption(Slice());
|
s = Status::Corruption(Slice());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(*result_handler)(handle_context, parsed_key,
|
if (!(*result_handler)(handle_context, parsed_key,
|
||||||
block_iter->value())) {
|
biter.value())) {
|
||||||
done = true;
|
done = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s = block_iter->status();
|
s = biter.status();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
filter_entry.Release(rep_->options.block_cache.get());
|
filter_entry.Release(rep_->options.block_cache.get());
|
||||||
if (s.ok()) {
|
if (s.ok()) {
|
||||||
s = iiter->status();
|
s = iiter.status();
|
||||||
}
|
}
|
||||||
delete iiter;
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
class Block;
|
class Block;
|
||||||
|
class BlockIter;
|
||||||
class BlockHandle;
|
class BlockHandle;
|
||||||
class Cache;
|
class Cache;
|
||||||
class FilterBlockReader;
|
class FilterBlockReader;
|
||||||
@ -111,8 +112,10 @@ class BlockBasedTable : public TableReader {
|
|||||||
bool compaction_optimized_;
|
bool compaction_optimized_;
|
||||||
|
|
||||||
class BlockEntryIteratorState;
|
class BlockEntryIteratorState;
|
||||||
|
// input_iter: if it is not null, update this one and return it as Iterator
|
||||||
static Iterator* NewDataBlockIterator(Rep* rep, const ReadOptions& ro,
|
static Iterator* NewDataBlockIterator(Rep* rep, const ReadOptions& ro,
|
||||||
const Slice& index_value);
|
const Slice& index_value,
|
||||||
|
BlockIter* input_iter = nullptr);
|
||||||
|
|
||||||
// For the following two functions:
|
// For the following two functions:
|
||||||
// if `no_io == true`, we will not try to read filter/index from sst file
|
// if `no_io == true`, we will not try to read filter/index from sst file
|
||||||
@ -120,6 +123,8 @@ class BlockBasedTable : public TableReader {
|
|||||||
CachableEntry<FilterBlockReader> GetFilter(bool no_io = false) const;
|
CachableEntry<FilterBlockReader> GetFilter(bool no_io = false) const;
|
||||||
|
|
||||||
// Get the iterator from the index reader.
|
// Get the iterator from the index reader.
|
||||||
|
// If input_iter is not set, return new Iterator
|
||||||
|
// If input_iter is set, update it and return it as Iterator
|
||||||
//
|
//
|
||||||
// Note: ErrorIterator with Status::Incomplete shall be returned if all the
|
// Note: ErrorIterator with Status::Incomplete shall be returned if all the
|
||||||
// following conditions are met:
|
// following conditions are met:
|
||||||
@ -127,7 +132,8 @@ class BlockBasedTable : public TableReader {
|
|||||||
// 2. index is not present in block cache.
|
// 2. index is not present in block cache.
|
||||||
// 3. We disallowed any io to be performed, that is, read_options ==
|
// 3. We disallowed any io to be performed, that is, read_options ==
|
||||||
// kBlockCacheTier
|
// kBlockCacheTier
|
||||||
Iterator* NewIndexIterator(const ReadOptions& read_options);
|
Iterator* NewIndexIterator(const ReadOptions& read_options,
|
||||||
|
BlockIter* input_iter = nullptr);
|
||||||
|
|
||||||
// Read block cache from block caches (if set): block_cache and
|
// Read block cache from block caches (if set): block_cache and
|
||||||
// block_cache_compressed.
|
// block_cache_compressed.
|
||||||
|
Loading…
Reference in New Issue
Block a user