Refactoring
Summary: This is the first split of https://github.com/facebook/rocksdb/pull/1891 and will be needed for the upcoming partitioned filter patch.
Closes https://github.com/facebook/rocksdb/pull/1949
Differential Revision: D4652152
Pulled By: maysamyabandeh
fbshipit-source-id: 9801778
This commit is contained in:
parent
2a5daa06f0
commit
a2f7a514d1
@ -336,10 +336,12 @@ set(SOURCES
|
|||||||
table/format.cc
|
table/format.cc
|
||||||
table/full_filter_block.cc
|
table/full_filter_block.cc
|
||||||
table/get_context.cc
|
table/get_context.cc
|
||||||
|
table/index_builder.cc
|
||||||
table/iterator.cc
|
table/iterator.cc
|
||||||
table/merging_iterator.cc
|
table/merging_iterator.cc
|
||||||
table/sst_file_writer.cc
|
table/sst_file_writer.cc
|
||||||
table/meta_blocks.cc
|
table/meta_blocks.cc
|
||||||
|
table/partitioned_filter_block.cc
|
||||||
table/plain_table_builder.cc
|
table/plain_table_builder.cc
|
||||||
table/plain_table_factory.cc
|
table/plain_table_factory.cc
|
||||||
table/plain_table_index.cc
|
table/plain_table_index.cc
|
||||||
|
2
src.mk
2
src.mk
@ -72,10 +72,12 @@ LIB_SOURCES = \
|
|||||||
table/format.cc \
|
table/format.cc \
|
||||||
table/full_filter_block.cc \
|
table/full_filter_block.cc \
|
||||||
table/get_context.cc \
|
table/get_context.cc \
|
||||||
|
table/index_builder.cc \
|
||||||
table/iterator.cc \
|
table/iterator.cc \
|
||||||
table/merging_iterator.cc \
|
table/merging_iterator.cc \
|
||||||
table/meta_blocks.cc \
|
table/meta_blocks.cc \
|
||||||
table/sst_file_writer.cc \
|
table/sst_file_writer.cc \
|
||||||
|
table/partitioned_filter_block.cc \
|
||||||
table/plain_table_builder.cc \
|
table/plain_table_builder.cc \
|
||||||
table/plain_table_factory.cc \
|
table/plain_table_factory.cc \
|
||||||
table/plain_table_index.cc \
|
table/plain_table_index.cc \
|
||||||
|
@ -31,14 +31,16 @@
|
|||||||
#include "rocksdb/table.h"
|
#include "rocksdb/table.h"
|
||||||
|
|
||||||
#include "table/block.h"
|
#include "table/block.h"
|
||||||
|
#include "table/block_based_filter_block.h"
|
||||||
|
#include "table/block_based_table_factory.h"
|
||||||
#include "table/block_based_table_reader.h"
|
#include "table/block_based_table_reader.h"
|
||||||
#include "table/block_builder.h"
|
#include "table/block_builder.h"
|
||||||
#include "table/filter_block.h"
|
#include "table/filter_block.h"
|
||||||
#include "table/block_based_filter_block.h"
|
|
||||||
#include "table/block_based_table_factory.h"
|
|
||||||
#include "table/full_filter_block.h"
|
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
|
#include "table/full_filter_block.h"
|
||||||
|
#include "table/index_builder.h"
|
||||||
#include "table/meta_blocks.h"
|
#include "table/meta_blocks.h"
|
||||||
|
#include "table/partitioned_filter_block.h"
|
||||||
#include "table/table_builder.h"
|
#include "table/table_builder.h"
|
||||||
|
|
||||||
#include "util/string_util.h"
|
#include "util/string_util.h"
|
||||||
@ -54,384 +56,10 @@ extern const std::string kHashIndexPrefixesBlock;
|
|||||||
extern const std::string kHashIndexPrefixesMetadataBlock;
|
extern const std::string kHashIndexPrefixesMetadataBlock;
|
||||||
|
|
||||||
typedef BlockBasedTableOptions::IndexType IndexType;
|
typedef BlockBasedTableOptions::IndexType IndexType;
|
||||||
class IndexBuilder;
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
rocksdb::IndexBuilder* CreateIndexBuilder(
|
|
||||||
IndexType index_type, const InternalKeyComparator* comparator,
|
|
||||||
const SliceTransform* prefix_extractor, int index_block_restart_interval,
|
|
||||||
uint64_t index_per_partition);
|
|
||||||
}
|
|
||||||
|
|
||||||
// The interface for building index.
|
|
||||||
// Instruction for adding a new concrete IndexBuilder:
|
|
||||||
// 1. Create a subclass instantiated from IndexBuilder.
|
|
||||||
// 2. Add a new entry associated with that subclass in TableOptions::IndexType.
|
|
||||||
// 3. Add a create function for the new subclass in CreateIndexBuilder.
|
|
||||||
// Note: we can devise more advanced design to simplify the process for adding
|
|
||||||
// new subclass, which will, on the other hand, increase the code complexity and
|
|
||||||
// catch unwanted attention from readers. Given that we won't add/change
|
|
||||||
// indexes frequently, it makes sense to just embrace a more straightforward
|
|
||||||
// design that just works.
|
|
||||||
class IndexBuilder {
|
|
||||||
public:
|
|
||||||
// Index builder will construct a set of blocks which contain:
|
|
||||||
// 1. One primary index block.
|
|
||||||
// 2. (Optional) a set of metablocks that contains the metadata of the
|
|
||||||
// primary index.
|
|
||||||
struct IndexBlocks {
|
|
||||||
Slice index_block_contents;
|
|
||||||
std::unordered_map<std::string, Slice> meta_blocks;
|
|
||||||
};
|
|
||||||
explicit IndexBuilder(const InternalKeyComparator* comparator)
|
|
||||||
: comparator_(comparator) {}
|
|
||||||
|
|
||||||
virtual ~IndexBuilder() {}
|
|
||||||
|
|
||||||
// Add a new index entry to index block.
|
|
||||||
// To allow further optimization, we provide `last_key_in_current_block` and
|
|
||||||
// `first_key_in_next_block`, based on which the specific implementation can
|
|
||||||
// determine the best index key to be used for the index block.
|
|
||||||
// @last_key_in_current_block: this parameter may be overridden with the value
|
|
||||||
// "substitute key".
|
|
||||||
// @first_key_in_next_block: it will be nullptr if the entry being added is
|
|
||||||
// the last one in the table
|
|
||||||
//
|
|
||||||
// REQUIRES: Finish() has not yet been called.
|
|
||||||
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
|
||||||
const Slice* first_key_in_next_block,
|
|
||||||
const BlockHandle& block_handle) = 0;
|
|
||||||
|
|
||||||
// This method will be called whenever a key is added. The subclasses may
|
|
||||||
// override OnKeyAdded() if they need to collect additional information.
|
|
||||||
virtual void OnKeyAdded(const Slice& key) {}
|
|
||||||
|
|
||||||
// Inform the index builder that all entries have been written. Block builder
|
|
||||||
// may therefore perform any operation required for block finalization.
|
|
||||||
//
|
|
||||||
// REQUIRES: Finish() has not yet been called.
|
|
||||||
inline Status Finish(IndexBlocks* index_blocks) {
|
|
||||||
// Throw away the changes to last_partition_block_handle. It has no effect
|
|
||||||
// on the first call to Finish anyway.
|
|
||||||
BlockHandle last_partition_block_handle;
|
|
||||||
return Finish(index_blocks, last_partition_block_handle);
|
|
||||||
}
|
|
||||||
|
|
||||||
// This override of Finish can be utilized to build the 2nd level index in
|
|
||||||
// PartitionIndexBuilder.
|
|
||||||
//
|
|
||||||
// index_blocks will be filled with the resulting index data. If the return
|
|
||||||
// value is Status::Incomplete() then it means that the index is partitioned
|
|
||||||
// and the caller should keep calling Finish until Status::OK() is returned.
|
|
||||||
// In that case, last_partition_block_handle is pointer to the block written
|
|
||||||
// with the result of the last call to Finish. This can be utilized to build
|
|
||||||
// the second level index pointing to each block of partitioned indexes. The
|
|
||||||
// last call to Finish() that returns Status::OK() populates index_blocks with
|
|
||||||
// the 2nd level index content.
|
|
||||||
virtual Status Finish(IndexBlocks* index_blocks,
|
|
||||||
const BlockHandle& last_partition_block_handle) = 0;
|
|
||||||
|
|
||||||
// Get the estimated size for index block.
|
|
||||||
virtual size_t EstimatedSize() const = 0;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
const InternalKeyComparator* comparator_;
|
|
||||||
};
|
|
||||||
|
|
||||||
// This index builder builds space-efficient index block.
|
|
||||||
//
|
|
||||||
// Optimizations:
|
|
||||||
// 1. Made block's `block_restart_interval` to be 1, which will avoid linear
|
|
||||||
// search when doing index lookup (can be disabled by setting
|
|
||||||
// index_block_restart_interval).
|
|
||||||
// 2. Shorten the key length for index block. Other than honestly using the
|
|
||||||
// last key in the data block as the index key, we instead find a shortest
|
|
||||||
// substitute key that serves the same function.
|
|
||||||
class ShortenedIndexBuilder : public IndexBuilder {
|
|
||||||
public:
|
|
||||||
explicit ShortenedIndexBuilder(const InternalKeyComparator* comparator,
|
|
||||||
int index_block_restart_interval)
|
|
||||||
: IndexBuilder(comparator),
|
|
||||||
index_block_builder_(index_block_restart_interval) {}
|
|
||||||
|
|
||||||
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
|
||||||
const Slice* first_key_in_next_block,
|
|
||||||
const BlockHandle& block_handle) override {
|
|
||||||
if (first_key_in_next_block != nullptr) {
|
|
||||||
comparator_->FindShortestSeparator(last_key_in_current_block,
|
|
||||||
*first_key_in_next_block);
|
|
||||||
} else {
|
|
||||||
comparator_->FindShortSuccessor(last_key_in_current_block);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string handle_encoding;
|
|
||||||
block_handle.EncodeTo(&handle_encoding);
|
|
||||||
index_block_builder_.Add(*last_key_in_current_block, handle_encoding);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual Status Finish(
|
|
||||||
IndexBlocks* index_blocks,
|
|
||||||
const BlockHandle& last_partition_block_handle) override {
|
|
||||||
index_blocks->index_block_contents = index_block_builder_.Finish();
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual size_t EstimatedSize() const override {
|
|
||||||
return index_block_builder_.CurrentSizeEstimate();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
BlockBuilder index_block_builder_;
|
|
||||||
};
|
|
||||||
|
|
||||||
/**
|
|
||||||
* IndexBuilder for two-level indexing. Internally it creates a new index for
|
|
||||||
* each partition and Finish them in order when Finish is called on it
|
|
||||||
* continuously until Status::OK() is returned.
|
|
||||||
*
|
|
||||||
* The format on the disk would be I I I I I I IP where I is block containing a
|
|
||||||
* partition of indexes built using ShortenedIndexBuilder and IP is a block
|
|
||||||
* containing a secondary index on the partitions, built using
|
|
||||||
* ShortenedIndexBuilder.
|
|
||||||
*/
|
|
||||||
class PartitionIndexBuilder : public IndexBuilder {
|
|
||||||
public:
|
|
||||||
explicit PartitionIndexBuilder(const InternalKeyComparator* comparator,
|
|
||||||
const SliceTransform* prefix_extractor,
|
|
||||||
const uint64_t index_per_partition,
|
|
||||||
int index_block_restart_interval)
|
|
||||||
: IndexBuilder(comparator),
|
|
||||||
prefix_extractor_(prefix_extractor),
|
|
||||||
index_block_builder_(index_block_restart_interval),
|
|
||||||
index_per_partition_(index_per_partition),
|
|
||||||
index_block_restart_interval_(index_block_restart_interval) {
|
|
||||||
sub_index_builder_ =
|
|
||||||
CreateIndexBuilder(sub_type_, comparator_, prefix_extractor_,
|
|
||||||
index_block_restart_interval_, index_per_partition_);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual ~PartitionIndexBuilder() { delete sub_index_builder_; }
|
|
||||||
|
|
||||||
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
|
||||||
const Slice* first_key_in_next_block,
|
|
||||||
const BlockHandle& block_handle) override {
|
|
||||||
sub_index_builder_->AddIndexEntry(last_key_in_current_block,
|
|
||||||
first_key_in_next_block, block_handle);
|
|
||||||
num_indexes++;
|
|
||||||
if (UNLIKELY(first_key_in_next_block == nullptr)) { // no more keys
|
|
||||||
entries_.push_back({std::string(*last_key_in_current_block),
|
|
||||||
std::unique_ptr<IndexBuilder>(sub_index_builder_)});
|
|
||||||
sub_index_builder_ = nullptr;
|
|
||||||
} else if (num_indexes % index_per_partition_ == 0) {
|
|
||||||
entries_.push_back({std::string(*last_key_in_current_block),
|
|
||||||
std::unique_ptr<IndexBuilder>(sub_index_builder_)});
|
|
||||||
sub_index_builder_ = CreateIndexBuilder(
|
|
||||||
sub_type_, comparator_, prefix_extractor_,
|
|
||||||
index_block_restart_interval_, index_per_partition_);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual Status Finish(
|
|
||||||
IndexBlocks* index_blocks,
|
|
||||||
const BlockHandle& last_partition_block_handle) override {
|
|
||||||
assert(!entries_.empty());
|
|
||||||
// It must be set to null after last key is added
|
|
||||||
assert(sub_index_builder_ == nullptr);
|
|
||||||
if (finishing == true) {
|
|
||||||
Entry& last_entry = entries_.front();
|
|
||||||
std::string handle_encoding;
|
|
||||||
last_partition_block_handle.EncodeTo(&handle_encoding);
|
|
||||||
index_block_builder_.Add(last_entry.key, handle_encoding);
|
|
||||||
entries_.pop_front();
|
|
||||||
}
|
|
||||||
// If there is no sub_index left, then return the 2nd level index.
|
|
||||||
if (UNLIKELY(entries_.empty())) {
|
|
||||||
index_blocks->index_block_contents = index_block_builder_.Finish();
|
|
||||||
return Status::OK();
|
|
||||||
} else {
|
|
||||||
// Finish the next partition index in line and Incomplete() to indicate we
|
|
||||||
// expect more calls to Finish
|
|
||||||
Entry& entry = entries_.front();
|
|
||||||
auto s = entry.value->Finish(index_blocks);
|
|
||||||
finishing = true;
|
|
||||||
return s.ok() ? Status::Incomplete() : s;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual size_t EstimatedSize() const override {
|
|
||||||
size_t total = 0;
|
|
||||||
for (auto it = entries_.begin(); it != entries_.end(); ++it) {
|
|
||||||
total += it->value->EstimatedSize();
|
|
||||||
}
|
|
||||||
total += index_block_builder_.CurrentSizeEstimate();
|
|
||||||
total +=
|
|
||||||
sub_index_builder_ == nullptr ? 0 : sub_index_builder_->EstimatedSize();
|
|
||||||
return total;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
static const IndexType sub_type_ = BlockBasedTableOptions::kBinarySearch;
|
|
||||||
struct Entry {
|
|
||||||
std::string key;
|
|
||||||
std::unique_ptr<IndexBuilder> value;
|
|
||||||
};
|
|
||||||
std::list<Entry> entries_; // list of partitioned indexes and their keys
|
|
||||||
const SliceTransform* prefix_extractor_;
|
|
||||||
BlockBuilder index_block_builder_; // top-level index builder
|
|
||||||
IndexBuilder* sub_index_builder_; // the active partition index builder
|
|
||||||
uint64_t index_per_partition_;
|
|
||||||
int index_block_restart_interval_;
|
|
||||||
uint64_t num_indexes = 0;
|
|
||||||
bool finishing =
|
|
||||||
false; // true if Finish is called once but not complete yet.
|
|
||||||
};
|
|
||||||
|
|
||||||
// HashIndexBuilder contains a binary-searchable primary index and the
|
|
||||||
// metadata for secondary hash index construction.
|
|
||||||
// The metadata for hash index consists two parts:
|
|
||||||
// - a metablock that compactly contains a sequence of prefixes. All prefixes
|
|
||||||
// are stored consecutively without any metadata (like, prefix sizes) being
|
|
||||||
// stored, which is kept in the other metablock.
|
|
||||||
// - a metablock contains the metadata of the prefixes, including prefix size,
|
|
||||||
// restart index and number of block it spans. The format looks like:
|
|
||||||
//
|
|
||||||
// +-----------------+---------------------------+---------------------+ <=prefix 1
|
|
||||||
// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes |
|
|
||||||
// +-----------------+---------------------------+---------------------+ <=prefix 2
|
|
||||||
// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes |
|
|
||||||
// +-----------------+---------------------------+---------------------+
|
|
||||||
// | |
|
|
||||||
// | .... |
|
|
||||||
// | |
|
|
||||||
// +-----------------+---------------------------+---------------------+ <=prefix n
|
|
||||||
// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes |
|
|
||||||
// +-----------------+---------------------------+---------------------+
|
|
||||||
//
|
|
||||||
// The reason for separating these two metablocks is to be able to efficiently
|
|
||||||
// reuse the first metablock during hash index construction without unnecessary
|
|
||||||
// data copy or small heap allocations for prefixes.
|
|
||||||
class HashIndexBuilder : public IndexBuilder {
|
|
||||||
public:
|
|
||||||
explicit HashIndexBuilder(const InternalKeyComparator* comparator,
|
|
||||||
const SliceTransform* hash_key_extractor,
|
|
||||||
int index_block_restart_interval)
|
|
||||||
: IndexBuilder(comparator),
|
|
||||||
primary_index_builder_(comparator, index_block_restart_interval),
|
|
||||||
hash_key_extractor_(hash_key_extractor) {}
|
|
||||||
|
|
||||||
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
|
||||||
const Slice* first_key_in_next_block,
|
|
||||||
const BlockHandle& block_handle) override {
|
|
||||||
++current_restart_index_;
|
|
||||||
primary_index_builder_.AddIndexEntry(last_key_in_current_block,
|
|
||||||
first_key_in_next_block, block_handle);
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void OnKeyAdded(const Slice& key) override {
|
|
||||||
auto key_prefix = hash_key_extractor_->Transform(key);
|
|
||||||
bool is_first_entry = pending_block_num_ == 0;
|
|
||||||
|
|
||||||
// Keys may share the prefix
|
|
||||||
if (is_first_entry || pending_entry_prefix_ != key_prefix) {
|
|
||||||
if (!is_first_entry) {
|
|
||||||
FlushPendingPrefix();
|
|
||||||
}
|
|
||||||
|
|
||||||
// need a hard copy otherwise the underlying data changes all the time.
|
|
||||||
// TODO(kailiu) ToString() is expensive. We may speed this up if we can avoid the data
|
|
||||||
// copy.
|
|
||||||
pending_entry_prefix_ = key_prefix.ToString();
|
|
||||||
pending_block_num_ = 1;
|
|
||||||
pending_entry_index_ = static_cast<uint32_t>(current_restart_index_);
|
|
||||||
} else {
|
|
||||||
// entry number increments when keys that share the prefix reside in
|
|
||||||
// different data blocks.
|
|
||||||
auto last_restart_index = pending_entry_index_ + pending_block_num_ - 1;
|
|
||||||
assert(last_restart_index <= current_restart_index_);
|
|
||||||
if (last_restart_index != current_restart_index_) {
|
|
||||||
++pending_block_num_;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual Status Finish(
|
|
||||||
IndexBlocks* index_blocks,
|
|
||||||
const BlockHandle& last_partition_block_handle) override {
|
|
||||||
FlushPendingPrefix();
|
|
||||||
primary_index_builder_.Finish(index_blocks, last_partition_block_handle);
|
|
||||||
index_blocks->meta_blocks.insert(
|
|
||||||
{kHashIndexPrefixesBlock.c_str(), prefix_block_});
|
|
||||||
index_blocks->meta_blocks.insert(
|
|
||||||
{kHashIndexPrefixesMetadataBlock.c_str(), prefix_meta_block_});
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual size_t EstimatedSize() const override {
|
|
||||||
return primary_index_builder_.EstimatedSize() + prefix_block_.size() +
|
|
||||||
prefix_meta_block_.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
void FlushPendingPrefix() {
|
|
||||||
prefix_block_.append(pending_entry_prefix_.data(),
|
|
||||||
pending_entry_prefix_.size());
|
|
||||||
PutVarint32Varint32Varint32(
|
|
||||||
&prefix_meta_block_,
|
|
||||||
static_cast<uint32_t>(pending_entry_prefix_.size()),
|
|
||||||
pending_entry_index_, pending_block_num_);
|
|
||||||
}
|
|
||||||
|
|
||||||
ShortenedIndexBuilder primary_index_builder_;
|
|
||||||
const SliceTransform* hash_key_extractor_;
|
|
||||||
|
|
||||||
// stores a sequence of prefixes
|
|
||||||
std::string prefix_block_;
|
|
||||||
// stores the metadata of prefixes
|
|
||||||
std::string prefix_meta_block_;
|
|
||||||
|
|
||||||
// The following 3 variables keep the unflushed prefix and its metadata.
|
|
||||||
// The details of block_num and entry_index can be found in
|
|
||||||
// "block_hash_index.{h,cc}"
|
|
||||||
uint32_t pending_block_num_ = 0;
|
|
||||||
uint32_t pending_entry_index_ = 0;
|
|
||||||
std::string pending_entry_prefix_;
|
|
||||||
|
|
||||||
uint64_t current_restart_index_ = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Without anonymous namespace here, we fail the warning -Wmissing-prototypes
|
// Without anonymous namespace here, we fail the warning -Wmissing-prototypes
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
// Create an index builder based on its type.
|
|
||||||
IndexBuilder* CreateIndexBuilder(IndexType index_type,
|
|
||||||
const InternalKeyComparator* comparator,
|
|
||||||
const SliceTransform* prefix_extractor,
|
|
||||||
int index_block_restart_interval,
|
|
||||||
uint64_t index_per_partition) {
|
|
||||||
switch (index_type) {
|
|
||||||
case BlockBasedTableOptions::kBinarySearch: {
|
|
||||||
return new ShortenedIndexBuilder(comparator,
|
|
||||||
index_block_restart_interval);
|
|
||||||
}
|
|
||||||
case BlockBasedTableOptions::kHashSearch: {
|
|
||||||
return new HashIndexBuilder(comparator, prefix_extractor,
|
|
||||||
index_block_restart_interval);
|
|
||||||
}
|
|
||||||
case BlockBasedTableOptions::kTwoLevelIndexSearch: {
|
|
||||||
return new PartitionIndexBuilder(comparator, prefix_extractor,
|
|
||||||
index_per_partition,
|
|
||||||
index_block_restart_interval);
|
|
||||||
}
|
|
||||||
default: {
|
|
||||||
assert(!"Do not recognize the index type ");
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// impossible.
|
|
||||||
assert(false);
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a filter block builder based on its type.
|
// Create a filter block builder based on its type.
|
||||||
FilterBlockBuilder* CreateFilterBlockBuilder(const ImmutableCFOptions& opt,
|
FilterBlockBuilder* CreateFilterBlockBuilder(const ImmutableCFOptions& opt,
|
||||||
const BlockBasedTableOptions& table_opt) {
|
const BlockBasedTableOptions& table_opt) {
|
||||||
@ -649,11 +277,11 @@ struct BlockBasedTableBuilder::Rep {
|
|||||||
table_options.use_delta_encoding),
|
table_options.use_delta_encoding),
|
||||||
range_del_block(1), // TODO(andrewkr): restart_interval unnecessary
|
range_del_block(1), // TODO(andrewkr): restart_interval unnecessary
|
||||||
internal_prefix_transform(_ioptions.prefix_extractor),
|
internal_prefix_transform(_ioptions.prefix_extractor),
|
||||||
index_builder(
|
index_builder(IndexBuilder::CreateIndexBuilder(
|
||||||
CreateIndexBuilder(table_options.index_type, &internal_comparator,
|
table_options.index_type, &internal_comparator,
|
||||||
&this->internal_prefix_transform,
|
&this->internal_prefix_transform,
|
||||||
table_options.index_block_restart_interval,
|
table_options.index_block_restart_interval,
|
||||||
table_options.index_per_partition)),
|
table_options.index_per_partition)),
|
||||||
compression_type(_compression_type),
|
compression_type(_compression_type),
|
||||||
compression_opts(_compression_opts),
|
compression_opts(_compression_opts),
|
||||||
compression_dict(_compression_dict),
|
compression_dict(_compression_dict),
|
||||||
|
@ -57,6 +57,11 @@ using std::unique_ptr;
|
|||||||
|
|
||||||
typedef BlockBasedTable::IndexReader IndexReader;
|
typedef BlockBasedTable::IndexReader IndexReader;
|
||||||
|
|
||||||
|
BlockBasedTable::~BlockBasedTable() {
|
||||||
|
Close();
|
||||||
|
delete rep_;
|
||||||
|
}
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
// Read the block identified by "handle" from "file".
|
// Read the block identified by "handle" from "file".
|
||||||
// The only relevant option is options.verify_checksums for now.
|
// The only relevant option is options.verify_checksums for now.
|
||||||
@ -143,42 +148,6 @@ Cache::Handle* GetEntryFromCache(Cache* block_cache, const Slice& key,
|
|||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
// -- IndexReader and its subclasses
|
|
||||||
// IndexReader is the interface that provides the functionality for index access.
|
|
||||||
class BlockBasedTable::IndexReader {
|
|
||||||
public:
|
|
||||||
explicit IndexReader(const Comparator* comparator, Statistics* stats)
|
|
||||||
: comparator_(comparator), statistics_(stats) {}
|
|
||||||
|
|
||||||
virtual ~IndexReader() {}
|
|
||||||
|
|
||||||
// Create an iterator for index access.
|
|
||||||
// If iter is null then a new object is created on the heap and the caller will
|
|
||||||
// have the ownership. If a non-null iter is passed in it will be used, and
|
|
||||||
// the returned value is either the same as iter or a new on-heap object that
|
|
||||||
// wraps the passed iter. In the latter case the return value would point to
|
|
||||||
// a different object than iter and the caller has the ownership of the
|
|
||||||
// returned object.
|
|
||||||
virtual InternalIterator* NewIterator(BlockIter* iter = nullptr,
|
|
||||||
bool total_order_seek = true) = 0;
|
|
||||||
|
|
||||||
// The size of the index.
|
|
||||||
virtual size_t size() const = 0;
|
|
||||||
// Memory usage of the index block
|
|
||||||
virtual size_t usable_size() const = 0;
|
|
||||||
// return the statistics pointer
|
|
||||||
virtual Statistics* statistics() const { return statistics_; }
|
|
||||||
// Report an approximation of how much memory has been used other than memory
|
|
||||||
// that was allocated in block cache.
|
|
||||||
virtual size_t ApproximateMemoryUsage() const = 0;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
const Comparator* comparator_;
|
|
||||||
|
|
||||||
private:
|
|
||||||
Statistics* statistics_;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Index that allows binary search lookup in a two-level index structure.
|
// Index that allows binary search lookup in a two-level index structure.
|
||||||
class PartitionIndexReader : public IndexReader {
|
class PartitionIndexReader : public IndexReader {
|
||||||
public:
|
public:
|
||||||
@ -397,118 +366,6 @@ class HashIndexReader : public IndexReader {
|
|||||||
BlockContents prefixes_contents_;
|
BlockContents prefixes_contents_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// CachableEntry represents the entries that *may* be fetched from block cache.
|
|
||||||
// field `value` is the item we want to get.
|
|
||||||
// field `cache_handle` is the cache handle to the block cache. If the value
|
|
||||||
// was not read from cache, `cache_handle` will be nullptr.
|
|
||||||
template <class TValue>
|
|
||||||
struct BlockBasedTable::CachableEntry {
|
|
||||||
CachableEntry(TValue* _value, Cache::Handle* _cache_handle)
|
|
||||||
: value(_value), cache_handle(_cache_handle) {}
|
|
||||||
CachableEntry() : CachableEntry(nullptr, nullptr) {}
|
|
||||||
void Release(Cache* cache) {
|
|
||||||
if (cache_handle) {
|
|
||||||
cache->Release(cache_handle);
|
|
||||||
value = nullptr;
|
|
||||||
cache_handle = nullptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
bool IsSet() const { return cache_handle != nullptr; }
|
|
||||||
|
|
||||||
TValue* value = nullptr;
|
|
||||||
// if the entry is from the cache, cache_handle will be populated.
|
|
||||||
Cache::Handle* cache_handle = nullptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct BlockBasedTable::Rep {
|
|
||||||
Rep(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options,
|
|
||||||
const BlockBasedTableOptions& _table_opt,
|
|
||||||
const InternalKeyComparator& _internal_comparator, bool skip_filters)
|
|
||||||
: ioptions(_ioptions),
|
|
||||||
env_options(_env_options),
|
|
||||||
table_options(_table_opt),
|
|
||||||
filter_policy(skip_filters ? nullptr : _table_opt.filter_policy.get()),
|
|
||||||
internal_comparator(_internal_comparator),
|
|
||||||
filter_type(FilterType::kNoFilter),
|
|
||||||
whole_key_filtering(_table_opt.whole_key_filtering),
|
|
||||||
prefix_filtering(true),
|
|
||||||
range_del_handle(BlockHandle::NullBlockHandle()),
|
|
||||||
global_seqno(kDisableGlobalSequenceNumber) {}
|
|
||||||
|
|
||||||
const ImmutableCFOptions& ioptions;
|
|
||||||
const EnvOptions& env_options;
|
|
||||||
const BlockBasedTableOptions& table_options;
|
|
||||||
const FilterPolicy* const filter_policy;
|
|
||||||
const InternalKeyComparator& internal_comparator;
|
|
||||||
Status status;
|
|
||||||
unique_ptr<RandomAccessFileReader> file;
|
|
||||||
char cache_key_prefix[kMaxCacheKeyPrefixSize];
|
|
||||||
size_t cache_key_prefix_size = 0;
|
|
||||||
char persistent_cache_key_prefix[kMaxCacheKeyPrefixSize];
|
|
||||||
size_t persistent_cache_key_prefix_size = 0;
|
|
||||||
char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize];
|
|
||||||
size_t compressed_cache_key_prefix_size = 0;
|
|
||||||
uint64_t dummy_index_reader_offset =
|
|
||||||
0; // ID that is unique for the block cache.
|
|
||||||
PersistentCacheOptions persistent_cache_options;
|
|
||||||
|
|
||||||
// Footer contains the fixed table information
|
|
||||||
Footer footer;
|
|
||||||
// index_reader and filter will be populated and used only when
|
|
||||||
// options.block_cache is nullptr; otherwise we will get the index block via
|
|
||||||
// the block cache.
|
|
||||||
unique_ptr<IndexReader> index_reader;
|
|
||||||
unique_ptr<FilterBlockReader> filter;
|
|
||||||
|
|
||||||
enum class FilterType {
|
|
||||||
kNoFilter,
|
|
||||||
kFullFilter,
|
|
||||||
kBlockFilter,
|
|
||||||
};
|
|
||||||
FilterType filter_type;
|
|
||||||
BlockHandle filter_handle;
|
|
||||||
|
|
||||||
std::shared_ptr<const TableProperties> table_properties;
|
|
||||||
// Block containing the data for the compression dictionary. We take ownership
|
|
||||||
// for the entire block struct, even though we only use its Slice member. This
|
|
||||||
// is easier because the Slice member depends on the continued existence of
|
|
||||||
// another member ("allocation").
|
|
||||||
std::unique_ptr<const BlockContents> compression_dict_block;
|
|
||||||
BlockBasedTableOptions::IndexType index_type;
|
|
||||||
bool hash_index_allow_collision;
|
|
||||||
bool whole_key_filtering;
|
|
||||||
bool prefix_filtering;
|
|
||||||
// TODO(kailiu) It is very ugly to use internal key in table, since table
|
|
||||||
// module should not be relying on db module. However to make things easier
|
|
||||||
// and compatible with existing code, we introduce a wrapper that allows
|
|
||||||
// block to extract prefix without knowing if a key is internal or not.
|
|
||||||
unique_ptr<SliceTransform> internal_prefix_transform;
|
|
||||||
|
|
||||||
// only used in level 0 files:
|
|
||||||
// when pin_l0_filter_and_index_blocks_in_cache is true, we do use the
|
|
||||||
// LRU cache, but we always keep the filter & index block's handle checked
|
|
||||||
// out here (=we don't call Release()), plus the parsed out objects
|
|
||||||
// the LRU cache will never flush them out, hence they're pinned
|
|
||||||
CachableEntry<FilterBlockReader> filter_entry;
|
|
||||||
CachableEntry<IndexReader> index_entry;
|
|
||||||
// range deletion meta-block is pinned through reader's lifetime when LRU
|
|
||||||
// cache is enabled.
|
|
||||||
CachableEntry<Block> range_del_entry;
|
|
||||||
BlockHandle range_del_handle;
|
|
||||||
|
|
||||||
// If global_seqno is used, all Keys in this file will have the same
|
|
||||||
// seqno with value `global_seqno`.
|
|
||||||
//
|
|
||||||
// A value of kDisableGlobalSequenceNumber means that this feature is disabled
|
|
||||||
// and every key has its own seqno.
|
|
||||||
SequenceNumber global_seqno;
|
|
||||||
};
|
|
||||||
|
|
||||||
BlockBasedTable::~BlockBasedTable() {
|
|
||||||
Close();
|
|
||||||
delete rep_;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper function to setup the cache key's prefix for the Table.
|
// Helper function to setup the cache key's prefix for the Table.
|
||||||
void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep, uint64_t file_size) {
|
void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep, uint64_t file_size) {
|
||||||
assert(kMaxCacheKeyPrefixSize >= 10);
|
assert(kMaxCacheKeyPrefixSize >= 10);
|
||||||
@ -850,7 +707,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
|
|||||||
|
|
||||||
// Set filter block
|
// Set filter block
|
||||||
if (rep->filter_policy) {
|
if (rep->filter_policy) {
|
||||||
rep->filter.reset(ReadFilter(rep));
|
rep->filter.reset(new_table->ReadFilter(rep));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
delete index_reader;
|
delete index_reader;
|
||||||
@ -1087,7 +944,7 @@ Status BlockBasedTable::PutDataBlockToCache(
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
FilterBlockReader* BlockBasedTable::ReadFilter(Rep* rep) {
|
FilterBlockReader* BlockBasedTable::ReadFilter(Rep* rep) const {
|
||||||
// TODO: We might want to unify with ReadBlockFromFile() if we start
|
// TODO: We might want to unify with ReadBlockFromFile() if we start
|
||||||
// requiring checksum verification in Table::Open.
|
// requiring checksum verification in Table::Open.
|
||||||
if (rep->filter_type == Rep::FilterType::kNoFilter) {
|
if (rep->filter_type == Rep::FilterType::kNoFilter) {
|
||||||
|
@ -20,6 +20,9 @@
|
|||||||
#include "rocksdb/statistics.h"
|
#include "rocksdb/statistics.h"
|
||||||
#include "rocksdb/status.h"
|
#include "rocksdb/status.h"
|
||||||
#include "rocksdb/table.h"
|
#include "rocksdb/table.h"
|
||||||
|
#include "table/filter_block.h"
|
||||||
|
#include "table/format.h"
|
||||||
|
#include "table/persistent_cache_helper.h"
|
||||||
#include "table/table_properties_internal.h"
|
#include "table/table_properties_internal.h"
|
||||||
#include "table/table_reader.h"
|
#include "table/table_reader.h"
|
||||||
#include "table/two_level_iterator.h"
|
#include "table/two_level_iterator.h"
|
||||||
@ -139,8 +142,45 @@ class BlockBasedTable : public TableReader {
|
|||||||
|
|
||||||
bool TEST_filter_block_preloaded() const;
|
bool TEST_filter_block_preloaded() const;
|
||||||
bool TEST_index_reader_preloaded() const;
|
bool TEST_index_reader_preloaded() const;
|
||||||
// Implementation of IndexReader will be exposed to internal cc file only.
|
|
||||||
class IndexReader;
|
// IndexReader is the interface that provide the functionality for index
|
||||||
|
// access.
|
||||||
|
class IndexReader {
|
||||||
|
public:
|
||||||
|
explicit IndexReader(const Comparator* comparator, Statistics* stats)
|
||||||
|
: comparator_(comparator), statistics_(stats) {}
|
||||||
|
|
||||||
|
virtual ~IndexReader() {}
|
||||||
|
|
||||||
|
// Create an iterator for index access.
|
||||||
|
// If iter is null then a new object is created on heap and the callee will
|
||||||
|
// have the ownership. If a non-null iter is passed in it will be used, and
|
||||||
|
// the returned value is either the same as iter or a new on-heap object
|
||||||
|
// that
|
||||||
|
// wrapps the passed iter. In the latter case the return value would point
|
||||||
|
// to
|
||||||
|
// a different object then iter and the callee has the ownership of the
|
||||||
|
// returned object.
|
||||||
|
virtual InternalIterator* NewIterator(BlockIter* iter = nullptr,
|
||||||
|
bool total_order_seek = true) = 0;
|
||||||
|
|
||||||
|
// The size of the index.
|
||||||
|
virtual size_t size() const = 0;
|
||||||
|
// Memory usage of the index block
|
||||||
|
virtual size_t usable_size() const = 0;
|
||||||
|
// return the statistics pointer
|
||||||
|
virtual Statistics* statistics() const { return statistics_; }
|
||||||
|
// Report an approximation of how much memory has been used other than
|
||||||
|
// memory
|
||||||
|
// that was allocated in block cache.
|
||||||
|
virtual size_t ApproximateMemoryUsage() const = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
const Comparator* comparator_;
|
||||||
|
|
||||||
|
private:
|
||||||
|
Statistics* statistics_;
|
||||||
|
};
|
||||||
|
|
||||||
static Slice GetCacheKey(const char* cache_key_prefix,
|
static Slice GetCacheKey(const char* cache_key_prefix,
|
||||||
size_t cache_key_prefix_size,
|
size_t cache_key_prefix_size,
|
||||||
@ -155,7 +195,6 @@ class BlockBasedTable : public TableReader {
|
|||||||
private:
|
private:
|
||||||
template <class TValue>
|
template <class TValue>
|
||||||
struct CachableEntry;
|
struct CachableEntry;
|
||||||
|
|
||||||
struct Rep;
|
struct Rep;
|
||||||
Rep* rep_;
|
Rep* rep_;
|
||||||
bool compaction_optimized_;
|
bool compaction_optimized_;
|
||||||
@ -251,7 +290,7 @@ class BlockBasedTable : public TableReader {
|
|||||||
std::unique_ptr<InternalIterator>* iter);
|
std::unique_ptr<InternalIterator>* iter);
|
||||||
|
|
||||||
// Create the filter from the filter block.
|
// Create the filter from the filter block.
|
||||||
static FilterBlockReader* ReadFilter(Rep* rep);
|
FilterBlockReader* ReadFilter(Rep* rep) const;
|
||||||
|
|
||||||
static void SetupCacheKeyPrefix(Rep* rep, uint64_t file_size);
|
static void SetupCacheKeyPrefix(Rep* rep, uint64_t file_size);
|
||||||
|
|
||||||
@ -290,4 +329,112 @@ class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState {
|
|||||||
bool skip_filters_;
|
bool skip_filters_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// CachableEntry represents the entries that *may* be fetched from block cache.
|
||||||
|
// field `value` is the item we want to get.
|
||||||
|
// field `cache_handle` is the cache handle to the block cache. If the value
|
||||||
|
// was not read from cache, `cache_handle` will be nullptr.
|
||||||
|
template <class TValue>
|
||||||
|
struct BlockBasedTable::CachableEntry {
|
||||||
|
CachableEntry(TValue* _value, Cache::Handle* _cache_handle)
|
||||||
|
: value(_value), cache_handle(_cache_handle) {}
|
||||||
|
CachableEntry() : CachableEntry(nullptr, nullptr) {}
|
||||||
|
void Release(Cache* cache) {
|
||||||
|
if (cache_handle) {
|
||||||
|
cache->Release(cache_handle);
|
||||||
|
value = nullptr;
|
||||||
|
cache_handle = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool IsSet() const { return cache_handle != nullptr; }
|
||||||
|
|
||||||
|
TValue* value = nullptr;
|
||||||
|
// if the entry is from the cache, cache_handle will be populated.
|
||||||
|
Cache::Handle* cache_handle = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Internal state of a BlockBasedTable reader.
struct BlockBasedTable::Rep {
  // Binds the option objects by reference and sets the filter-related
  // defaults. When `skip_filters` is true no filter policy is kept, even if
  // the table options carry one.
  Rep(const ImmutableCFOptions& _ioptions, const EnvOptions& _env_options,
      const BlockBasedTableOptions& _table_opt,
      const InternalKeyComparator& _internal_comparator, bool skip_filters)
      : ioptions(_ioptions),
        env_options(_env_options),
        table_options(_table_opt),
        filter_policy(skip_filters ? nullptr : _table_opt.filter_policy.get()),
        internal_comparator(_internal_comparator),
        filter_type(FilterType::kNoFilter),
        whole_key_filtering(_table_opt.whole_key_filtering),
        prefix_filtering(true),
        range_del_handle(BlockHandle::NullBlockHandle()),
        global_seqno(kDisableGlobalSequenceNumber) {}

  const ImmutableCFOptions& ioptions;
  const EnvOptions& env_options;
  const BlockBasedTableOptions& table_options;
  const FilterPolicy* const filter_policy;
  const InternalKeyComparator& internal_comparator;
  Status status;
  unique_ptr<RandomAccessFileReader> file;
  char cache_key_prefix[kMaxCacheKeyPrefixSize];
  size_t cache_key_prefix_size = 0;
  char persistent_cache_key_prefix[kMaxCacheKeyPrefixSize];
  size_t persistent_cache_key_prefix_size = 0;
  char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize];
  size_t compressed_cache_key_prefix_size = 0;
  uint64_t dummy_index_reader_offset =
      0;  // ID that is unique for the block cache.
  PersistentCacheOptions persistent_cache_options;

  // Footer contains the fixed table information
  Footer footer;
  // index_reader and filter will be populated and used only when
  // options.block_cache is nullptr; otherwise we will get the index block via
  // the block cache.
  unique_ptr<IndexReader> index_reader;
  unique_ptr<FilterBlockReader> filter;

  enum class FilterType {
    kNoFilter,
    kFullFilter,
    kBlockFilter,
    kPartitionedFilter,
  };
  FilterType filter_type;
  BlockHandle filter_handle;

  std::shared_ptr<const TableProperties> table_properties;
  // Block containing the data for the compression dictionary. We take
  // ownership of the entire block struct, even though we only use its Slice
  // member. This is easier because the Slice member depends on the continued
  // existence of another member ("allocation").
  std::unique_ptr<const BlockContents> compression_dict_block;
  // The constructor does not receive values for the next two fields, so they
  // get explicit defaults here instead of starting out indeterminate.
  BlockBasedTableOptions::IndexType index_type =
      BlockBasedTableOptions::kBinarySearch;
  bool hash_index_allow_collision = false;
  bool whole_key_filtering;
  bool prefix_filtering;
  // TODO(kailiu) It is very ugly to use internal key in table, since table
  // module should not be relying on db module. However to make things easier
  // and compatible with existing code, we introduce a wrapper that allows
  // block to extract prefix without knowing if a key is internal or not.
  unique_ptr<SliceTransform> internal_prefix_transform;

  // Only used in level 0 files:
  // when pin_l0_filter_and_index_blocks_in_cache is true, we do use the
  // LRU cache, but we always keep the filter & index block's handle checked
  // out here (= we don't call Release()), plus the parsed-out objects. The
  // LRU cache will never flush them out, hence they're pinned.
  CachableEntry<FilterBlockReader> filter_entry;
  CachableEntry<IndexReader> index_entry;
  // range deletion meta-block is pinned through reader's lifetime when LRU
  // cache is enabled.
  CachableEntry<Block> range_del_entry;
  BlockHandle range_del_handle;

  // If global_seqno is used, all keys in this file will have the same
  // seqno with value `global_seqno`.
  //
  // A value of kDisableGlobalSequenceNumber means that this feature is
  // disabled and every key has its own seqno.
  SequenceNumber global_seqno;
};
|
||||||
|
|
||||||
} // namespace rocksdb
|
} // namespace rocksdb
|
||||||
|
@ -15,8 +15,8 @@
|
|||||||
#include "rocksdb/options.h"
|
#include "rocksdb/options.h"
|
||||||
#include "rocksdb/table.h"
|
#include "rocksdb/table.h"
|
||||||
|
|
||||||
#include "port/port.h" // noexcept
|
#include "port/port.h" // noexcept
|
||||||
#include "table/persistent_cache_helper.h"
|
#include "table/persistent_cache_options.h"
|
||||||
#include "util/cf_options.h"
|
#include "util/cf_options.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
52
table/index_builder.cc
Normal file
52
table/index_builder.cc
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "table/index_builder.h"
|
||||||
|
#include <assert.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <list>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "rocksdb/comparator.h"
|
||||||
|
#include "table/format.h"
|
||||||
|
#include "table/partitioned_filter_block.h"
|
||||||
|
|
||||||
|
// Without anonymous namespace here, we fail the warning -Wmissing-prototypes
|
||||||
|
namespace rocksdb {
|
||||||
|
// using namespace rocksdb;
|
||||||
|
// Create a index builder based on its type.
|
||||||
|
IndexBuilder* IndexBuilder::CreateIndexBuilder(
|
||||||
|
BlockBasedTableOptions::IndexType index_type,
|
||||||
|
const InternalKeyComparator* comparator,
|
||||||
|
const SliceTransform* prefix_extractor, int index_block_restart_interval,
|
||||||
|
uint64_t index_per_partition) {
|
||||||
|
switch (index_type) {
|
||||||
|
case BlockBasedTableOptions::kBinarySearch: {
|
||||||
|
return new ShortenedIndexBuilder(comparator,
|
||||||
|
index_block_restart_interval);
|
||||||
|
}
|
||||||
|
case BlockBasedTableOptions::kHashSearch: {
|
||||||
|
return new HashIndexBuilder(comparator, prefix_extractor,
|
||||||
|
index_block_restart_interval);
|
||||||
|
}
|
||||||
|
case BlockBasedTableOptions::kTwoLevelIndexSearch: {
|
||||||
|
return new PartitionIndexBuilder(comparator, prefix_extractor,
|
||||||
|
index_per_partition,
|
||||||
|
index_block_restart_interval);
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
assert(!"Do not recognize the index type ");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// impossible.
|
||||||
|
assert(false);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
} // namespace rocksdb
|
265
table/index_builder.h
Normal file
265
table/index_builder.h
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "rocksdb/comparator.h"
|
||||||
|
#include "table/block_based_table_factory.h"
|
||||||
|
#include "table/block_builder.h"
|
||||||
|
#include "table/format.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
// The interface for building index.
|
||||||
|
// Instruction for adding a new concrete IndexBuilder:
|
||||||
|
// 1. Create a subclass instantiated from IndexBuilder.
|
||||||
|
// 2. Add a new entry associated with that subclass in TableOptions::IndexType.
|
||||||
|
// 3. Add a create function for the new subclass in CreateIndexBuilder.
|
||||||
|
// Note: we can devise more advanced design to simplify the process for adding
|
||||||
|
// new subclass, which will, on the other hand, increase the code complexity and
|
||||||
|
// catch unwanted attention from readers. Given that we won't add/change
|
||||||
|
// indexes frequently, it makes sense to just embrace a more straightforward
|
||||||
|
// design that just works.
|
||||||
|
class IndexBuilder {
|
||||||
|
public:
|
||||||
|
static IndexBuilder* CreateIndexBuilder(
|
||||||
|
BlockBasedTableOptions::IndexType index_type,
|
||||||
|
const InternalKeyComparator* comparator,
|
||||||
|
const SliceTransform* prefix_extractor, int index_block_restart_interval,
|
||||||
|
uint64_t index_per_partition);
|
||||||
|
|
||||||
|
// Index builder will construct a set of blocks which contain:
|
||||||
|
// 1. One primary index block.
|
||||||
|
// 2. (Optional) a set of metablocks that contains the metadata of the
|
||||||
|
// primary index.
|
||||||
|
struct IndexBlocks {
|
||||||
|
Slice index_block_contents;
|
||||||
|
std::unordered_map<std::string, Slice> meta_blocks;
|
||||||
|
};
|
||||||
|
explicit IndexBuilder(const InternalKeyComparator* comparator)
|
||||||
|
: comparator_(comparator) {}
|
||||||
|
|
||||||
|
virtual ~IndexBuilder() {}
|
||||||
|
|
||||||
|
// Add a new index entry to index block.
|
||||||
|
// To allow further optimization, we provide `last_key_in_current_block` and
|
||||||
|
// `first_key_in_next_block`, based on which the specific implementation can
|
||||||
|
// determine the best index key to be used for the index block.
|
||||||
|
// @last_key_in_current_block: this parameter maybe overridden with the value
|
||||||
|
// "substitute key".
|
||||||
|
// @first_key_in_next_block: it will be nullptr if the entry being added is
|
||||||
|
// the last one in the table
|
||||||
|
//
|
||||||
|
// REQUIRES: Finish() has not yet been called.
|
||||||
|
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
||||||
|
const Slice* first_key_in_next_block,
|
||||||
|
const BlockHandle& block_handle) = 0;
|
||||||
|
|
||||||
|
// This method will be called whenever a key is added. The subclasses may
|
||||||
|
// override OnKeyAdded() if they need to collect additional information.
|
||||||
|
virtual void OnKeyAdded(const Slice& key) {}
|
||||||
|
|
||||||
|
// Inform the index builder that all entries has been written. Block builder
|
||||||
|
// may therefore perform any operation required for block finalization.
|
||||||
|
//
|
||||||
|
// REQUIRES: Finish() has not yet been called.
|
||||||
|
inline Status Finish(IndexBlocks* index_blocks) {
|
||||||
|
// Throw away the changes to last_partition_block_handle. It has no effect
|
||||||
|
// on the first call to Finish anyway.
|
||||||
|
BlockHandle last_partition_block_handle;
|
||||||
|
return Finish(index_blocks, last_partition_block_handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
// This override of Finish can be utilized to build the 2nd level index in
|
||||||
|
// PartitionIndexBuilder.
|
||||||
|
//
|
||||||
|
// index_blocks will be filled with the resulting index data. If the return
|
||||||
|
// value is Status::InComplete() then it means that the index is partitioned
|
||||||
|
// and the callee should keep calling Finish until Status::OK() is returned.
|
||||||
|
// In that case, last_partition_block_handle is pointer to the block written
|
||||||
|
// with the result of the last call to Finish. This can be utilized to build
|
||||||
|
// the second level index pointing to each block of partitioned indexes. The
|
||||||
|
// last call to Finish() that returns Status::OK() populates index_blocks with
|
||||||
|
// the 2nd level index content.
|
||||||
|
virtual Status Finish(IndexBlocks* index_blocks,
|
||||||
|
const BlockHandle& last_partition_block_handle) = 0;
|
||||||
|
|
||||||
|
// Get the estimated size for index block.
|
||||||
|
virtual size_t EstimatedSize() const = 0;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
const InternalKeyComparator* comparator_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// This index builder builds space-efficient index block.
|
||||||
|
//
|
||||||
|
// Optimizations:
|
||||||
|
// 1. Made block's `block_restart_interval` to be 1, which will avoid linear
|
||||||
|
// search when doing index lookup (can be disabled by setting
|
||||||
|
// index_block_restart_interval).
|
||||||
|
// 2. Shorten the key length for index block. Other than honestly using the
|
||||||
|
// last key in the data block as the index key, we instead find a shortest
|
||||||
|
// substitute key that serves the same function.
|
||||||
|
class ShortenedIndexBuilder : public IndexBuilder {
|
||||||
|
public:
|
||||||
|
explicit ShortenedIndexBuilder(const InternalKeyComparator* comparator,
|
||||||
|
int index_block_restart_interval)
|
||||||
|
: IndexBuilder(comparator),
|
||||||
|
index_block_builder_(index_block_restart_interval) {}
|
||||||
|
|
||||||
|
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
||||||
|
const Slice* first_key_in_next_block,
|
||||||
|
const BlockHandle& block_handle) override {
|
||||||
|
if (first_key_in_next_block != nullptr) {
|
||||||
|
comparator_->FindShortestSeparator(last_key_in_current_block,
|
||||||
|
*first_key_in_next_block);
|
||||||
|
} else {
|
||||||
|
comparator_->FindShortSuccessor(last_key_in_current_block);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string handle_encoding;
|
||||||
|
block_handle.EncodeTo(&handle_encoding);
|
||||||
|
index_block_builder_.Add(*last_key_in_current_block, handle_encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual Status Finish(
|
||||||
|
IndexBlocks* index_blocks,
|
||||||
|
const BlockHandle& last_partition_block_handle) override {
|
||||||
|
index_blocks->index_block_contents = index_block_builder_.Finish();
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual size_t EstimatedSize() const override {
|
||||||
|
return index_block_builder_.CurrentSizeEstimate();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
BlockBuilder index_block_builder_;
|
||||||
|
};
|
||||||
|
|
||||||
|
// HashIndexBuilder contains a binary-searchable primary index and the
|
||||||
|
// metadata for secondary hash index construction.
|
||||||
|
// The metadata for hash index consists two parts:
|
||||||
|
// - a metablock that compactly contains a sequence of prefixes. All prefixes
|
||||||
|
// are stored consectively without any metadata (like, prefix sizes) being
|
||||||
|
// stored, which is kept in the other metablock.
|
||||||
|
// - a metablock contains the metadata of the prefixes, including prefix size,
|
||||||
|
// restart index and number of block it spans. The format looks like:
|
||||||
|
//
|
||||||
|
// +-----------------+---------------------------+---------------------+
|
||||||
|
// <=prefix 1
|
||||||
|
// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes |
|
||||||
|
// +-----------------+---------------------------+---------------------+
|
||||||
|
// <=prefix 2
|
||||||
|
// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes |
|
||||||
|
// +-----------------+---------------------------+---------------------+
|
||||||
|
// | |
|
||||||
|
// | .... |
|
||||||
|
// | |
|
||||||
|
// +-----------------+---------------------------+---------------------+
|
||||||
|
// <=prefix n
|
||||||
|
// | length: 4 bytes | restart interval: 4 bytes | num-blocks: 4 bytes |
|
||||||
|
// +-----------------+---------------------------+---------------------+
|
||||||
|
//
|
||||||
|
// The reason of separating these two metablocks is to enable the efficiently
|
||||||
|
// reuse the first metablock during hash index construction without unnecessary
|
||||||
|
// data copy or small heap allocations for prefixes.
|
||||||
|
class HashIndexBuilder : public IndexBuilder {
|
||||||
|
public:
|
||||||
|
explicit HashIndexBuilder(const InternalKeyComparator* comparator,
|
||||||
|
const SliceTransform* hash_key_extractor,
|
||||||
|
int index_block_restart_interval)
|
||||||
|
: IndexBuilder(comparator),
|
||||||
|
primary_index_builder_(comparator, index_block_restart_interval),
|
||||||
|
hash_key_extractor_(hash_key_extractor) {}
|
||||||
|
|
||||||
|
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
||||||
|
const Slice* first_key_in_next_block,
|
||||||
|
const BlockHandle& block_handle) override {
|
||||||
|
++current_restart_index_;
|
||||||
|
primary_index_builder_.AddIndexEntry(last_key_in_current_block,
|
||||||
|
first_key_in_next_block, block_handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void OnKeyAdded(const Slice& key) override {
|
||||||
|
auto key_prefix = hash_key_extractor_->Transform(key);
|
||||||
|
bool is_first_entry = pending_block_num_ == 0;
|
||||||
|
|
||||||
|
// Keys may share the prefix
|
||||||
|
if (is_first_entry || pending_entry_prefix_ != key_prefix) {
|
||||||
|
if (!is_first_entry) {
|
||||||
|
FlushPendingPrefix();
|
||||||
|
}
|
||||||
|
|
||||||
|
// need a hard copy otherwise the underlying data changes all the time.
|
||||||
|
// TODO(kailiu) ToString() is expensive. We may speed up can avoid data
|
||||||
|
// copy.
|
||||||
|
pending_entry_prefix_ = key_prefix.ToString();
|
||||||
|
pending_block_num_ = 1;
|
||||||
|
pending_entry_index_ = static_cast<uint32_t>(current_restart_index_);
|
||||||
|
} else {
|
||||||
|
// entry number increments when keys share the prefix reside in
|
||||||
|
// different data blocks.
|
||||||
|
auto last_restart_index = pending_entry_index_ + pending_block_num_ - 1;
|
||||||
|
assert(last_restart_index <= current_restart_index_);
|
||||||
|
if (last_restart_index != current_restart_index_) {
|
||||||
|
++pending_block_num_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual Status Finish(
|
||||||
|
IndexBlocks* index_blocks,
|
||||||
|
const BlockHandle& last_partition_block_handle) override {
|
||||||
|
FlushPendingPrefix();
|
||||||
|
primary_index_builder_.Finish(index_blocks, last_partition_block_handle);
|
||||||
|
index_blocks->meta_blocks.insert(
|
||||||
|
{kHashIndexPrefixesBlock.c_str(), prefix_block_});
|
||||||
|
index_blocks->meta_blocks.insert(
|
||||||
|
{kHashIndexPrefixesMetadataBlock.c_str(), prefix_meta_block_});
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual size_t EstimatedSize() const override {
|
||||||
|
return primary_index_builder_.EstimatedSize() + prefix_block_.size() +
|
||||||
|
prefix_meta_block_.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void FlushPendingPrefix() {
|
||||||
|
prefix_block_.append(pending_entry_prefix_.data(),
|
||||||
|
pending_entry_prefix_.size());
|
||||||
|
PutVarint32Varint32Varint32(
|
||||||
|
&prefix_meta_block_,
|
||||||
|
static_cast<uint32_t>(pending_entry_prefix_.size()),
|
||||||
|
pending_entry_index_, pending_block_num_);
|
||||||
|
}
|
||||||
|
|
||||||
|
ShortenedIndexBuilder primary_index_builder_;
|
||||||
|
const SliceTransform* hash_key_extractor_;
|
||||||
|
|
||||||
|
// stores a sequence of prefixes
|
||||||
|
std::string prefix_block_;
|
||||||
|
// stores the metadata of prefixes
|
||||||
|
std::string prefix_meta_block_;
|
||||||
|
|
||||||
|
// The following 3 variables keeps unflushed prefix and its metadata.
|
||||||
|
// The details of block_num and entry_index can be found in
|
||||||
|
// "block_hash_index.{h,cc}"
|
||||||
|
uint32_t pending_block_num_ = 0;
|
||||||
|
uint32_t pending_entry_index_ = 0;
|
||||||
|
std::string pending_entry_prefix_;
|
||||||
|
|
||||||
|
uint64_t current_restart_index_ = 0;
|
||||||
|
};
|
||||||
|
} // namespace rocksdb
|
84
table/partitioned_filter_block.cc
Normal file
84
table/partitioned_filter_block.cc
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "table/partitioned_filter_block.h"
|
||||||
|
|
||||||
|
#include "port/port.h"
|
||||||
|
#include "util/coding.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
// Sets up the top-level block builder and creates the builder for the first
// partition. Members are initialized in their declaration order, so the first
// partition builder is created from the constructor arguments rather than
// from members that are initialized after it.
PartitionIndexBuilder::PartitionIndexBuilder(
    const InternalKeyComparator* comparator,
    const SliceTransform* prefix_extractor, const uint64_t index_per_partition,
    int index_block_restart_interval)
    : IndexBuilder(comparator),
      prefix_extractor_(prefix_extractor),
      index_block_builder_(index_block_restart_interval),
      sub_index_builder_(CreateIndexBuilder(
          sub_type_, comparator, prefix_extractor,
          index_block_restart_interval, index_per_partition)),
      index_per_partition_(index_per_partition),
      index_block_restart_interval_(index_block_restart_interval) {}
|
||||||
|
|
||||||
|
// Deletes the partition builder that is still active, if any. Sealed
// partitions are owned by entries_ through unique_ptr, and AddIndexEntry
// resets sub_index_builder_ to nullptr after the last entry, in which case
// this delete is a no-op.
PartitionIndexBuilder::~PartitionIndexBuilder() { delete sub_index_builder_; }
|
||||||
|
|
||||||
|
void PartitionIndexBuilder::AddIndexEntry(
|
||||||
|
std::string* last_key_in_current_block,
|
||||||
|
const Slice* first_key_in_next_block, const BlockHandle& block_handle) {
|
||||||
|
sub_index_builder_->AddIndexEntry(last_key_in_current_block,
|
||||||
|
first_key_in_next_block, block_handle);
|
||||||
|
num_indexes++;
|
||||||
|
if (UNLIKELY(first_key_in_next_block == nullptr)) { // no more keys
|
||||||
|
entries_.push_back({std::string(*last_key_in_current_block),
|
||||||
|
std::unique_ptr<IndexBuilder>(sub_index_builder_)});
|
||||||
|
sub_index_builder_ = nullptr;
|
||||||
|
} else if (num_indexes % index_per_partition_ == 0) {
|
||||||
|
entries_.push_back({std::string(*last_key_in_current_block),
|
||||||
|
std::unique_ptr<IndexBuilder>(sub_index_builder_)});
|
||||||
|
sub_index_builder_ =
|
||||||
|
CreateIndexBuilder(sub_type_, comparator_, prefix_extractor_,
|
||||||
|
index_block_restart_interval_, index_per_partition_);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emits the index one block per call.
//
// Each call except the first records `last_partition_block_handle` in the
// top-level index under the key of the partition finished by the previous
// call, then drops that partition. While partitions remain, the next one is
// finished into `index_blocks` and Status::Incomplete() is returned to ask for
// another call. Once none remain, `index_blocks` receives the top-level index
// and Status::OK() is returned.
Status PartitionIndexBuilder::Finish(
    IndexBlocks* index_blocks, const BlockHandle& last_partition_block_handle) {
  assert(!entries_.empty());
  // It must be set to null after last key is added
  assert(sub_index_builder_ == nullptr);

  if (finishing) {
    // The partition at the front was written by the previous call; index it.
    std::string encoded_handle;
    last_partition_block_handle.EncodeTo(&encoded_handle);
    index_block_builder_.Add(entries_.front().key, encoded_handle);
    entries_.pop_front();
  }

  // No sub-index left: hand out the 2nd level index.
  if (UNLIKELY(entries_.empty())) {
    index_blocks->index_block_contents = index_block_builder_.Finish();
    return Status::OK();
  }

  // Finish the next partition in line and report Incomplete() so that the
  // caller keeps calling Finish.
  Status partition_status = entries_.front().value->Finish(index_blocks);
  finishing = true;
  if (!partition_status.ok()) {
    return partition_status;
  }
  return Status::Incomplete();
}
|
||||||
|
|
||||||
|
// Sums the size estimates of the top-level index block, every sealed
// partition and the partition still being built (if any).
size_t PartitionIndexBuilder::EstimatedSize() const {
  size_t estimate = index_block_builder_.CurrentSizeEstimate();
  for (const auto& sealed : entries_) {
    estimate += sealed.value->EstimatedSize();
  }
  if (sub_index_builder_ != nullptr) {
    estimate += sub_index_builder_->EstimatedSize();
  }
  return estimate;
}
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
65
table/partitioned_filter_block.h
Normal file
65
table/partitioned_filter_block.h
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <list>
|
||||||
|
#include <string>
|
||||||
|
#include "rocksdb/options.h"
|
||||||
|
#include "rocksdb/slice.h"
|
||||||
|
#include "rocksdb/slice_transform.h"
|
||||||
|
#include "util/hash.h"
|
||||||
|
|
||||||
|
#include "table/index_builder.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IndexBuilder for two-level indexing. Internally it creates a new index for
|
||||||
|
* each partition and Finish then in order when Finish is called on it
|
||||||
|
* continiously until Status::OK() is returned.
|
||||||
|
*
|
||||||
|
* The format on the disk would be I I I I I I IP where I is block containing a
|
||||||
|
* partition of indexes built using ShortenedIndexBuilder and IP is a block
|
||||||
|
* containing a secondary index on the partitions, built using
|
||||||
|
* ShortenedIndexBuilder.
|
||||||
|
*/
|
||||||
|
class PartitionIndexBuilder : public IndexBuilder {
|
||||||
|
public:
|
||||||
|
explicit PartitionIndexBuilder(const InternalKeyComparator* comparator,
|
||||||
|
const SliceTransform* prefix_extractor,
|
||||||
|
const uint64_t index_per_partition,
|
||||||
|
int index_block_restart_interval);
|
||||||
|
|
||||||
|
virtual ~PartitionIndexBuilder();
|
||||||
|
|
||||||
|
virtual void AddIndexEntry(std::string* last_key_in_current_block,
|
||||||
|
const Slice* first_key_in_next_block,
|
||||||
|
const BlockHandle& block_handle);
|
||||||
|
|
||||||
|
virtual Status Finish(IndexBlocks* index_blocks,
|
||||||
|
const BlockHandle& last_partition_block_handle);
|
||||||
|
|
||||||
|
virtual size_t EstimatedSize() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
static const BlockBasedTableOptions::IndexType sub_type_ =
|
||||||
|
BlockBasedTableOptions::kBinarySearch;
|
||||||
|
struct Entry {
|
||||||
|
std::string key;
|
||||||
|
std::unique_ptr<IndexBuilder> value;
|
||||||
|
};
|
||||||
|
std::list<Entry> entries_; // list of partitioned indexes and their keys
|
||||||
|
const SliceTransform* prefix_extractor_;
|
||||||
|
BlockBuilder index_block_builder_; // top-level index builder
|
||||||
|
IndexBuilder* sub_index_builder_; // the active partition index builder
|
||||||
|
uint64_t index_per_partition_;
|
||||||
|
int index_block_restart_interval_;
|
||||||
|
uint64_t num_indexes = 0;
|
||||||
|
bool finishing =
|
||||||
|
false; // true if Finish is called once but not complete yet.
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -2,7 +2,9 @@
|
|||||||
// This source code is licensed under the BSD-style license found in the
|
// This source code is licensed under the BSD-style license found in the
|
||||||
// LICENSE file in the root directory of this source tree. An additional grant
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
// of patent rights can be found in the PATENTS file in the same directory.
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
#include "table/persistent_cache_helper.h"
|
#include "table/persistent_cache_helper.h"
|
||||||
|
#include "table/block_based_table_reader.h"
|
||||||
#include "table/format.h"
|
#include "table/format.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
@ -6,33 +6,14 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include "table/block_based_table_reader.h"
|
#include "table/format.h"
|
||||||
|
#include "table/persistent_cache_options.h"
|
||||||
#include "util/statistics.h"
|
#include "util/statistics.h"
|
||||||
|
|
||||||
namespace rocksdb {
|
namespace rocksdb {
|
||||||
|
|
||||||
struct BlockContents;
|
struct BlockContents;
|
||||||
|
|
||||||
// PersistentCacheOptions
|
|
||||||
//
|
|
||||||
// This describes the caching behavior for page cache
|
|
||||||
// This is used to pass the context for caching and the cache handle
|
|
||||||
struct PersistentCacheOptions {
|
|
||||||
PersistentCacheOptions() {}
|
|
||||||
explicit PersistentCacheOptions(
|
|
||||||
const std::shared_ptr<PersistentCache>& _persistent_cache,
|
|
||||||
const std::string _key_prefix, Statistics* const _statistics)
|
|
||||||
: persistent_cache(_persistent_cache),
|
|
||||||
key_prefix(_key_prefix),
|
|
||||||
statistics(_statistics) {}
|
|
||||||
|
|
||||||
virtual ~PersistentCacheOptions() {}
|
|
||||||
|
|
||||||
std::shared_ptr<PersistentCache> persistent_cache;
|
|
||||||
std::string key_prefix;
|
|
||||||
Statistics* statistics = nullptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
// PersistentCacheHelper
|
// PersistentCacheHelper
|
||||||
//
|
//
|
||||||
// Encapsulates some of the helper logic for read and writing from the cache
|
// Encapsulates some of the helper logic for read and writing from the cache
|
||||||
|
34
table/persistent_cache_options.h
Normal file
34
table/persistent_cache_options.h
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "include/rocksdb/persistent_cache.h"
|
||||||
|
#include "util/statistics.h"
|
||||||
|
|
||||||
|
namespace rocksdb {
|
||||||
|
|
||||||
|
// PersistentCacheOptions
|
||||||
|
//
|
||||||
|
// This describes the caching behavior for page cache
|
||||||
|
// This is used to pass the context for caching and the cache handle
|
||||||
|
struct PersistentCacheOptions {
|
||||||
|
PersistentCacheOptions() {}
|
||||||
|
explicit PersistentCacheOptions(
|
||||||
|
const std::shared_ptr<PersistentCache>& _persistent_cache,
|
||||||
|
const std::string _key_prefix, Statistics* const _statistics)
|
||||||
|
: persistent_cache(_persistent_cache),
|
||||||
|
key_prefix(_key_prefix),
|
||||||
|
statistics(_statistics) {}
|
||||||
|
|
||||||
|
virtual ~PersistentCacheOptions() {}
|
||||||
|
|
||||||
|
std::shared_ptr<PersistentCache> persistent_cache;
|
||||||
|
std::string key_prefix;
|
||||||
|
Statistics* statistics = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace rocksdb
|
Loading…
Reference in New Issue
Block a user