Check PrefixMayMatch on Seek()
Summary: As a follow-up diff for https://reviews.facebook.net/D17805, add optimization to check PrefixMayMatch on Seek() Test Plan: make all check Reviewers: igor, haobo, sdong, yhchiang, dhruba Reviewed By: haobo CC: leveldb Differential Revision: https://reviews.facebook.net/D17853
This commit is contained in:
parent
3995e801ab
commit
d642c60bdc
@ -6481,7 +6481,7 @@ TEST(DBTest, PrefixScan) {
|
||||
ASSERT_OK(iter->status());
|
||||
delete iter;
|
||||
ASSERT_EQ(count, 2);
|
||||
ASSERT_EQ(env_->random_read_counter_.Read(), 11);
|
||||
ASSERT_EQ(env_->random_read_counter_.Read(), 2);
|
||||
Close();
|
||||
delete options.filter_policy;
|
||||
}
|
||||
|
@ -193,7 +193,7 @@ Status TableCache::GetTableProperties(
|
||||
bool TableCache::PrefixMayMatch(const ReadOptions& options,
|
||||
const InternalKeyComparator& icomparator,
|
||||
const FileMetaData& file_meta,
|
||||
const Slice& internal_prefix, bool* table_io) {
|
||||
const Slice& internal_key, bool* table_io) {
|
||||
bool may_match = true;
|
||||
auto table_reader = file_meta.table_reader;
|
||||
Cache::Handle* table_handle = nullptr;
|
||||
@ -207,7 +207,7 @@ bool TableCache::PrefixMayMatch(const ReadOptions& options,
|
||||
table_reader = GetTableReaderFromHandle(table_handle);
|
||||
}
|
||||
|
||||
may_match = table_reader->PrefixMayMatch(internal_prefix);
|
||||
may_match = table_reader->PrefixMayMatch(internal_key);
|
||||
|
||||
if (table_handle != nullptr) {
|
||||
// Need to release handle if it is generated from here.
|
||||
|
@ -217,49 +217,43 @@ class Version::LevelFileNumIterator : public Iterator {
|
||||
mutable EncodedFileMetaData current_value_;
|
||||
};
|
||||
|
||||
static Iterator* GetFileIterator(void* arg, const ReadOptions& options,
|
||||
const EnvOptions& soptions,
|
||||
const InternalKeyComparator& icomparator,
|
||||
const Slice& file_value, bool for_compaction) {
|
||||
TableCache* cache = reinterpret_cast<TableCache*>(arg);
|
||||
if (file_value.size() != sizeof(EncodedFileMetaData)) {
|
||||
return NewErrorIterator(
|
||||
Status::Corruption("FileReader invoked with unexpected value"));
|
||||
} else {
|
||||
const EncodedFileMetaData* encoded_meta =
|
||||
reinterpret_cast<const EncodedFileMetaData*>(file_value.data());
|
||||
FileMetaData meta(encoded_meta->number, encoded_meta->file_size);
|
||||
meta.table_reader = encoded_meta->table_reader;
|
||||
return cache->NewIterator(options, soptions, icomparator, meta,
|
||||
nullptr /* don't need reference to table*/, for_compaction);
|
||||
}
|
||||
}
|
||||
class Version::LevelFileIteratorState : public TwoLevelIteratorState {
|
||||
public:
|
||||
LevelFileIteratorState(TableCache* table_cache,
|
||||
const ReadOptions& read_options, const EnvOptions& env_options,
|
||||
const InternalKeyComparator& icomparator, bool for_compaction,
|
||||
bool prefix_enabled)
|
||||
: TwoLevelIteratorState(prefix_enabled),
|
||||
table_cache_(table_cache), read_options_(read_options),
|
||||
env_options_(env_options), icomparator_(icomparator),
|
||||
for_compaction_(for_compaction) {}
|
||||
|
||||
bool Version::PrefixMayMatch(const ReadOptions& options,
|
||||
const EnvOptions& soptions,
|
||||
const Slice& internal_prefix,
|
||||
Iterator* level_iter) const {
|
||||
bool may_match = true;
|
||||
level_iter->Seek(internal_prefix);
|
||||
if (!level_iter->Valid()) {
|
||||
// we're past end of level
|
||||
may_match = false;
|
||||
} else if (ExtractUserKey(level_iter->key()).starts_with(
|
||||
ExtractUserKey(internal_prefix))) {
|
||||
// TODO(tylerharter): do we need this case? Or are we guaranteed
|
||||
// key() will always be the biggest value for this SST?
|
||||
may_match = true;
|
||||
} else {
|
||||
const EncodedFileMetaData* encoded_meta =
|
||||
reinterpret_cast<const EncodedFileMetaData*>(
|
||||
level_iter->value().data());
|
||||
FileMetaData meta(encoded_meta->number, encoded_meta->file_size);
|
||||
meta.table_reader = encoded_meta->table_reader;
|
||||
may_match = cfd_->table_cache()->PrefixMayMatch(
|
||||
options, cfd_->internal_comparator(), meta, internal_prefix, nullptr);
|
||||
Iterator* NewSecondaryIterator(const Slice& meta_handle) override {
|
||||
if (meta_handle.size() != sizeof(EncodedFileMetaData)) {
|
||||
return NewErrorIterator(
|
||||
Status::Corruption("FileReader invoked with unexpected value"));
|
||||
} else {
|
||||
const EncodedFileMetaData* encoded_meta =
|
||||
reinterpret_cast<const EncodedFileMetaData*>(meta_handle.data());
|
||||
FileMetaData meta(encoded_meta->number, encoded_meta->file_size);
|
||||
meta.table_reader = encoded_meta->table_reader;
|
||||
return table_cache_->NewIterator(read_options_, env_options_,
|
||||
icomparator_, meta, nullptr /* don't need reference to table*/,
|
||||
for_compaction_);
|
||||
}
|
||||
}
|
||||
return may_match;
|
||||
}
|
||||
|
||||
bool PrefixMayMatch(const Slice& internal_key) override {
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
TableCache* table_cache_;
|
||||
const ReadOptions read_options_;
|
||||
const EnvOptions& env_options_;
|
||||
const InternalKeyComparator& icomparator_;
|
||||
bool for_compaction_;
|
||||
};
|
||||
|
||||
Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) {
|
||||
auto table_cache = cfd_->table_cache();
|
||||
@ -314,15 +308,6 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Iterator* Version::NewConcatenatingIterator(const ReadOptions& options,
|
||||
const EnvOptions& soptions,
|
||||
int level) const {
|
||||
Iterator* level_iter =
|
||||
new LevelFileNumIterator(cfd_->internal_comparator(), &files_[level]);
|
||||
return NewTwoLevelIterator(level_iter, &GetFileIterator, cfd_->table_cache(),
|
||||
options, soptions, cfd_->internal_comparator());
|
||||
}
|
||||
|
||||
void Version::AddIterators(const ReadOptions& options,
|
||||
const EnvOptions& soptions,
|
||||
std::vector<Iterator*>* iters) {
|
||||
@ -337,7 +322,11 @@ void Version::AddIterators(const ReadOptions& options,
|
||||
// lazily.
|
||||
for (int level = 1; level < num_levels_; level++) {
|
||||
if (!files_[level].empty()) {
|
||||
iters->push_back(NewConcatenatingIterator(options, soptions, level));
|
||||
iters->push_back(NewTwoLevelIterator(new LevelFileIteratorState(
|
||||
cfd_->table_cache(), options, soptions,
|
||||
cfd_->internal_comparator(), false /* for_compaction */,
|
||||
cfd_->options()->prefix_extractor != nullptr),
|
||||
new LevelFileNumIterator(cfd_->internal_comparator(), &files_[level])));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2638,10 +2627,11 @@ void VersionSet::AddLiveFiles(std::vector<uint64_t>* live_list) {
|
||||
}
|
||||
|
||||
Iterator* VersionSet::MakeInputIterator(Compaction* c) {
|
||||
ReadOptions options;
|
||||
options.verify_checksums =
|
||||
c->column_family_data()->options()->verify_checksums_in_compaction;
|
||||
options.fill_cache = false;
|
||||
auto cfd = c->column_family_data();
|
||||
ReadOptions read_options;
|
||||
read_options.verify_checksums =
|
||||
cfd->options()->verify_checksums_in_compaction;
|
||||
read_options.fill_cache = false;
|
||||
|
||||
// Level-0 files have to be merged together. For other levels,
|
||||
// we will make a concatenating iterator per level.
|
||||
@ -2653,20 +2643,19 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
|
||||
if (!c->inputs(which)->empty()) {
|
||||
if (c->level() + which == 0) {
|
||||
for (const auto& file : *c->inputs(which)) {
|
||||
list[num++] = c->column_family_data()->table_cache()->NewIterator(
|
||||
options, storage_options_compactions_,
|
||||
c->column_family_data()->internal_comparator(), *file, nullptr,
|
||||
list[num++] = cfd->table_cache()->NewIterator(
|
||||
read_options, storage_options_compactions_,
|
||||
cfd->internal_comparator(), *file, nullptr,
|
||||
true /* for compaction */);
|
||||
}
|
||||
} else {
|
||||
// Create concatenating iterator for the files from this level
|
||||
list[num++] = NewTwoLevelIterator(
|
||||
new Version::LevelFileNumIterator(
|
||||
c->column_family_data()->internal_comparator(),
|
||||
c->inputs(which)),
|
||||
&GetFileIterator, c->column_family_data()->table_cache(), options,
|
||||
storage_options_, c->column_family_data()->internal_comparator(),
|
||||
true /* for compaction */);
|
||||
list[num++] = NewTwoLevelIterator(new Version::LevelFileIteratorState(
|
||||
cfd->table_cache(), read_options, storage_options_,
|
||||
cfd->internal_comparator(), true /* for_compaction */,
|
||||
false /* prefix enabled */),
|
||||
new Version::LevelFileNumIterator(cfd->internal_comparator(),
|
||||
c->inputs(which)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -219,11 +219,10 @@ class Version {
|
||||
friend class UniversalCompactionPicker;
|
||||
|
||||
class LevelFileNumIterator;
|
||||
Iterator* NewConcatenatingIterator(const ReadOptions&,
|
||||
const EnvOptions& soptions,
|
||||
int level) const;
|
||||
bool PrefixMayMatch(const ReadOptions& options, const EnvOptions& soptions,
|
||||
const Slice& internal_prefix, Iterator* level_iter) const;
|
||||
struct LevelFileIteratorState;
|
||||
|
||||
bool PrefixMayMatch(const ReadOptions& options, Iterator* level_iter,
|
||||
const Slice& internal_prefix) const;
|
||||
|
||||
// Sort all files for this version based on their file size and
|
||||
// record results in files_by_size_. The largest files are listed first.
|
||||
|
@ -642,94 +642,6 @@ FilterBlockReader* BlockBasedTable::ReadFilter (
|
||||
rep->options, block.data, block.heap_allocated);
|
||||
}
|
||||
|
||||
// Convert an index iterator value (i.e., an encoded BlockHandle)
|
||||
// into an iterator over the contents of the corresponding block.
|
||||
Iterator* BlockBasedTable::DataBlockReader(void* arg,
|
||||
const ReadOptions& options,
|
||||
const Slice& index_value,
|
||||
bool* didIO, bool for_compaction) {
|
||||
const bool no_io = (options.read_tier == kBlockCacheTier);
|
||||
BlockBasedTable* table = reinterpret_cast<BlockBasedTable*>(arg);
|
||||
Cache* block_cache = table->rep_->options.block_cache.get();
|
||||
Cache* block_cache_compressed = table->rep_->options.
|
||||
block_cache_compressed.get();
|
||||
CachableEntry<Block> block;
|
||||
|
||||
BlockHandle handle;
|
||||
Slice input = index_value;
|
||||
// We intentionally allow extra stuff in index_value so that we
|
||||
// can add more features in the future.
|
||||
Status s = handle.DecodeFrom(&input);
|
||||
|
||||
if (!s.ok()) {
|
||||
return NewErrorIterator(s);
|
||||
}
|
||||
|
||||
// If either block cache is enabled, we'll try to read from it.
|
||||
if (block_cache != nullptr || block_cache_compressed != nullptr) {
|
||||
Statistics* statistics = table->rep_->options.statistics.get();
|
||||
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||
char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||
Slice key, /* key to the block cache */
|
||||
ckey /* key to the compressed block cache */;
|
||||
|
||||
// create key for block cache
|
||||
if (block_cache != nullptr) {
|
||||
key = GetCacheKey(table->rep_->cache_key_prefix,
|
||||
table->rep_->cache_key_prefix_size, handle, cache_key);
|
||||
}
|
||||
|
||||
if (block_cache_compressed != nullptr) {
|
||||
ckey = GetCacheKey(table->rep_->compressed_cache_key_prefix,
|
||||
table->rep_->compressed_cache_key_prefix_size, handle,
|
||||
compressed_cache_key);
|
||||
}
|
||||
|
||||
s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed,
|
||||
statistics, options, &block);
|
||||
|
||||
if (block.value == nullptr && !no_io && options.fill_cache) {
|
||||
Histograms histogram = for_compaction ?
|
||||
READ_BLOCK_COMPACTION_MICROS : READ_BLOCK_GET_MICROS;
|
||||
Block* raw_block = nullptr;
|
||||
{
|
||||
StopWatch sw(table->rep_->options.env, statistics, histogram);
|
||||
s = ReadBlockFromFile(table->rep_->file.get(), options, handle,
|
||||
&raw_block, table->rep_->options.env, didIO,
|
||||
block_cache_compressed == nullptr);
|
||||
}
|
||||
|
||||
if (s.ok()) {
|
||||
s = PutDataBlockToCache(key, ckey, block_cache, block_cache_compressed,
|
||||
options, statistics, &block, raw_block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Didn't get any data from block caches.
|
||||
if (block.value == nullptr) {
|
||||
if (no_io) {
|
||||
// Could not read from block_cache and can't do IO
|
||||
return NewErrorIterator(Status::Incomplete("no blocking io"));
|
||||
}
|
||||
s = ReadBlockFromFile(table->rep_->file.get(), options, handle,
|
||||
&block.value, table->rep_->options.env, didIO);
|
||||
}
|
||||
|
||||
Iterator* iter;
|
||||
if (block.value != nullptr) {
|
||||
iter = block.value->NewIterator(&table->rep_->internal_comparator);
|
||||
if (block.cache_handle != nullptr) {
|
||||
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache,
|
||||
block.cache_handle);
|
||||
} else {
|
||||
iter->RegisterCleanup(&DeleteHeldResource<Block>, block.value, nullptr);
|
||||
}
|
||||
} else {
|
||||
iter = NewErrorIterator(s);
|
||||
}
|
||||
return iter;
|
||||
}
|
||||
|
||||
BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
|
||||
bool no_io) const {
|
||||
@ -838,13 +750,115 @@ Iterator* BlockBasedTable::NewIndexIterator(const ReadOptions& read_options) {
|
||||
return iter;
|
||||
}
|
||||
|
||||
Iterator* BlockBasedTable::DataBlockReader(
|
||||
void* arg, const ReadOptions& options, const EnvOptions& soptions,
|
||||
const InternalKeyComparator& icomparator, const Slice& index_value,
|
||||
bool for_compaction) {
|
||||
return DataBlockReader(arg, options, index_value, nullptr, for_compaction);
|
||||
// Convert an index iterator value (i.e., an encoded BlockHandle)
|
||||
// into an iterator over the contents of the corresponding block.
|
||||
Iterator* BlockBasedTable::NewDataBlockIterator(Rep* rep,
|
||||
const ReadOptions& ro, bool* didIO, const Slice& index_value) {
|
||||
const bool no_io = (ro.read_tier == kBlockCacheTier);
|
||||
Cache* block_cache = rep->options.block_cache.get();
|
||||
Cache* block_cache_compressed = rep->options.
|
||||
block_cache_compressed.get();
|
||||
CachableEntry<Block> block;
|
||||
|
||||
BlockHandle handle;
|
||||
Slice input = index_value;
|
||||
// We intentionally allow extra stuff in index_value so that we
|
||||
// can add more features in the future.
|
||||
Status s = handle.DecodeFrom(&input);
|
||||
|
||||
if (!s.ok()) {
|
||||
return NewErrorIterator(s);
|
||||
}
|
||||
|
||||
// If either block cache is enabled, we'll try to read from it.
|
||||
if (block_cache != nullptr || block_cache_compressed != nullptr) {
|
||||
Statistics* statistics = rep->options.statistics.get();
|
||||
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||
char compressed_cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
|
||||
Slice key, /* key to the block cache */
|
||||
ckey /* key to the compressed block cache */;
|
||||
|
||||
// create key for block cache
|
||||
if (block_cache != nullptr) {
|
||||
key = GetCacheKey(rep->cache_key_prefix,
|
||||
rep->cache_key_prefix_size, handle, cache_key);
|
||||
}
|
||||
|
||||
if (block_cache_compressed != nullptr) {
|
||||
ckey = GetCacheKey(rep->compressed_cache_key_prefix,
|
||||
rep->compressed_cache_key_prefix_size, handle,
|
||||
compressed_cache_key);
|
||||
}
|
||||
|
||||
s = GetDataBlockFromCache(key, ckey, block_cache, block_cache_compressed,
|
||||
statistics, ro, &block);
|
||||
|
||||
if (block.value == nullptr && !no_io && ro.fill_cache) {
|
||||
Histograms histogram = READ_BLOCK_GET_MICROS;
|
||||
Block* raw_block = nullptr;
|
||||
{
|
||||
StopWatch sw(rep->options.env, statistics, histogram);
|
||||
s = ReadBlockFromFile(rep->file.get(), ro, handle,
|
||||
&raw_block, rep->options.env, didIO,
|
||||
block_cache_compressed == nullptr);
|
||||
}
|
||||
|
||||
if (s.ok()) {
|
||||
s = PutDataBlockToCache(key, ckey, block_cache, block_cache_compressed,
|
||||
ro, statistics, &block, raw_block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Didn't get any data from block caches.
|
||||
if (block.value == nullptr) {
|
||||
if (no_io) {
|
||||
// Could not read from block_cache and can't do IO
|
||||
return NewErrorIterator(Status::Incomplete("no blocking io"));
|
||||
}
|
||||
s = ReadBlockFromFile(rep->file.get(), ro, handle,
|
||||
&block.value, rep->options.env, didIO);
|
||||
}
|
||||
|
||||
Iterator* iter;
|
||||
if (block.value != nullptr) {
|
||||
iter = block.value->NewIterator(&rep->internal_comparator);
|
||||
if (block.cache_handle != nullptr) {
|
||||
iter->RegisterCleanup(&ReleaseCachedEntry, block_cache,
|
||||
block.cache_handle);
|
||||
} else {
|
||||
iter->RegisterCleanup(&DeleteHeldResource<Block>, block.value, nullptr);
|
||||
}
|
||||
} else {
|
||||
iter = NewErrorIterator(s);
|
||||
}
|
||||
return iter;
|
||||
}
|
||||
|
||||
class BlockBasedTable::BlockEntryIteratorState : public TwoLevelIteratorState {
|
||||
public:
|
||||
BlockEntryIteratorState(BlockBasedTable* table,
|
||||
const ReadOptions& read_options, bool* did_io)
|
||||
: TwoLevelIteratorState(table->rep_->options.prefix_extractor != nullptr),
|
||||
table_(table), read_options_(read_options), did_io_(did_io) {}
|
||||
|
||||
Iterator* NewSecondaryIterator(const Slice& index_value) override {
|
||||
return NewDataBlockIterator(table_->rep_, read_options_, did_io_,
|
||||
index_value);
|
||||
}
|
||||
|
||||
bool PrefixMayMatch(const Slice& internal_key) override {
|
||||
return table_->PrefixMayMatch(internal_key);
|
||||
}
|
||||
|
||||
private:
|
||||
// Don't own table_
|
||||
BlockBasedTable* table_;
|
||||
const ReadOptions read_options_;
|
||||
// Don't own did_io_
|
||||
bool* did_io_;
|
||||
};
|
||||
|
||||
// This will be broken if the user specifies an unusual implementation
|
||||
// of Options.comparator, or if the user specifies an unusual
|
||||
// definition of prefixes in Options.filter_policy. In particular, we
|
||||
@ -857,7 +871,13 @@ Iterator* BlockBasedTable::DataBlockReader(
|
||||
// Otherwise, this method guarantees no I/O will be incurred.
|
||||
//
|
||||
// REQUIRES: this method shouldn't be called while the DB lock is held.
|
||||
bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
|
||||
bool BlockBasedTable::PrefixMayMatch(const Slice& internal_key) {
|
||||
assert(rep_->options.prefix_extractor != nullptr);
|
||||
auto prefix = rep_->options.prefix_extractor->Transform(
|
||||
ExtractUserKey(internal_key));
|
||||
InternalKey internal_key_prefix(prefix, 0, kTypeValue);
|
||||
auto internal_prefix = internal_key_prefix.Encode();
|
||||
|
||||
bool may_match = true;
|
||||
Status s;
|
||||
|
||||
@ -918,11 +938,10 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) {
|
||||
return may_match;
|
||||
}
|
||||
|
||||
Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) {
|
||||
return NewTwoLevelIterator(NewIndexIterator(options),
|
||||
&BlockBasedTable::DataBlockReader,
|
||||
const_cast<BlockBasedTable*>(this), options,
|
||||
rep_->soptions, rep_->internal_comparator);
|
||||
Iterator* BlockBasedTable::NewIterator(const ReadOptions& read_options) {
|
||||
return NewTwoLevelIterator(new BlockEntryIteratorState(this, read_options,
|
||||
nullptr),
|
||||
NewIndexIterator(read_options));
|
||||
}
|
||||
|
||||
Status BlockBasedTable::Get(
|
||||
@ -953,7 +972,7 @@ Status BlockBasedTable::Get(
|
||||
} else {
|
||||
bool didIO = false;
|
||||
unique_ptr<Iterator> block_iter(
|
||||
DataBlockReader(this, read_options, iiter->value(), &didIO));
|
||||
NewDataBlockIterator(rep_, read_options, &didIO, iiter->value()));
|
||||
|
||||
if (read_options.read_tier && block_iter->status().IsIncomplete()) {
|
||||
// couldn't get block from block_cache
|
||||
@ -1050,10 +1069,8 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) {
|
||||
return HashIndexReader::Create(
|
||||
file, index_handle, env, comparator,
|
||||
[&](Iterator* index_iter) {
|
||||
return NewTwoLevelIterator(
|
||||
index_iter, &BlockBasedTable::DataBlockReader,
|
||||
const_cast<BlockBasedTable*>(this), ReadOptions(),
|
||||
rep_->soptions, rep_->internal_comparator);
|
||||
return NewTwoLevelIterator(new BlockEntryIteratorState(this,
|
||||
ReadOptions(), nullptr), index_iter);
|
||||
},
|
||||
rep_->internal_prefix_transform.get(), index_reader);
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ class BlockBasedTable : public TableReader {
|
||||
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
|
||||
unique_ptr<TableReader>* table_reader);
|
||||
|
||||
bool PrefixMayMatch(const Slice& internal_prefix) override;
|
||||
bool PrefixMayMatch(const Slice& internal_key) override;
|
||||
|
||||
// Returns a new iterator over the table contents.
|
||||
// The result of NewIterator() is initially invalid (caller must
|
||||
@ -111,13 +111,9 @@ class BlockBasedTable : public TableReader {
|
||||
Rep* rep_;
|
||||
bool compaction_optimized_;
|
||||
|
||||
static Iterator* DataBlockReader(void*, const ReadOptions&,
|
||||
const EnvOptions& soptions,
|
||||
const InternalKeyComparator& icomparator,
|
||||
const Slice&, bool for_compaction);
|
||||
|
||||
static Iterator* DataBlockReader(void*, const ReadOptions&, const Slice&,
|
||||
bool* didIO, bool for_compaction = false);
|
||||
struct BlockEntryIteratorState;
|
||||
static Iterator* NewDataBlockIterator(Rep* rep, const ReadOptions& ro,
|
||||
bool* didIO, const Slice& index_value);
|
||||
|
||||
// For the following two functions:
|
||||
// if `no_io == true`, we will not try to read filter/index from sst file
|
||||
|
@ -13,26 +13,17 @@
|
||||
#include "rocksdb/table.h"
|
||||
#include "table/block.h"
|
||||
#include "table/format.h"
|
||||
#include "table/iterator_wrapper.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
namespace {
|
||||
|
||||
typedef Iterator* (*BlockFunction)(void*, const ReadOptions&,
|
||||
const EnvOptions& soptions,
|
||||
const InternalKeyComparator& icomparator,
|
||||
const Slice&, bool for_compaction);
|
||||
|
||||
class TwoLevelIterator: public Iterator {
|
||||
public:
|
||||
TwoLevelIterator(Iterator* index_iter, BlockFunction block_function,
|
||||
void* arg, const ReadOptions& options,
|
||||
const EnvOptions& soptions,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
bool for_compaction);
|
||||
explicit TwoLevelIterator(TwoLevelIteratorState* state,
|
||||
Iterator* first_level_iter);
|
||||
|
||||
virtual ~TwoLevelIterator();
|
||||
virtual ~TwoLevelIterator() {}
|
||||
|
||||
virtual void Seek(const Slice& target);
|
||||
virtual void SeekToFirst();
|
||||
@ -41,22 +32,23 @@ class TwoLevelIterator: public Iterator {
|
||||
virtual void Prev();
|
||||
|
||||
virtual bool Valid() const {
|
||||
return data_iter_.Valid();
|
||||
return second_level_iter_.Valid();
|
||||
}
|
||||
virtual Slice key() const {
|
||||
assert(Valid());
|
||||
return data_iter_.key();
|
||||
return second_level_iter_.key();
|
||||
}
|
||||
virtual Slice value() const {
|
||||
assert(Valid());
|
||||
return data_iter_.value();
|
||||
return second_level_iter_.value();
|
||||
}
|
||||
virtual Status status() const {
|
||||
// It'd be nice if status() returned a const Status& instead of a Status
|
||||
if (!index_iter_.status().ok()) {
|
||||
return index_iter_.status();
|
||||
} else if (data_iter_.iter() != nullptr && !data_iter_.status().ok()) {
|
||||
return data_iter_.status();
|
||||
if (!first_level_iter_.status().ok()) {
|
||||
return first_level_iter_.status();
|
||||
} else if (second_level_iter_.iter() != nullptr &&
|
||||
!second_level_iter_.status().ok()) {
|
||||
return second_level_iter_.status();
|
||||
} else {
|
||||
return status_;
|
||||
}
|
||||
@ -68,135 +60,129 @@ class TwoLevelIterator: public Iterator {
|
||||
}
|
||||
void SkipEmptyDataBlocksForward();
|
||||
void SkipEmptyDataBlocksBackward();
|
||||
void SetDataIterator(Iterator* data_iter);
|
||||
void SetSecondLevelIterator(Iterator* iter);
|
||||
void InitDataBlock();
|
||||
|
||||
BlockFunction block_function_;
|
||||
void* arg_;
|
||||
const ReadOptions options_;
|
||||
const EnvOptions& soptions_;
|
||||
const InternalKeyComparator& internal_comparator_;
|
||||
std::unique_ptr<TwoLevelIteratorState> state_;
|
||||
IteratorWrapper first_level_iter_;
|
||||
IteratorWrapper second_level_iter_; // May be nullptr
|
||||
Status status_;
|
||||
IteratorWrapper index_iter_;
|
||||
IteratorWrapper data_iter_; // May be nullptr
|
||||
// If data_iter_ is non-nullptr, then "data_block_handle_" holds the
|
||||
// "index_value" passed to block_function_ to create the data_iter_.
|
||||
// If second_level_iter is non-nullptr, then "data_block_handle_" holds the
|
||||
// "index_value" passed to block_function_ to create the second_level_iter.
|
||||
std::string data_block_handle_;
|
||||
bool for_compaction_;
|
||||
};
|
||||
|
||||
TwoLevelIterator::TwoLevelIterator(
|
||||
Iterator* index_iter, BlockFunction block_function, void* arg,
|
||||
const ReadOptions& options, const EnvOptions& soptions,
|
||||
const InternalKeyComparator& internal_comparator, bool for_compaction)
|
||||
: block_function_(block_function),
|
||||
arg_(arg),
|
||||
options_(options),
|
||||
soptions_(soptions),
|
||||
internal_comparator_(internal_comparator),
|
||||
index_iter_(index_iter),
|
||||
data_iter_(nullptr),
|
||||
for_compaction_(for_compaction) {}
|
||||
|
||||
TwoLevelIterator::~TwoLevelIterator() {
|
||||
}
|
||||
TwoLevelIterator::TwoLevelIterator(TwoLevelIteratorState* state,
|
||||
Iterator* first_level_iter)
|
||||
: state_(state), first_level_iter_(first_level_iter) {}
|
||||
|
||||
void TwoLevelIterator::Seek(const Slice& target) {
|
||||
index_iter_.Seek(target);
|
||||
InitDataBlock();
|
||||
if (data_iter_.iter() != nullptr) data_iter_.Seek(target);
|
||||
SkipEmptyDataBlocksForward();
|
||||
if (state_->prefix_enabled && !state_->PrefixMayMatch(target)) {
|
||||
SetSecondLevelIterator(nullptr);
|
||||
} else {
|
||||
first_level_iter_.Seek(target);
|
||||
InitDataBlock();
|
||||
if (second_level_iter_.iter() != nullptr) {
|
||||
second_level_iter_.Seek(target);
|
||||
}
|
||||
SkipEmptyDataBlocksForward();
|
||||
}
|
||||
}
|
||||
|
||||
void TwoLevelIterator::SeekToFirst() {
|
||||
index_iter_.SeekToFirst();
|
||||
first_level_iter_.SeekToFirst();
|
||||
InitDataBlock();
|
||||
if (data_iter_.iter() != nullptr) data_iter_.SeekToFirst();
|
||||
if (second_level_iter_.iter() != nullptr) {
|
||||
second_level_iter_.SeekToFirst();
|
||||
}
|
||||
SkipEmptyDataBlocksForward();
|
||||
}
|
||||
|
||||
void TwoLevelIterator::SeekToLast() {
|
||||
index_iter_.SeekToLast();
|
||||
first_level_iter_.SeekToLast();
|
||||
InitDataBlock();
|
||||
if (data_iter_.iter() != nullptr) data_iter_.SeekToLast();
|
||||
if (second_level_iter_.iter() != nullptr) {
|
||||
second_level_iter_.SeekToLast();
|
||||
}
|
||||
SkipEmptyDataBlocksBackward();
|
||||
}
|
||||
|
||||
void TwoLevelIterator::Next() {
|
||||
assert(Valid());
|
||||
data_iter_.Next();
|
||||
second_level_iter_.Next();
|
||||
SkipEmptyDataBlocksForward();
|
||||
}
|
||||
|
||||
void TwoLevelIterator::Prev() {
|
||||
assert(Valid());
|
||||
data_iter_.Prev();
|
||||
second_level_iter_.Prev();
|
||||
SkipEmptyDataBlocksBackward();
|
||||
}
|
||||
|
||||
|
||||
void TwoLevelIterator::SkipEmptyDataBlocksForward() {
|
||||
while (data_iter_.iter() == nullptr || (!data_iter_.Valid() &&
|
||||
!data_iter_.status().IsIncomplete())) {
|
||||
while (second_level_iter_.iter() == nullptr ||
|
||||
(!second_level_iter_.Valid() &&
|
||||
!second_level_iter_.status().IsIncomplete())) {
|
||||
// Move to next block
|
||||
if (!index_iter_.Valid()) {
|
||||
SetDataIterator(nullptr);
|
||||
if (!first_level_iter_.Valid()) {
|
||||
SetSecondLevelIterator(nullptr);
|
||||
return;
|
||||
}
|
||||
index_iter_.Next();
|
||||
first_level_iter_.Next();
|
||||
InitDataBlock();
|
||||
if (data_iter_.iter() != nullptr) data_iter_.SeekToFirst();
|
||||
if (second_level_iter_.iter() != nullptr) {
|
||||
second_level_iter_.SeekToFirst();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TwoLevelIterator::SkipEmptyDataBlocksBackward() {
|
||||
while (data_iter_.iter() == nullptr || (!data_iter_.Valid() &&
|
||||
!data_iter_.status().IsIncomplete())) {
|
||||
while (second_level_iter_.iter() == nullptr ||
|
||||
(!second_level_iter_.Valid() &&
|
||||
!second_level_iter_.status().IsIncomplete())) {
|
||||
// Move to next block
|
||||
if (!index_iter_.Valid()) {
|
||||
SetDataIterator(nullptr);
|
||||
if (!first_level_iter_.Valid()) {
|
||||
SetSecondLevelIterator(nullptr);
|
||||
return;
|
||||
}
|
||||
index_iter_.Prev();
|
||||
first_level_iter_.Prev();
|
||||
InitDataBlock();
|
||||
if (data_iter_.iter() != nullptr) data_iter_.SeekToLast();
|
||||
if (second_level_iter_.iter() != nullptr) {
|
||||
second_level_iter_.SeekToLast();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void TwoLevelIterator::SetDataIterator(Iterator* data_iter) {
|
||||
if (data_iter_.iter() != nullptr) SaveError(data_iter_.status());
|
||||
data_iter_.Set(data_iter);
|
||||
void TwoLevelIterator::SetSecondLevelIterator(Iterator* iter) {
|
||||
if (second_level_iter_.iter() != nullptr) {
|
||||
SaveError(second_level_iter_.status());
|
||||
}
|
||||
second_level_iter_.Set(iter);
|
||||
}
|
||||
|
||||
void TwoLevelIterator::InitDataBlock() {
|
||||
if (!index_iter_.Valid()) {
|
||||
SetDataIterator(nullptr);
|
||||
if (!first_level_iter_.Valid()) {
|
||||
SetSecondLevelIterator(nullptr);
|
||||
} else {
|
||||
Slice handle = index_iter_.value();
|
||||
if (data_iter_.iter() != nullptr
|
||||
Slice handle = first_level_iter_.value();
|
||||
if (second_level_iter_.iter() != nullptr
|
||||
&& handle.compare(data_block_handle_) == 0) {
|
||||
// data_iter_ is already constructed with this iterator, so
|
||||
// second_level_iter is already constructed with this iterator, so
|
||||
// no need to change anything
|
||||
} else {
|
||||
Iterator* iter =
|
||||
(*block_function_)(arg_, options_, soptions_, internal_comparator_,
|
||||
handle, for_compaction_);
|
||||
Iterator* iter = state_->NewSecondaryIterator(handle);
|
||||
data_block_handle_.assign(handle.data(), handle.size());
|
||||
SetDataIterator(iter);
|
||||
SetSecondLevelIterator(iter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Iterator* NewTwoLevelIterator(Iterator* index_iter,
|
||||
BlockFunction block_function, void* arg,
|
||||
const ReadOptions& options,
|
||||
const EnvOptions& soptions,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
bool for_compaction) {
|
||||
return new TwoLevelIterator(index_iter, block_function, arg, options,
|
||||
soptions, internal_comparator, for_compaction);
|
||||
Iterator* NewTwoLevelIterator(TwoLevelIteratorState* state,
|
||||
Iterator* first_level_iter) {
|
||||
return new TwoLevelIterator(state, first_level_iter);
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -10,12 +10,25 @@
|
||||
#pragma once
|
||||
#include "rocksdb/iterator.h"
|
||||
#include "rocksdb/env.h"
|
||||
#include "table/iterator_wrapper.h"
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
struct ReadOptions;
|
||||
class InternalKeyComparator;
|
||||
|
||||
struct TwoLevelIteratorState {
|
||||
explicit TwoLevelIteratorState(bool prefix_enabled)
|
||||
: prefix_enabled(prefix_enabled) {}
|
||||
|
||||
virtual ~TwoLevelIteratorState() {}
|
||||
virtual Iterator* NewSecondaryIterator(const Slice& handle) = 0;
|
||||
virtual bool PrefixMayMatch(const Slice& internal_key) = 0;
|
||||
|
||||
bool prefix_enabled;
|
||||
};
|
||||
|
||||
|
||||
// Return a new two level iterator. A two-level iterator contains an
|
||||
// index iterator whose values point to a sequence of blocks where
|
||||
// each block is itself a sequence of key,value pairs. The returned
|
||||
@ -25,14 +38,7 @@ class InternalKeyComparator;
|
||||
//
|
||||
// Uses a supplied function to convert an index_iter value into
|
||||
// an iterator over the contents of the corresponding block.
|
||||
extern Iterator* NewTwoLevelIterator(
|
||||
Iterator* index_iter,
|
||||
Iterator* (*block_function)(
|
||||
void* arg, const ReadOptions& options, const EnvOptions& soptions,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
const Slice& index_value, bool for_compaction),
|
||||
void* arg, const ReadOptions& options, const EnvOptions& soptions,
|
||||
const InternalKeyComparator& internal_comparator,
|
||||
bool for_compaction = false);
|
||||
extern Iterator* NewTwoLevelIterator(TwoLevelIteratorState* state,
|
||||
Iterator* first_level_iter);
|
||||
|
||||
} // namespace rocksdb
|
||||
|
Loading…
Reference in New Issue
Block a user