Reduce iterator key comparison for upper/lower bound check (#5111)

Summary:
Previously, if an iterator upper/lower bound was present, `DBIter` would check the bound for every key. This patch turns the check into a per-file or per-data-block check when applicable, by checking against either the file's largest/smallest key or the block index key.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5111

Differential Revision: D15330061

Pulled By: siying

fbshipit-source-id: 8a653fe3cd50d94d81eb2d13b087326c58ee2024
This commit is contained in:
yiwu-arbug 2019-05-17 10:23:38 -07:00 committed by Facebook Github Bot
parent a13026fb2f
commit f3a7847598
8 changed files with 117 additions and 28 deletions

View File

@ -11,6 +11,7 @@
* Reduce binary search when iterator reseek into the same data block. * Reduce binary search when iterator reseek into the same data block.
* DBIter::Next() can skip user key checking if previous entry's seqnum is 0. * DBIter::Next() can skip user key checking if previous entry's seqnum is 0.
* Merging iterator to avoid child iterator reseek for some cases * Merging iterator to avoid child iterator reseek for some cases
* Reduce iterator key comparison for upper/lower bound check.
### Bug Fixes ### Bug Fixes

View File

@ -467,7 +467,7 @@ inline bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check)
is_key_seqnum_zero_ = (ikey_.sequence == 0); is_key_seqnum_zero_ = (ikey_.sequence == 0);
if (iterate_upper_bound_ != nullptr && if (iterate_upper_bound_ != nullptr && iter_.MayBeOutOfUpperBound() &&
user_comparator_.Compare(ikey_.user_key, *iterate_upper_bound_) >= 0) { user_comparator_.Compare(ikey_.user_key, *iterate_upper_bound_) >= 0) {
break; break;
} }
@ -859,7 +859,7 @@ void DBIter::PrevInternal() {
return; return;
} }
if (iterate_lower_bound_ != nullptr && if (iterate_lower_bound_ != nullptr && iter_.MayBeOutOfLowerBound() &&
user_comparator_.Compare(saved_key_.GetUserKey(), user_comparator_.Compare(saved_key_.GetUserKey(),
*iterate_lower_bound_) < 0) { *iterate_lower_bound_) < 0) {
// We've iterated earlier than the user-specified lower bound. // We've iterated earlier than the user-specified lower bound.

View File

@ -887,7 +887,7 @@ class LevelIterator final : public InternalIterator {
void SeekToFirst() override; void SeekToFirst() override;
void SeekToLast() override; void SeekToLast() override;
void Next() final override; void Next() final override;
bool NextAndGetResult(Slice* ret_key) override; bool NextAndGetResult(IterateResult* result) override;
void Prev() override; void Prev() override;
bool Valid() const override { return file_iter_.Valid(); } bool Valid() const override { return file_iter_.Valid(); }
@ -895,23 +895,38 @@ class LevelIterator final : public InternalIterator {
assert(Valid()); assert(Valid());
return file_iter_.key(); return file_iter_.key();
} }
Slice value() const override { Slice value() const override {
assert(Valid()); assert(Valid());
return file_iter_.value(); return file_iter_.value();
} }
Status status() const override { Status status() const override {
return file_iter_.iter() ? file_iter_.status() : Status::OK(); return file_iter_.iter() ? file_iter_.status() : Status::OK();
} }
// Reports whether the current key may be out of the iterate lower bound.
// Returns true only when both the file-level flag
// may_be_out_of_lower_bound_ and the wrapped file iterator say that an
// out-of-lower-bound key is possible.
// REQUIRES: Valid()
inline bool MayBeOutOfLowerBound() override {
assert(Valid());
// The && short-circuits: the file iterator is not queried when the
// file-level flag already rules out a lower-bound violation.
return may_be_out_of_lower_bound_ && file_iter_.MayBeOutOfLowerBound();
}
// Reports whether the current key may be out of the iterate upper bound.
// The answer is taken directly from the wrapped file iterator.
// REQUIRES: Valid()
inline bool MayBeOutOfUpperBound() override {
assert(Valid());
return file_iter_.MayBeOutOfUpperBound();
}
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
pinned_iters_mgr_ = pinned_iters_mgr; pinned_iters_mgr_ = pinned_iters_mgr;
if (file_iter_.iter()) { if (file_iter_.iter()) {
file_iter_.SetPinnedItersMgr(pinned_iters_mgr); file_iter_.SetPinnedItersMgr(pinned_iters_mgr);
} }
} }
bool IsKeyPinned() const override { bool IsKeyPinned() const override {
return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
file_iter_.iter() && file_iter_.IsKeyPinned(); file_iter_.iter() && file_iter_.IsKeyPinned();
} }
bool IsValuePinned() const override { bool IsValuePinned() const override {
return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() && return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
file_iter_.iter() && file_iter_.IsValuePinned(); file_iter_.iter() && file_iter_.IsValuePinned();
@ -954,12 +969,16 @@ class LevelIterator final : public InternalIterator {
smallest_compaction_key = (*compaction_boundaries_)[file_index_].smallest; smallest_compaction_key = (*compaction_boundaries_)[file_index_].smallest;
largest_compaction_key = (*compaction_boundaries_)[file_index_].largest; largest_compaction_key = (*compaction_boundaries_)[file_index_].largest;
} }
may_be_out_of_lower_bound_ =
read_options_.iterate_lower_bound != nullptr &&
user_comparator_.Compare(ExtractUserKey(file_smallest_key(file_index_)),
*read_options_.iterate_lower_bound) < 0;
return table_cache_->NewIterator( return table_cache_->NewIterator(
read_options_, env_options_, icomparator_, *file_meta.file_metadata, read_options_, env_options_, icomparator_, *file_meta.file_metadata,
range_del_agg_, prefix_extractor_, range_del_agg_, prefix_extractor_,
nullptr /* don't need reference to table */, nullptr /* don't need reference to table */, file_read_hist_,
file_read_hist_, for_compaction_, nullptr /* arena */, skip_filters_, for_compaction_, nullptr /* arena */, skip_filters_, level_,
level_, smallest_compaction_key, largest_compaction_key); smallest_compaction_key, largest_compaction_key);
} }
TableCache* table_cache_; TableCache* table_cache_;
@ -975,6 +994,7 @@ class LevelIterator final : public InternalIterator {
bool should_sample_; bool should_sample_;
bool for_compaction_; bool for_compaction_;
bool skip_filters_; bool skip_filters_;
bool may_be_out_of_lower_bound_ = true;
size_t file_index_; size_t file_index_;
int level_; int level_;
RangeDelAggregator* range_del_agg_; RangeDelAggregator* range_del_agg_;
@ -1043,11 +1063,12 @@ void LevelIterator::SeekToLast() {
void LevelIterator::Next() { NextImpl(); } void LevelIterator::Next() { NextImpl(); }
bool LevelIterator::NextAndGetResult(Slice* ret_key) { bool LevelIterator::NextAndGetResult(IterateResult* result) {
NextImpl(); NextImpl();
bool is_valid = Valid(); bool is_valid = Valid();
if (is_valid) { if (is_valid) {
*ret_key = key(); result->key = key();
result->may_be_out_of_upper_bound = MayBeOutOfUpperBound();
} }
return is_valid; return is_valid;
} }
@ -4278,10 +4299,9 @@ Status VersionSet::Recover(
", last_sequence is %" PRIu64 ", log_number is %" PRIu64 ", last_sequence is %" PRIu64 ", log_number is %" PRIu64
",prev_log_number is %" PRIu64 ",max_column_family is %" PRIu32 ",prev_log_number is %" PRIu64 ",max_column_family is %" PRIu32
",min_log_number_to_keep is %" PRIu64 "\n", ",min_log_number_to_keep is %" PRIu64 "\n",
manifest_path.c_str(), manifest_file_number_, manifest_path.c_str(), manifest_file_number_, next_file_number_.load(),
next_file_number_.load(), last_sequence_.load(), log_number, last_sequence_.load(), log_number, prev_log_number_,
prev_log_number_, column_family_set_->GetMaxColumnFamily(), column_family_set_->GetMaxColumnFamily(), min_log_number_to_keep_2pc());
min_log_number_to_keep_2pc());
for (auto cfd : *column_family_set_) { for (auto cfd : *column_family_set_) {
if (cfd->IsDropped()) { if (cfd->IsDropped()) {

View File

@ -2446,11 +2446,12 @@ void BlockBasedTableIterator<TBlockIter, TValue>::Next() {
template <class TBlockIter, typename TValue> template <class TBlockIter, typename TValue>
bool BlockBasedTableIterator<TBlockIter, TValue>::NextAndGetResult( bool BlockBasedTableIterator<TBlockIter, TValue>::NextAndGetResult(
Slice* ret_key) { IterateResult* result) {
Next(); Next();
bool is_valid = Valid(); bool is_valid = Valid();
if (is_valid) { if (is_valid) {
*ret_key = key(); result->key = key();
result->may_be_out_of_upper_bound = MayBeOutOfUpperBound();
} }
return is_valid; return is_valid;
} }
@ -2531,6 +2532,11 @@ void BlockBasedTableIterator<TBlockIter, TValue>::InitDataBlock() {
key_includes_seq_, index_key_is_full_, key_includes_seq_, index_key_is_full_,
/* get_context */ nullptr, s, prefetch_buffer_.get()); /* get_context */ nullptr, s, prefetch_buffer_.get());
block_iter_points_to_real_block_ = true; block_iter_points_to_real_block_ = true;
if (read_options_.iterate_upper_bound != nullptr) {
data_block_within_upper_bound_ =
(user_comparator_.Compare(*read_options_.iterate_upper_bound,
index_iter_->user_key()) > 0);
}
} }
} }
@ -2543,13 +2549,9 @@ void BlockBasedTableIterator<TBlockIter, TValue>::FindBlockForward() {
return; return;
} }
// Whether next data block is out of upper bound, if there is one. // Whether next data block is out of upper bound, if there is one.
bool next_block_is_out_of_bound = false; bool next_block_is_out_of_bound =
if (read_options_.iterate_upper_bound != nullptr && read_options_.iterate_upper_bound != nullptr &&
block_iter_points_to_real_block_) { block_iter_points_to_real_block_ && !data_block_within_upper_bound_;
next_block_is_out_of_bound =
(user_comparator_.Compare(*read_options_.iterate_upper_bound,
index_iter_->user_key()) <= 0);
}
ResetDataIter(); ResetDataIter();
index_iter_->Next(); index_iter_->Next();
if (next_block_is_out_of_bound) { if (next_block_is_out_of_bound) {

View File

@ -588,7 +588,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
void SeekToFirst() override; void SeekToFirst() override;
void SeekToLast() override; void SeekToLast() override;
void Next() final override; void Next() final override;
bool NextAndGetResult(Slice* ret_key) override; bool NextAndGetResult(IterateResult* result) override;
void Prev() override; void Prev() override;
bool Valid() const override { bool Valid() const override {
return !is_out_of_bound_ && block_iter_points_to_real_block_ && return !is_out_of_bound_ && block_iter_points_to_real_block_ &&
@ -619,6 +619,11 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
// Whether iterator invalidated for being out of bound. // Whether iterator invalidated for being out of bound.
bool IsOutOfBound() override { return is_out_of_bound_; } bool IsOutOfBound() override { return is_out_of_bound_; }
// Reports whether the current key may be out of the iterate upper bound.
// This is the negation of data_block_within_upper_bound_, so it returns
// false only when that flag is set.
// REQUIRES: Valid()
inline bool MayBeOutOfUpperBound() override {
assert(Valid());
return !data_block_within_upper_bound_;
}
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
pinned_iters_mgr_ = pinned_iters_mgr; pinned_iters_mgr_ = pinned_iters_mgr;
} }
@ -680,6 +685,8 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
TBlockIter block_iter_; TBlockIter block_iter_;
bool block_iter_points_to_real_block_; bool block_iter_points_to_real_block_;
bool is_out_of_bound_ = false; bool is_out_of_bound_ = false;
// Whether the current data block is fully within the iterate upper bound.
bool data_block_within_upper_bound_ = false;
bool check_filter_; bool check_filter_;
// TODO(Zhongyi): pick a better name // TODO(Zhongyi): pick a better name
bool need_upper_bound_check_; bool need_upper_bound_check_;

View File

@ -17,6 +17,11 @@ namespace rocksdb {
class PinnedIteratorsManager; class PinnedIteratorsManager;
// Output of InternalIteratorBase::NextAndGetResult(): what the iterator
// reports about its new position after advancing.
struct IterateResult {
// Key at the iterator's new position.
Slice key;
// Whether `key` may be out of the iterate upper bound. A value of true is
// the conservative answer; callers then have to compare the key against
// the bound themselves.
bool may_be_out_of_upper_bound;
};
template <class TValue> template <class TValue>
class InternalIteratorBase : public Cleanable { class InternalIteratorBase : public Cleanable {
public: public:
@ -55,11 +60,20 @@ class InternalIteratorBase : public Cleanable {
// REQUIRES: Valid() // REQUIRES: Valid()
virtual void Next() = 0; virtual void Next() = 0;
virtual bool NextAndGetResult(Slice* ret_key) { // Moves to the next entry in the source, and return result. Iterator
// implementation should override this method to help methods inline better,
// or when MayBeOutOfUpperBound() is non-trivial.
// REQUIRES: Valid()
virtual bool NextAndGetResult(IterateResult* result) {
Next(); Next();
bool is_valid = Valid(); bool is_valid = Valid();
if (is_valid) { if (is_valid) {
*ret_key = key(); result->key = key();
// Default may_be_out_of_upper_bound to true to avoid unnecessary virtual
// call. If an implementation has non-trivial MayBeOutOfUpperBound(),
// it should also override NextAndGetResult().
result->may_be_out_of_upper_bound = true;
assert(MayBeOutOfUpperBound());
} }
return is_valid; return is_valid;
} }
@ -94,6 +108,13 @@ class InternalIteratorBase : public Cleanable {
// upper bound // upper bound
virtual bool IsOutOfBound() { return false; } virtual bool IsOutOfBound() { return false; }
// Returns true if keys returned from this iterator can be smaller than
// iterate_lower_bound. The default implementation always returns true.
virtual bool MayBeOutOfLowerBound() { return true; }
// Returns true if keys returned from this iterator can be larger than or
// equal to iterate_upper_bound. The default implementation always returns
// true.
virtual bool MayBeOutOfUpperBound() { return true; }
// Pass the PinnedIteratorsManager to the Iterator, most Iterators dont // Pass the PinnedIteratorsManager to the Iterator, most Iterators dont
// communicate with PinnedIteratorsManager so default implementation is no-op // communicate with PinnedIteratorsManager so default implementation is no-op
// but for Iterators that need to communicate with PinnedIteratorsManager // but for Iterators that need to communicate with PinnedIteratorsManager

View File

@ -56,7 +56,10 @@ class IteratorWrapperBase {
// Iterator interface methods // Iterator interface methods
bool Valid() const { return valid_; } bool Valid() const { return valid_; }
Slice key() const { assert(Valid()); return key_; } Slice key() const {
assert(Valid());
return result_.key;
}
TValue value() const { TValue value() const {
assert(Valid()); assert(Valid());
return iter_->value(); return iter_->value();
@ -65,7 +68,7 @@ class IteratorWrapperBase {
Status status() const { assert(iter_); return iter_->status(); } Status status() const { assert(iter_); return iter_->status(); }
void Next() { void Next() {
assert(iter_); assert(iter_);
valid_ = iter_->NextAndGetResult(&key_); valid_ = iter_->NextAndGetResult(&result_);
assert(!valid_ || iter_->status().ok()); assert(!valid_ || iter_->status().ok());
} }
void Prev() { assert(iter_); iter_->Prev(); Update(); } void Prev() { assert(iter_); iter_->Prev(); Update(); }
@ -83,6 +86,16 @@ class IteratorWrapperBase {
void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); } void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); }
void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); } void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); }
// Forwards the lower-bound query to the wrapped iterator.
// REQUIRES: Valid()
bool MayBeOutOfLowerBound() {
assert(Valid());
return iter_->MayBeOutOfLowerBound();
}
// Returns the upper-bound hint stored in result_. The wrapped iterator is
// not called here; the cached value is returned as-is.
// REQUIRES: Valid()
bool MayBeOutOfUpperBound() {
assert(Valid());
return result_.may_be_out_of_upper_bound;
}
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) { void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) {
assert(iter_); assert(iter_);
iter_->SetPinnedItersMgr(pinned_iters_mgr); iter_->SetPinnedItersMgr(pinned_iters_mgr);
@ -100,14 +113,15 @@ class IteratorWrapperBase {
void Update() { void Update() {
valid_ = iter_->Valid(); valid_ = iter_->Valid();
if (valid_) { if (valid_) {
key_ = iter_->key();
assert(iter_->status().ok()); assert(iter_->status().ok());
result_.key = iter_->key();
result_.may_be_out_of_upper_bound = true;
} }
} }
InternalIteratorBase<TValue>* iter_; InternalIteratorBase<TValue>* iter_;
IterateResult result_;
bool valid_; bool valid_;
Slice key_;
}; };
using IteratorWrapper = IteratorWrapperBase<Slice>; using IteratorWrapper = IteratorWrapperBase<Slice>;

View File

@ -227,6 +227,16 @@ class MergingIterator : public InternalIterator {
current_ = CurrentForward(); current_ = CurrentForward();
} }
// Advances to the next entry and, if the iterator is still valid, stores
// the new key and the upper-bound hint into *result.
// Returns whether the iterator is valid after advancing; *result is left
// untouched when it is not.
bool NextAndGetResult(IterateResult* result) override {
Next();
bool is_valid = Valid();
if (is_valid) {
result->key = key();
result->may_be_out_of_upper_bound = MayBeOutOfUpperBound();
}
return is_valid;
}
void Prev() override { void Prev() override {
assert(Valid()); assert(Valid());
// Ensure that all children are positioned before key(). // Ensure that all children are positioned before key().
@ -296,6 +306,20 @@ class MergingIterator : public InternalIterator {
return current_->value(); return current_->value();
} }
// Here we simply relay the MayBeOutOfLowerBound/MayBeOutOfUpperBound result
// from the current child iterator. Potentially, as long as one of the child
// iterators reports that being out of bound is not possible, we know the
// current key is within bound.
// Returns the lower-bound answer of the child iterator that current_
// points to.
// REQUIRES: Valid()
bool MayBeOutOfLowerBound() override {
assert(Valid());
return current_->MayBeOutOfLowerBound();
}
// Returns the upper-bound answer of the child iterator that current_
// points to.
// REQUIRES: Valid()
bool MayBeOutOfUpperBound() override {
assert(Valid());
return current_->MayBeOutOfUpperBound();
}
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override { void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
pinned_iters_mgr_ = pinned_iters_mgr; pinned_iters_mgr_ = pinned_iters_mgr;
for (auto& child : children_) { for (auto& child : children_) {