Reduce iterator key comparison for upper/lower bound check (#5111)
Summary: Previously if iterator upper/lower bound presents, `DBIter` will check the bound for every key. This patch turns the check into per-file or per-data block check when applicable, by checking against either file largest/smallest key or block index key. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5111 Differential Revision: D15330061 Pulled By: siying fbshipit-source-id: 8a653fe3cd50d94d81eb2d13b087326c58ee2024
This commit is contained in:
parent
a13026fb2f
commit
f3a7847598
@ -11,6 +11,7 @@
|
||||
* Reduce binary search when iterator reseek into the same data block.
|
||||
* DBIter::Next() can skip user key checking if previous entry's seqnum is 0.
|
||||
* Merging iterator to avoid child iterator reseek for some cases
|
||||
* Reduce iterator key comparision for upper/lower bound check.
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
|
@ -467,7 +467,7 @@ inline bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check)
|
||||
|
||||
is_key_seqnum_zero_ = (ikey_.sequence == 0);
|
||||
|
||||
if (iterate_upper_bound_ != nullptr &&
|
||||
if (iterate_upper_bound_ != nullptr && iter_.MayBeOutOfUpperBound() &&
|
||||
user_comparator_.Compare(ikey_.user_key, *iterate_upper_bound_) >= 0) {
|
||||
break;
|
||||
}
|
||||
@ -859,7 +859,7 @@ void DBIter::PrevInternal() {
|
||||
return;
|
||||
}
|
||||
|
||||
if (iterate_lower_bound_ != nullptr &&
|
||||
if (iterate_lower_bound_ != nullptr && iter_.MayBeOutOfLowerBound() &&
|
||||
user_comparator_.Compare(saved_key_.GetUserKey(),
|
||||
*iterate_lower_bound_) < 0) {
|
||||
// We've iterated earlier than the user-specified lower bound.
|
||||
|
@ -887,7 +887,7 @@ class LevelIterator final : public InternalIterator {
|
||||
void SeekToFirst() override;
|
||||
void SeekToLast() override;
|
||||
void Next() final override;
|
||||
bool NextAndGetResult(Slice* ret_key) override;
|
||||
bool NextAndGetResult(IterateResult* result) override;
|
||||
void Prev() override;
|
||||
|
||||
bool Valid() const override { return file_iter_.Valid(); }
|
||||
@ -895,23 +895,38 @@ class LevelIterator final : public InternalIterator {
|
||||
assert(Valid());
|
||||
return file_iter_.key();
|
||||
}
|
||||
|
||||
Slice value() const override {
|
||||
assert(Valid());
|
||||
return file_iter_.value();
|
||||
}
|
||||
|
||||
Status status() const override {
|
||||
return file_iter_.iter() ? file_iter_.status() : Status::OK();
|
||||
}
|
||||
|
||||
inline bool MayBeOutOfLowerBound() override {
|
||||
assert(Valid());
|
||||
return may_be_out_of_lower_bound_ && file_iter_.MayBeOutOfLowerBound();
|
||||
}
|
||||
|
||||
inline bool MayBeOutOfUpperBound() override {
|
||||
assert(Valid());
|
||||
return file_iter_.MayBeOutOfUpperBound();
|
||||
}
|
||||
|
||||
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
|
||||
pinned_iters_mgr_ = pinned_iters_mgr;
|
||||
if (file_iter_.iter()) {
|
||||
file_iter_.SetPinnedItersMgr(pinned_iters_mgr);
|
||||
}
|
||||
}
|
||||
|
||||
bool IsKeyPinned() const override {
|
||||
return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
|
||||
file_iter_.iter() && file_iter_.IsKeyPinned();
|
||||
}
|
||||
|
||||
bool IsValuePinned() const override {
|
||||
return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
|
||||
file_iter_.iter() && file_iter_.IsValuePinned();
|
||||
@ -954,12 +969,16 @@ class LevelIterator final : public InternalIterator {
|
||||
smallest_compaction_key = (*compaction_boundaries_)[file_index_].smallest;
|
||||
largest_compaction_key = (*compaction_boundaries_)[file_index_].largest;
|
||||
}
|
||||
may_be_out_of_lower_bound_ =
|
||||
read_options_.iterate_lower_bound != nullptr &&
|
||||
user_comparator_.Compare(ExtractUserKey(file_smallest_key(file_index_)),
|
||||
*read_options_.iterate_lower_bound) < 0;
|
||||
return table_cache_->NewIterator(
|
||||
read_options_, env_options_, icomparator_, *file_meta.file_metadata,
|
||||
range_del_agg_, prefix_extractor_,
|
||||
nullptr /* don't need reference to table */,
|
||||
file_read_hist_, for_compaction_, nullptr /* arena */, skip_filters_,
|
||||
level_, smallest_compaction_key, largest_compaction_key);
|
||||
nullptr /* don't need reference to table */, file_read_hist_,
|
||||
for_compaction_, nullptr /* arena */, skip_filters_, level_,
|
||||
smallest_compaction_key, largest_compaction_key);
|
||||
}
|
||||
|
||||
TableCache* table_cache_;
|
||||
@ -975,6 +994,7 @@ class LevelIterator final : public InternalIterator {
|
||||
bool should_sample_;
|
||||
bool for_compaction_;
|
||||
bool skip_filters_;
|
||||
bool may_be_out_of_lower_bound_ = true;
|
||||
size_t file_index_;
|
||||
int level_;
|
||||
RangeDelAggregator* range_del_agg_;
|
||||
@ -1043,11 +1063,12 @@ void LevelIterator::SeekToLast() {
|
||||
|
||||
void LevelIterator::Next() { NextImpl(); }
|
||||
|
||||
bool LevelIterator::NextAndGetResult(Slice* ret_key) {
|
||||
bool LevelIterator::NextAndGetResult(IterateResult* result) {
|
||||
NextImpl();
|
||||
bool is_valid = Valid();
|
||||
if (is_valid) {
|
||||
*ret_key = key();
|
||||
result->key = key();
|
||||
result->may_be_out_of_upper_bound = MayBeOutOfUpperBound();
|
||||
}
|
||||
return is_valid;
|
||||
}
|
||||
@ -4278,10 +4299,9 @@ Status VersionSet::Recover(
|
||||
", last_sequence is %" PRIu64 ", log_number is %" PRIu64
|
||||
",prev_log_number is %" PRIu64 ",max_column_family is %" PRIu32
|
||||
",min_log_number_to_keep is %" PRIu64 "\n",
|
||||
manifest_path.c_str(), manifest_file_number_,
|
||||
next_file_number_.load(), last_sequence_.load(), log_number,
|
||||
prev_log_number_, column_family_set_->GetMaxColumnFamily(),
|
||||
min_log_number_to_keep_2pc());
|
||||
manifest_path.c_str(), manifest_file_number_, next_file_number_.load(),
|
||||
last_sequence_.load(), log_number, prev_log_number_,
|
||||
column_family_set_->GetMaxColumnFamily(), min_log_number_to_keep_2pc());
|
||||
|
||||
for (auto cfd : *column_family_set_) {
|
||||
if (cfd->IsDropped()) {
|
||||
|
@ -2446,11 +2446,12 @@ void BlockBasedTableIterator<TBlockIter, TValue>::Next() {
|
||||
|
||||
template <class TBlockIter, typename TValue>
|
||||
bool BlockBasedTableIterator<TBlockIter, TValue>::NextAndGetResult(
|
||||
Slice* ret_key) {
|
||||
IterateResult* result) {
|
||||
Next();
|
||||
bool is_valid = Valid();
|
||||
if (is_valid) {
|
||||
*ret_key = key();
|
||||
result->key = key();
|
||||
result->may_be_out_of_upper_bound = MayBeOutOfUpperBound();
|
||||
}
|
||||
return is_valid;
|
||||
}
|
||||
@ -2531,6 +2532,11 @@ void BlockBasedTableIterator<TBlockIter, TValue>::InitDataBlock() {
|
||||
key_includes_seq_, index_key_is_full_,
|
||||
/* get_context */ nullptr, s, prefetch_buffer_.get());
|
||||
block_iter_points_to_real_block_ = true;
|
||||
if (read_options_.iterate_upper_bound != nullptr) {
|
||||
data_block_within_upper_bound_ =
|
||||
(user_comparator_.Compare(*read_options_.iterate_upper_bound,
|
||||
index_iter_->user_key()) > 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -2543,13 +2549,9 @@ void BlockBasedTableIterator<TBlockIter, TValue>::FindBlockForward() {
|
||||
return;
|
||||
}
|
||||
// Whether next data block is out of upper bound, if there is one.
|
||||
bool next_block_is_out_of_bound = false;
|
||||
if (read_options_.iterate_upper_bound != nullptr &&
|
||||
block_iter_points_to_real_block_) {
|
||||
next_block_is_out_of_bound =
|
||||
(user_comparator_.Compare(*read_options_.iterate_upper_bound,
|
||||
index_iter_->user_key()) <= 0);
|
||||
}
|
||||
bool next_block_is_out_of_bound =
|
||||
read_options_.iterate_upper_bound != nullptr &&
|
||||
block_iter_points_to_real_block_ && !data_block_within_upper_bound_;
|
||||
ResetDataIter();
|
||||
index_iter_->Next();
|
||||
if (next_block_is_out_of_bound) {
|
||||
|
@ -588,7 +588,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
|
||||
void SeekToFirst() override;
|
||||
void SeekToLast() override;
|
||||
void Next() final override;
|
||||
bool NextAndGetResult(Slice* ret_key) override;
|
||||
bool NextAndGetResult(IterateResult* result) override;
|
||||
void Prev() override;
|
||||
bool Valid() const override {
|
||||
return !is_out_of_bound_ && block_iter_points_to_real_block_ &&
|
||||
@ -619,6 +619,11 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
|
||||
// Whether iterator invalidated for being out of bound.
|
||||
bool IsOutOfBound() override { return is_out_of_bound_; }
|
||||
|
||||
inline bool MayBeOutOfUpperBound() override {
|
||||
assert(Valid());
|
||||
return !data_block_within_upper_bound_;
|
||||
}
|
||||
|
||||
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
|
||||
pinned_iters_mgr_ = pinned_iters_mgr;
|
||||
}
|
||||
@ -680,6 +685,8 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
|
||||
TBlockIter block_iter_;
|
||||
bool block_iter_points_to_real_block_;
|
||||
bool is_out_of_bound_ = false;
|
||||
// Whether current data block being fully within iterate upper bound.
|
||||
bool data_block_within_upper_bound_ = false;
|
||||
bool check_filter_;
|
||||
// TODO(Zhongyi): pick a better name
|
||||
bool need_upper_bound_check_;
|
||||
|
@ -17,6 +17,11 @@ namespace rocksdb {
|
||||
|
||||
class PinnedIteratorsManager;
|
||||
|
||||
struct IterateResult {
|
||||
Slice key;
|
||||
bool may_be_out_of_upper_bound;
|
||||
};
|
||||
|
||||
template <class TValue>
|
||||
class InternalIteratorBase : public Cleanable {
|
||||
public:
|
||||
@ -55,11 +60,20 @@ class InternalIteratorBase : public Cleanable {
|
||||
// REQUIRES: Valid()
|
||||
virtual void Next() = 0;
|
||||
|
||||
virtual bool NextAndGetResult(Slice* ret_key) {
|
||||
// Moves to the next entry in the source, and return result. Iterator
|
||||
// implementation should override this method to help methods inline better,
|
||||
// or when MayBeOutOfUpperBound() is non-trivial.
|
||||
// REQUIRES: Valid()
|
||||
virtual bool NextAndGetResult(IterateResult* result) {
|
||||
Next();
|
||||
bool is_valid = Valid();
|
||||
if (is_valid) {
|
||||
*ret_key = key();
|
||||
result->key = key();
|
||||
// Default may_be_out_of_upper_bound to true to avoid unnecessary virtual
|
||||
// call. If an implementation has non-trivial MayBeOutOfUpperBound(),
|
||||
// it should also override NextAndGetResult().
|
||||
result->may_be_out_of_upper_bound = true;
|
||||
assert(MayBeOutOfUpperBound());
|
||||
}
|
||||
return is_valid;
|
||||
}
|
||||
@ -94,6 +108,13 @@ class InternalIteratorBase : public Cleanable {
|
||||
// upper bound
|
||||
virtual bool IsOutOfBound() { return false; }
|
||||
|
||||
// Keys return from this iterator can be smaller than iterate_lower_bound.
|
||||
virtual bool MayBeOutOfLowerBound() { return true; }
|
||||
|
||||
// Keys return from this iterator can be larger or equal to
|
||||
// iterate_upper_bound.
|
||||
virtual bool MayBeOutOfUpperBound() { return true; }
|
||||
|
||||
// Pass the PinnedIteratorsManager to the Iterator, most Iterators dont
|
||||
// communicate with PinnedIteratorsManager so default implementation is no-op
|
||||
// but for Iterators that need to communicate with PinnedIteratorsManager
|
||||
|
@ -56,7 +56,10 @@ class IteratorWrapperBase {
|
||||
|
||||
// Iterator interface methods
|
||||
bool Valid() const { return valid_; }
|
||||
Slice key() const { assert(Valid()); return key_; }
|
||||
Slice key() const {
|
||||
assert(Valid());
|
||||
return result_.key;
|
||||
}
|
||||
TValue value() const {
|
||||
assert(Valid());
|
||||
return iter_->value();
|
||||
@ -65,7 +68,7 @@ class IteratorWrapperBase {
|
||||
Status status() const { assert(iter_); return iter_->status(); }
|
||||
void Next() {
|
||||
assert(iter_);
|
||||
valid_ = iter_->NextAndGetResult(&key_);
|
||||
valid_ = iter_->NextAndGetResult(&result_);
|
||||
assert(!valid_ || iter_->status().ok());
|
||||
}
|
||||
void Prev() { assert(iter_); iter_->Prev(); Update(); }
|
||||
@ -83,6 +86,16 @@ class IteratorWrapperBase {
|
||||
void SeekToFirst() { assert(iter_); iter_->SeekToFirst(); Update(); }
|
||||
void SeekToLast() { assert(iter_); iter_->SeekToLast(); Update(); }
|
||||
|
||||
bool MayBeOutOfLowerBound() {
|
||||
assert(Valid());
|
||||
return iter_->MayBeOutOfLowerBound();
|
||||
}
|
||||
|
||||
bool MayBeOutOfUpperBound() {
|
||||
assert(Valid());
|
||||
return result_.may_be_out_of_upper_bound;
|
||||
}
|
||||
|
||||
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) {
|
||||
assert(iter_);
|
||||
iter_->SetPinnedItersMgr(pinned_iters_mgr);
|
||||
@ -100,14 +113,15 @@ class IteratorWrapperBase {
|
||||
void Update() {
|
||||
valid_ = iter_->Valid();
|
||||
if (valid_) {
|
||||
key_ = iter_->key();
|
||||
assert(iter_->status().ok());
|
||||
result_.key = iter_->key();
|
||||
result_.may_be_out_of_upper_bound = true;
|
||||
}
|
||||
}
|
||||
|
||||
InternalIteratorBase<TValue>* iter_;
|
||||
IterateResult result_;
|
||||
bool valid_;
|
||||
Slice key_;
|
||||
};
|
||||
|
||||
using IteratorWrapper = IteratorWrapperBase<Slice>;
|
||||
|
@ -227,6 +227,16 @@ class MergingIterator : public InternalIterator {
|
||||
current_ = CurrentForward();
|
||||
}
|
||||
|
||||
bool NextAndGetResult(IterateResult* result) override {
|
||||
Next();
|
||||
bool is_valid = Valid();
|
||||
if (is_valid) {
|
||||
result->key = key();
|
||||
result->may_be_out_of_upper_bound = MayBeOutOfUpperBound();
|
||||
}
|
||||
return is_valid;
|
||||
}
|
||||
|
||||
void Prev() override {
|
||||
assert(Valid());
|
||||
// Ensure that all children are positioned before key().
|
||||
@ -296,6 +306,20 @@ class MergingIterator : public InternalIterator {
|
||||
return current_->value();
|
||||
}
|
||||
|
||||
// Here we simply relay MayBeOutOfLowerBound/MayBeOutOfUpperBound result
|
||||
// from current child iterator. Potentially as long as one of child iterator
|
||||
// report out of bound is not possible, we know current key is within bound.
|
||||
|
||||
bool MayBeOutOfLowerBound() override {
|
||||
assert(Valid());
|
||||
return current_->MayBeOutOfLowerBound();
|
||||
}
|
||||
|
||||
bool MayBeOutOfUpperBound() override {
|
||||
assert(Valid());
|
||||
return current_->MayBeOutOfUpperBound();
|
||||
}
|
||||
|
||||
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
|
||||
pinned_iters_mgr_ = pinned_iters_mgr;
|
||||
for (auto& child : children_) {
|
||||
|
Loading…
Reference in New Issue
Block a user