Avoid per-key upper bound check in BlockBasedTableIterator (#5101)
Summary: `BlockBasedTableIterator` avoid reading next block on `Next()` if it detects the iterator will be out of bound, by checking against index key. The optimization was added in #2239, and by the time it only check the bound per block. It seems later change make it a per-key check, which introduce unnecessary key comparisons. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5101 Differential Revision: D14678707 Pulled By: siying fbshipit-source-id: 2372446116753c7892ea4cec7b4b49ef87ba463e
This commit is contained in:
parent
09957ded1d
commit
f29dc1b906
@ -1026,11 +1026,8 @@ TEST_P(DBIteratorTest, DBIteratorBoundOptimizationTest) {
|
||||
int upper_bound_hits = 0;
|
||||
Options options = CurrentOptions();
|
||||
rocksdb::SyncPoint::GetInstance()->SetCallBack(
|
||||
"BlockBasedTable::BlockEntryIteratorState::KeyReachedUpperBound",
|
||||
[&upper_bound_hits](void* arg) {
|
||||
assert(arg != nullptr);
|
||||
upper_bound_hits += (*static_cast<bool*>(arg) ? 1 : 0);
|
||||
});
|
||||
"BlockBasedTableIterator:out_of_bound",
|
||||
[&upper_bound_hits](void*) { upper_bound_hits++; });
|
||||
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||
options.env = env_;
|
||||
options.create_if_missing = true;
|
||||
|
@ -2346,6 +2346,7 @@ void BlockBasedTableIterator<TBlockIter, TValue>::Seek(const Slice& target) {
|
||||
block_iter_.Seek(target);
|
||||
|
||||
FindKeyForward();
|
||||
CheckOutOfBound();
|
||||
assert(
|
||||
!block_iter_.Valid() ||
|
||||
(key_includes_seq_ && icomp_.Compare(target, block_iter_.key()) <= 0) ||
|
||||
@ -2409,6 +2410,7 @@ void BlockBasedTableIterator<TBlockIter, TValue>::SeekToFirst() {
|
||||
InitDataBlock();
|
||||
block_iter_.SeekToFirst();
|
||||
FindKeyForward();
|
||||
CheckOutOfBound();
|
||||
}
|
||||
|
||||
template <class TBlockIter, typename TValue>
|
||||
@ -2491,18 +2493,24 @@ void BlockBasedTableIterator<TBlockIter, TValue>::InitDataBlock() {
|
||||
|
||||
template <class TBlockIter, typename TValue>
|
||||
void BlockBasedTableIterator<TBlockIter, TValue>::FindKeyForward() {
|
||||
assert(!is_out_of_bound_);
|
||||
// TODO the while loop inherits from two-level-iterator. We don't know
|
||||
// whether a block can be empty so it can be replaced by an "if".
|
||||
while (!block_iter_.Valid()) {
|
||||
if (!block_iter_.status().ok()) {
|
||||
return;
|
||||
}
|
||||
if (read_options_.iterate_upper_bound != nullptr &&
|
||||
block_iter_points_to_real_block_) {
|
||||
is_out_of_bound_ =
|
||||
(user_comparator_.Compare(*read_options_.iterate_upper_bound,
|
||||
ExtractUserKey(index_iter_->key())) <= 0);
|
||||
}
|
||||
ResetDataIter();
|
||||
// We used to check the current index key for upperbound.
|
||||
// It will only save a data reading for a small percentage of use cases,
|
||||
// so for code simplicity, we removed it. We can add it back if there is a
|
||||
// significnat performance regression.
|
||||
if (is_out_of_bound_) {
|
||||
// The next block is out of bound. No need to read it.
|
||||
TEST_SYNC_POINT_CALLBACK("BlockBasedTableIterator:out_of_bound", nullptr);
|
||||
return;
|
||||
}
|
||||
index_iter_->Next();
|
||||
|
||||
if (index_iter_->Valid()) {
|
||||
@ -2512,25 +2520,10 @@ void BlockBasedTableIterator<TBlockIter, TValue>::FindKeyForward() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Check upper bound on the current key
|
||||
bool reached_upper_bound =
|
||||
(read_options_.iterate_upper_bound != nullptr &&
|
||||
block_iter_points_to_real_block_ && block_iter_.Valid() &&
|
||||
user_comparator_.Compare(ExtractUserKey(block_iter_.key()),
|
||||
*read_options_.iterate_upper_bound) >= 0);
|
||||
TEST_SYNC_POINT_CALLBACK(
|
||||
"BlockBasedTable::BlockEntryIteratorState::KeyReachedUpperBound",
|
||||
&reached_upper_bound);
|
||||
if (reached_upper_bound) {
|
||||
is_out_of_bound_ = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
template <class TBlockIter, typename TValue>
|
||||
void BlockBasedTableIterator<TBlockIter, TValue>::FindKeyBackward() {
|
||||
assert(!is_out_of_bound_);
|
||||
while (!block_iter_.Valid()) {
|
||||
if (!block_iter_.status().ok()) {
|
||||
return;
|
||||
@ -2551,6 +2544,16 @@ void BlockBasedTableIterator<TBlockIter, TValue>::FindKeyBackward() {
|
||||
// code simplicity.
|
||||
}
|
||||
|
||||
template <class TBlockIter, typename TValue>
|
||||
void BlockBasedTableIterator<TBlockIter, TValue>::CheckOutOfBound() {
|
||||
if (read_options_.iterate_upper_bound != nullptr &&
|
||||
block_iter_points_to_real_block_ && block_iter_.Valid()) {
|
||||
is_out_of_bound_ =
|
||||
user_comparator_.Compare(*read_options_.iterate_upper_bound,
|
||||
ExtractUserKey(block_iter_.key())) <= 0;
|
||||
}
|
||||
}
|
||||
|
||||
InternalIterator* BlockBasedTable::NewIterator(
|
||||
const ReadOptions& read_options, const SliceTransform* prefix_extractor,
|
||||
Arena* arena, bool skip_filters, bool for_compaction) {
|
||||
|
@ -623,6 +623,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
|
||||
}
|
||||
}
|
||||
|
||||
// Whether iterator invalidated for being out of bound.
|
||||
bool IsOutOfBound() override { return is_out_of_bound_; }
|
||||
|
||||
void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
|
||||
@ -673,6 +674,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<TValue> {
|
||||
void InitDataBlock();
|
||||
void FindKeyForward();
|
||||
void FindKeyBackward();
|
||||
void CheckOutOfBound();
|
||||
|
||||
private:
|
||||
BlockBasedTable* table_;
|
||||
|
@ -3842,6 +3842,35 @@ TEST_P(BlockBasedTableTest, DataBlockHashIndex) {
|
||||
}
|
||||
}
|
||||
|
||||
// BlockBasedTableIterator should invalidate itself and return
|
||||
// OutOfBound()=true immediately after Seek(), to allow LevelIterator
|
||||
// filter out corresponding level.
|
||||
TEST_P(BlockBasedTableTest, OutOfBoundOnSeek) {
|
||||
TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
|
||||
c.Add("foo", "v1");
|
||||
std::vector<std::string> keys;
|
||||
stl_wrappers::KVMap kvmap;
|
||||
Options options;
|
||||
const ImmutableCFOptions ioptions(options);
|
||||
const MutableCFOptions moptions(options);
|
||||
c.Finish(options, ioptions, moptions, BlockBasedTableOptions(),
|
||||
GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
|
||||
auto* reader = c.GetTableReader();
|
||||
ReadOptions read_opt;
|
||||
std::string upper_bound = "bar";
|
||||
Slice upper_bound_slice(upper_bound);
|
||||
read_opt.iterate_upper_bound = &upper_bound_slice;
|
||||
std::unique_ptr<InternalIterator> iter;
|
||||
iter.reset(reader->NewIterator(read_opt, nullptr /*prefix_extractor*/));
|
||||
iter->SeekToFirst();
|
||||
ASSERT_FALSE(iter->Valid());
|
||||
ASSERT_TRUE(iter->IsOutOfBound());
|
||||
iter.reset(reader->NewIterator(read_opt, nullptr /*prefix_extractor*/));
|
||||
iter->Seek("foo");
|
||||
ASSERT_FALSE(iter->Valid());
|
||||
ASSERT_TRUE(iter->IsOutOfBound());
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
Loading…
Reference in New Issue
Block a user