From 4479dff208f8880ad853d9d6c52df64d90b6a0c1 Mon Sep 17 00:00:00 2001
From: Siying Dong
Date: Wed, 1 May 2019 14:23:48 -0700
Subject: [PATCH] Reduce binary search when reseeking into the same data block (#5256)

Summary:
Right now, when Seek() is called again, RocksDB always does a binary search
against the files and index blocks, even if the target ends up in the same
file/block. Improve it as follows:
1. In LevelIterator, a reseek first checks the boundaries of the current file.
   If the target falls into the same file, the binary search to find the file
   is skipped.
2. In the block-based table iterator, a reseek skips reseeking the index block
   if the seek key is larger than the current key and smaller than the index
   key (the boundary between the current block and the next block).

A small standalone sketch of the file-boundary check follows the patch.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/5256

Differential Revision: D15105072

Pulled By: siying

fbshipit-source-id: 39634bdb4a881082451fa39cecd7ecf12160bf80
---
 HISTORY.md                        |  3 +
 db/db_iterator_test.cc            | 98 +++++++++++++++++++++++++++++++
 db/version_set.cc                 | 20 ++++++-
 table/block.cc                    |  1 +
 table/block_based_table_reader.cc | 34 ++++++++---
 5 files changed, 146 insertions(+), 10 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index 2662cdea0..011ce0a99 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -3,6 +3,9 @@
 ### Public API Change
 * Now DB::Close() will return Aborted() error when there is unreleased snapshot. Users can retry after all snapshots are released.
 
+### New Features
+* Reduce binary search when an iterator reseeks into the same data block.
+
 ## 6.2.0 (4/30/2019)
 ### New Features
 * Add an option `strict_bytes_per_sync` that causes a file-writing thread to block rather than exceed the limit on bytes pending writeback specified by `bytes_per_sync` or `wal_bytes_per_sync`.
diff --git a/db/db_iterator_test.cc b/db/db_iterator_test.cc
index ec5fc8006..78b387577 100644
--- a/db/db_iterator_test.cc
+++ b/db/db_iterator_test.cc
@@ -2450,6 +2450,104 @@ TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) {
   ASSERT_EQ("a", it->key().ToString());
 }
 
+TEST_P(DBIteratorTest, AvoidReseekLevelIterator) {
+  Options options = CurrentOptions();
+  options.compression = CompressionType::kNoCompression;
+  BlockBasedTableOptions table_options;
+  table_options.block_size = 800;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  Reopen(options);
+
+  Random rnd(301);
+  std::string random_str = RandomString(&rnd, 180);
+
+  ASSERT_OK(Put("1", random_str));
+  ASSERT_OK(Put("2", random_str));
+  ASSERT_OK(Put("3", random_str));
+  ASSERT_OK(Put("4", random_str));
+  // A new block
+  ASSERT_OK(Put("5", random_str));
+  ASSERT_OK(Put("6", random_str));
+  ASSERT_OK(Put("7", random_str));
+  ASSERT_OK(Flush());
+  ASSERT_OK(Put("8", random_str));
+  ASSERT_OK(Put("9", random_str));
+  ASSERT_OK(Flush());
+  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+
+  int num_find_file_in_level = 0;
+  int num_idx_blk_seek = 0;
+  SyncPoint::GetInstance()->SetCallBack(
+      "LevelIterator::Seek:BeforeFindFile",
+      [&](void* /*arg*/) { num_find_file_in_level++; });
+  SyncPoint::GetInstance()->SetCallBack(
+      "IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek++; });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  {
+    std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
+    iter->Seek("1");
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(1, num_find_file_in_level);
+    ASSERT_EQ(1, num_idx_blk_seek);
+
+    iter->Seek("2");
+    ASSERT_TRUE(iter->Valid());
+    ASSERT_EQ(1, num_find_file_in_level);
+    ASSERT_EQ(1, num_idx_blk_seek);
+
iter->Seek("3"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(1, num_find_file_in_level); + ASSERT_EQ(1, num_idx_blk_seek); + + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(1, num_find_file_in_level); + ASSERT_EQ(1, num_idx_blk_seek); + + iter->Seek("5"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(1, num_find_file_in_level); + ASSERT_EQ(2, num_idx_blk_seek); + + iter->Seek("6"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(1, num_find_file_in_level); + ASSERT_EQ(2, num_idx_blk_seek); + + iter->Seek("7"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(1, num_find_file_in_level); + ASSERT_EQ(3, num_idx_blk_seek); + + iter->Seek("8"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(2, num_find_file_in_level); + // Still re-seek because "8" is the boundary key, which has + // the same user key as the seek key. + ASSERT_EQ(4, num_idx_blk_seek); + + iter->Seek("5"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(3, num_find_file_in_level); + ASSERT_EQ(5, num_idx_blk_seek); + + iter->Next(); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(3, num_find_file_in_level); + ASSERT_EQ(5, num_idx_blk_seek); + + // Seek backward never triggers the index block seek to be skipped + iter->Seek("5"); + ASSERT_TRUE(iter->Valid()); + ASSERT_EQ(3, num_find_file_in_level); + ASSERT_EQ(6, num_idx_blk_seek); + } + + SyncPoint::GetInstance()->DisableProcessing(); +} + INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest, testing::Values(true, false)); diff --git a/db/version_set.cc b/db/version_set.cc index fdc07fee0..63d5af3af 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1007,9 +1007,25 @@ class LevelIterator final : public InternalIterator { }; void LevelIterator::Seek(const Slice& target) { - size_t new_file_index = FindFile(icomparator_, *flevel_, target); + // Check whether the seek key fall under the same file + bool need_to_reseek = true; + if (file_iter_.iter() != nullptr && file_index_ < flevel_->num_files) { + const FdWithKeyRange& cur_file = flevel_->files[file_index_]; + if (icomparator_.InternalKeyComparator::Compare( + target, cur_file.largest_key) <= 0 && + icomparator_.InternalKeyComparator::Compare( + target, cur_file.smallest_key) >= 0) { + need_to_reseek = false; + assert(static_cast(FindFile(icomparator_, *flevel_, target)) == + file_index_); + } + } + if (need_to_reseek) { + TEST_SYNC_POINT("LevelIterator::Seek:BeforeFindFile"); + size_t new_file_index = FindFile(icomparator_, *flevel_, target); + InitFileIterator(new_file_index); + } - InitFileIterator(new_file_index); if (file_iter_.iter() != nullptr) { file_iter_.Seek(target); } diff --git a/table/block.cc b/table/block.cc index 80bef4a91..a6cc8d270 100644 --- a/table/block.cc +++ b/table/block.cc @@ -381,6 +381,7 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) { } void IndexBlockIter::Seek(const Slice& target) { + TEST_SYNC_POINT("IndexBlockIter::Seek:0"); Slice seek_key = target; if (!key_includes_seq_) { seek_key = ExtractUserKey(target); diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index d6c9ab887..e39fd2a86 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -2334,16 +2334,34 @@ void BlockBasedTableIterator::Seek(const Slice& target) { return; } - SavePrevIndexValue(); - - index_iter_->Seek(target); - - if (!index_iter_->Valid()) { - ResetDataIter(); - return; + bool need_seek_index = true; + if (block_iter_points_to_real_block_) { + // Reseek. + prev_index_value_ = index_iter_->value(); + // We can avoid an index seek if: + // 1. 
+    // 1. The new seek key is larger than the current key
+    // 2. The new seek key is within the upper bound of the block
+    // Since we don't necessarily know the internal key for either
+    // the current key or the upper bound, we check user keys and
+    // exclude the equality case. Considering internal keys would
+    // improve the boundary cases, but it would complicate the
+    // code.
+    if (user_comparator_.Compare(ExtractUserKey(target),
+                                 block_iter_.user_key()) > 0 &&
+        user_comparator_.Compare(ExtractUserKey(target),
+                                 index_iter_->user_key()) < 0) {
+      need_seek_index = false;
+    }
   }
 
-  InitDataBlock();
+  if (need_seek_index) {
+    index_iter_->Seek(target);
+    if (!index_iter_->Valid()) {
+      ResetDataIter();
+      return;
+    }
+    InitDataBlock();
+  }
 
   block_iter_.Seek(target);
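
Illustrative sketch (not part of the patch): a minimal, self-contained C++ program
showing the file-boundary check described in point 1 of the summary. The names
ToyFile and ToyLevelIterator are hypothetical and the keys are plain std::string;
the real LevelIterator compares internal keys through InternalKeyComparator and
asserts that the skipped binary search would have produced the same file index.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical toy types; not RocksDB classes.
struct ToyFile {
  std::string smallest;  // smallest key stored in this "file"
  std::string largest;   // largest key stored in this "file"
};

class ToyLevelIterator {
 public:
  explicit ToyLevelIterator(std::vector<ToyFile> files)
      : files_(std::move(files)) {}

  // Position the iterator on the file that may contain `target`.
  // If the previous Seek() already selected a file whose key range covers
  // `target`, skip the binary search over the file list -- the same idea as
  // the LevelIterator::Seek() change above, minus internal-key handling.
  void Seek(const std::string& target) {
    bool need_to_research = true;
    if (file_index_ < files_.size()) {
      const ToyFile& cur = files_[file_index_];
      if (target >= cur.smallest && target <= cur.largest) {
        need_to_research = false;  // target stays in the current file
      }
    }
    if (need_to_research) {
      ++num_binary_searches_;
      // First file whose largest key is >= target.
      auto it = std::lower_bound(
          files_.begin(), files_.end(), target,
          [](const ToyFile& f, const std::string& t) { return f.largest < t; });
      file_index_ = static_cast<size_t>(it - files_.begin());
    }
  }

  size_t file_index() const { return file_index_; }
  int num_binary_searches() const { return num_binary_searches_; }

 private:
  std::vector<ToyFile> files_;
  size_t file_index_ = SIZE_MAX;  // no file selected yet
  int num_binary_searches_ = 0;
};

int main() {
  ToyLevelIterator iter({{"a", "f"}, {"g", "m"}, {"n", "z"}});

  iter.Seek("b");  // first seek: binary search needed
  iter.Seek("c");  // same file ["a", "f"]: search skipped
  iter.Seek("h");  // different file: search needed again
  assert(iter.file_index() == 1);
  std::cout << "binary searches: " << iter.num_binary_searches() << "\n";  // 2
  return 0;
}

Running it prints "binary searches: 2": the Seek("c") call reuses the current file
because the target lies within [smallest, largest], which is the same reason
num_find_file_in_level stays at 1 for the repeated forward seeks in the test above.
The block-level check in BlockBasedTableIterator::Seek() follows the same shape,
except that it compares user keys and excludes equality, so a boundary key such as
"8" in the test still triggers an index seek.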