From 3c0bb7fdd4006337df3e925d9f00789a15fd0756 Mon Sep 17 00:00:00 2001 From: Maysam Yabandeh Date: Fri, 11 Oct 2019 20:28:36 -0700 Subject: [PATCH] Fix SeekForPrev bug with Partitioned Filters and Prefix (#5907) Summary: Partition Filters make use of a top-level index to find the partition that might have the bloom hash of the key. The index is with internal key format (before format version 3). Each partition contains the i) blooms of the keys in that range ii) bloom of prefixes of keys in that range, iii) the bloom of the prefix of the last key in the previous partition. When ::SeekForPrev(key), we first perform a prefix bloom test on the SST file. The partition however is identified using the full internal key, rather than the prefix key. The reason is to be compatible with the internal key format of the top-level index. This creates a corner case. Example: - SST k, Partition N: P1K1, P1K2 - SST k, top-level index: P1K2 - SST k+1, Partition 1: P2K1, P3K1 - SST k+1 top-level index: P3K1 When SeekForPrev(P1K3), it should point us to P1K2. However SST k top-level index would reject P1K3 since it is out of range. One possible fix would be to search with the prefix P1 (instead of full internal key P1K3) however the details of properly comparing prefix with full internal key might get complicated. The fix we apply in this PR is to look into the last partition anyway even if the key is out of range. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5907 Differential Revision: D17889918 Pulled By: maysamyabandeh fbshipit-source-id: 169fd7b3c71dbc08808eae5a8340611ebe5bdc1e --- table/block_based/partitioned_filter_block.cc | 7 ++++++- table/block_based/partitioned_filter_block_test.cc | 12 +++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index 1ba6b3c07..e3de03965 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -192,7 +192,12 @@ BlockHandle PartitionedFilterBlockReader::GetFilterPartitionHandle( index_key_includes_seq(), index_value_is_full()); iter.Seek(entry); if (UNLIKELY(!iter.Valid())) { - return BlockHandle(0, 0); + // entry is larger than all the keys. However its prefix might still be + // present in the last partition. If this is called by PrefixMayMatch this + // is necessary for correct behavior. Otherwise it is unnecessary but safe. + // Assuming this is an unlikely case for full key search, the performance + // overhead should be negligible. + iter.SeekToLast(); } assert(iter.Valid()); BlockHandle fltr_blk_handle = iter.value().handle; diff --git a/table/block_based/partitioned_filter_block_test.cc b/table/block_based/partitioned_filter_block_test.cc index aa667afed..66016e64e 100644 --- a/table/block_based/partitioned_filter_block_test.cc +++ b/table/block_based/partitioned_filter_block_test.cc @@ -327,7 +327,7 @@ TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) { std::unique_ptr pib(NewIndexBuilder()); std::unique_ptr builder( NewBuilder(pib.get(), prefix_extractor.get())); - const std::string pkeys[3] = {"p-key1", "p-key2", "p-key3"}; + const std::string pkeys[3] = {"p-key10", "p-key20", "p-key30"}; builder->Add(pkeys[0]); CutABlock(pib.get(), pkeys[0], pkeys[1]); builder->Add(pkeys[1]); @@ -344,6 +344,16 @@ TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) { /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr, /*lookup_context=*/nullptr)); } + // Non-existent keys but with the same prefix + const std::string pnonkeys[4] = {"p-key9", "p-key11", "p-key21", "p-key31"}; + for (auto key : pnonkeys) { + auto ikey = InternalKey(key, 0, ValueType::kTypeValue); + const Slice ikey_slice = Slice(*ikey.rep()); + ASSERT_TRUE(reader->PrefixMayMatch( + prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid, + /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr, + /*lookup_context=*/nullptr)); + } } TEST_P(PartitionedFilterBlockTest, OneBlockPerKey) {