Fix MultiGet() bug when whole_key_filtering is disabled (#5665)

Summary:
The batched MultiGet() implementation was not correctly handling bloom filter lookups when whole_key_filtering is disabled. It was incorrectly skipping keys not in the prefix_extractor domain, and not calling transform for keys in domain. This PR fixes both problems by moving the domain check and transformation to the FilterBlockReader.

Tests:
Unit test (confirmed failed before the fix)
make check
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5665

Differential Revision: D16902380

Pulled By: anand1976

fbshipit-source-id: a6be81ad68a6e37134a65246aec7a2c590eccf00
This commit is contained in:
anand76 2019-08-21 10:21:41 -07:00 committed by Facebook Github Bot
parent 7bc18e2727
commit 9046bdc5d3
6 changed files with 79 additions and 16 deletions

View File

@ -3,6 +3,7 @@
### Bug Fixes
* Fixed a number of data races in BlobDB.
* Fix a bug where the compaction snapshot refresh feature is not disabled as advertised when `snap_refresh_nanos` is set to 0.
* Fix bloom filter lookups by the MultiGet batching API when BlockBasedTableOptions::whole_key_filtering is false, by checking that a key is in the prefix_extractor domain and extracting the prefix before looking up.
### New Features
* VerifyChecksum() by default will issue readahead. Allow ReadOptions to be passed in to those functions to override the readahead size. For checksum verifying before external SST file ingestion, a new option IngestExternalFileOptions.verify_checksums_readahead_size, is added for this readahead setting.

View File

@ -1287,6 +1287,53 @@ TEST_F(DBBasicTest, MultiGetBatchedMultiLevel) {
}
}
// Test class for the batched MultiGet() API when a prefix extractor is
// configured and whole_key_filtering is disabled, i.e. the bloom filter
// indexes key prefixes rather than whole keys.
// Param bool - If true, use partitioned filters;
// if false, use a full (monolithic) filter block.
class MultiGetPrefixExtractorTest
: public DBBasicTest,
public ::testing::WithParamInterface<bool> {};
// Exercises batched MultiGet() with whole_key_filtering disabled and a
// 2-byte fixed prefix extractor. The key "k" is shorter than the prefix
// and therefore out of the extractor's domain; the four "kk*" keys are in
// domain, so exactly four bloom filter probes should register as hits.
TEST_P(MultiGetPrefixExtractorTest, Batched) {
  Options options = CurrentOptions();
  options.prefix_extractor.reset(NewFixedPrefixTransform(2));
  BlockBasedTableOptions table_options;
  const bool use_partitioned_filters = GetParam();
  if (use_partitioned_filters) {
    table_options.index_type =
        BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
    table_options.partition_filters = true;
  }
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  table_options.whole_key_filtering = false;
  table_options.cache_index_and_filter_blocks = false;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  // First key ("k") is not in the prefix_extractor domain.
  const std::vector<std::string> lookup_keys(
      {"k", "kk1", "kk2", "kk3", "kk4"});
  const std::vector<std::string> expected_values(
      {"v0", "v1", "v2", "v3", "v4"});
  for (size_t i = 0; i < lookup_keys.size(); ++i) {
    ASSERT_OK(Put(lookup_keys[i], expected_values[i]));
  }
  ASSERT_OK(Flush());
  SetPerfLevel(kEnableCount);
  get_perf_context()->Reset();
  std::vector<std::string> values = MultiGet(lookup_keys, nullptr);
  for (size_t i = 0; i < expected_values.size(); ++i) {
    ASSERT_EQ(values[i], expected_values[i]);
  }
  // Filter hits only for the 4 in-domain ("kk*") keys.
  ASSERT_EQ(get_perf_context()->bloom_sst_hit_count, 4);
}
// Instantiate the Batched test for both filter layouts: partitioned
// filters (param = true) and a full filter block (param = false).
INSTANTIATE_TEST_CASE_P(
MultiGetPrefix, MultiGetPrefixExtractorTest,
::testing::Bool());
#ifndef ROCKSDB_LITE
TEST_F(DBBasicTest, GetAllKeyVersions) {
Options options = CurrentOptions();

View File

@ -3155,13 +3155,6 @@ void BlockBasedTable::FullFilterKeysMayMatch(
} else if (!read_options.total_order_seek && prefix_extractor &&
rep_->table_properties->prefix_extractor_name.compare(
prefix_extractor->Name()) == 0) {
for (auto iter = range->begin(); iter != range->end(); ++iter) {
Slice user_key = iter->lkey->user_key();
if (!prefix_extractor->InDomain(user_key)) {
range->SkipKey(iter);
}
}
filter->PrefixesMayMatch(range, prefix_extractor, kNotValid, false,
lookup_context);
}

View File

@ -137,7 +137,8 @@ class FilterBlockReader {
const Slice ukey = iter->ukey;
const Slice ikey = iter->ikey;
GetContext* const get_context = iter->get_context;
if (!KeyMayMatch(prefix_extractor->Transform(ukey), prefix_extractor,
if (prefix_extractor->InDomain(ukey) &&
!PrefixMayMatch(prefix_extractor->Transform(ukey), prefix_extractor,
block_offset, no_io, &ikey, get_context,
lookup_context)) {
range->SkipKey(iter);

View File

@ -3,6 +3,7 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <array>
#include "table/block_based/full_filter_block.h"
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
@ -208,11 +209,11 @@ void FullFilterBlockReader::KeysMayMatch(
// present
return;
}
MayMatch(range, no_io, lookup_context);
MayMatch(range, no_io, nullptr, lookup_context);
}
void FullFilterBlockReader::PrefixesMayMatch(
MultiGetRange* range, const SliceTransform* /* prefix_extractor */,
MultiGetRange* range, const SliceTransform* prefix_extractor,
uint64_t block_offset, const bool no_io,
BlockCacheLookupContext* lookup_context) {
#ifdef NDEBUG
@ -220,11 +221,12 @@ void FullFilterBlockReader::PrefixesMayMatch(
(void)block_offset;
#endif
assert(block_offset == kNotValid);
MayMatch(range, no_io, lookup_context);
MayMatch(range, no_io, prefix_extractor, lookup_context);
}
void FullFilterBlockReader::MayMatch(
MultiGetRange* range, bool no_io,
const SliceTransform* prefix_extractor,
BlockCacheLookupContext* lookup_context) const {
CachableEntry<BlockContents> filter_block;
@ -252,18 +254,36 @@ void FullFilterBlockReader::MayMatch(
// &may_match[0] doesn't work for autovector<bool> (compiler error). So
// declare both keys and may_match as arrays, which is also slightly less
// expensive compared to autovector
Slice* keys[MultiGetContext::MAX_BATCH_SIZE];
bool may_match[MultiGetContext::MAX_BATCH_SIZE] = {false};
std::array<Slice*, MultiGetContext::MAX_BATCH_SIZE> keys;
std::array<bool, MultiGetContext::MAX_BATCH_SIZE> may_match = {{true}};
autovector<Slice, MultiGetContext::MAX_BATCH_SIZE> prefixes;
int num_keys = 0;
for (auto iter = range->begin(); iter != range->end(); ++iter) {
keys[num_keys++] = &iter->ukey;
MultiGetRange filter_range(*range, range->begin(), range->end());
for (auto iter = filter_range.begin();
iter != filter_range.end(); ++iter) {
if (!prefix_extractor) {
keys[num_keys++] = &iter->ukey;
} else if (prefix_extractor->InDomain(iter->ukey)) {
prefixes.emplace_back(prefix_extractor->Transform(iter->ukey));
keys[num_keys++] = &prefixes.back();
} else {
filter_range.SkipKey(iter);
}
}
filter_bits_reader->MayMatch(num_keys, &keys[0], &may_match[0]);
int i = 0;
for (auto iter = range->begin(); iter != range->end(); ++iter) {
for (auto iter = filter_range.begin();
iter != filter_range.end(); ++iter) {
if (!may_match[i]) {
// Update original MultiGet range to skip this key. The filter_range
// was temporarily used just to skip keys not in prefix_extractor domain
range->SkipKey(iter);
PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
} else {
//PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
PerfContext* perf_ctx = get_perf_context();
perf_ctx->bloom_sst_hit_count++;
}
++i;
}

View File

@ -124,6 +124,7 @@ class FullFilterBlockReader : public FilterBlockReaderCommon<BlockContents> {
bool MayMatch(const Slice& entry, bool no_io, GetContext* get_context,
BlockCacheLookupContext* lookup_context) const;
void MayMatch(MultiGetRange* range, bool no_io,
const SliceTransform* prefix_extractor,
BlockCacheLookupContext* lookup_context) const;
bool IsFilterCompatible(const Slice* iterate_upper_bound, const Slice& prefix,
const Comparator* comparator) const;