Block::Iter::PrefixSeek() to have an extra check to filter out some false matches

Summary:
In block based table's hash index checking, when looking for a key that doesn't exist, there is a high chance that a false block is returned because of hash bucket conflicts. In this revision, another check is done to filter out some of those cases: comparing previous key of the block boundary to see whether the target block is what we are looking for.

In a favored test setting (bloom filter disabled, 8 L0 files), I saw about 80% improvements. In a non-favored test setting (bloom filter enabled, files are all in L1, files are all cached), I see the performance penalty is less than 3%.

Test Plan: make all check

Reviewers: haobo, ljin

Reviewed By: ljin

Subscribers: wuj, leveldb, zagfox, yhchiang

Differential Revision: https://reviews.facebook.net/D20595
This commit is contained in:
sdong 2014-07-25 14:38:18 -07:00
parent 62f9b071ff
commit 4a8f0c957c

View File

@ -284,26 +284,36 @@ class Block::Iter : public Iterator {
return true;
}
// Compare target key and the block key of the block of `block_index`.
// Return -1 if error.
int CompareBlockKey(uint32_t block_index, const Slice& target) {
uint32_t region_offset = GetRestartPoint(block_index);
uint32_t shared, non_shared, value_length;
const char* key_ptr = DecodeEntry(data_ + region_offset, data_ + restarts_,
&shared, &non_shared, &value_length);
if (key_ptr == nullptr || (shared != 0)) {
CorruptionError();
return 1; // Return target is smaller
}
Slice block_key(key_ptr, non_shared);
return Compare(block_key, target);
}
// Binary search in block_ids to find the first block
// with a key >= target
bool BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids,
uint32_t left, uint32_t right,
uint32_t* index) {
assert(left <= right);
uint32_t left_bound = left;
while (left <= right) {
uint32_t mid = (left + right) / 2;
uint32_t region_offset = GetRestartPoint(block_ids[mid]);
uint32_t shared, non_shared, value_length;
const char* key_ptr =
DecodeEntry(data_ + region_offset, data_ + restarts_, &shared,
&non_shared, &value_length);
if (key_ptr == nullptr || (shared != 0)) {
CorruptionError();
int cmp = CompareBlockKey(block_ids[mid], target);
if (!status_.ok()) {
return false;
}
Slice mid_key(key_ptr, non_shared);
int cmp = Compare(mid_key, target);
if (cmp < 0) {
// Key at "target" is larger than "mid". Therefore all
// blocks before or at "mid" are uninteresting.
@ -318,6 +328,19 @@ class Block::Iter : public Iterator {
}
if (left == right) {
// In one of the two following cases:
// (1) left is the first one of block_ids
// (2) there is a gap of blocks between block of `left` and `left-1`.
// we can further distinguish the case of key in the block or key not
// existing, by comparing the target key and the key of the previous
// block to the left of the block found.
if (block_ids[left] > 0 &&
(left == left_bound || block_ids[left - 1] != block_ids[left] - 1) &&
CompareBlockKey(block_ids[left] - 1, target) > 0) {
current_ = restarts_;
return false;
}
*index = block_ids[left];
return true;
} else {