Block::Iter::PrefixSeek() to have an extra check to filter out some false matches
Summary: In block based table's hash index checking, when looking for a key that doesn't exist, there is a high chance that a false block is returned because of hash bucket conflicts. In this revision, another check is done to filter out some of those cases: comparing previous key of the block boundary to see whether the target block is what we are looking for. In a favored test setting (bloom filter disabled, 8 L0 files), I saw about 80% improvements. In a non-favored test setting (bloom filter enabled, files are all in L1, files are all cached), I see the performance penalty is less than 3%. Test Plan: make all check Reviewers: haobo, ljin Reviewed By: ljin Subscribers: wuj, leveldb, zagfox, yhchiang Differential Revision: https://reviews.facebook.net/D20595
This commit is contained in:
parent
62f9b071ff
commit
4a8f0c957c
@ -284,26 +284,36 @@ class Block::Iter : public Iterator {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Compare target key and the block key of the block of `block_index`.
|
||||
// Return -1 if error.
|
||||
int CompareBlockKey(uint32_t block_index, const Slice& target) {
|
||||
uint32_t region_offset = GetRestartPoint(block_index);
|
||||
uint32_t shared, non_shared, value_length;
|
||||
const char* key_ptr = DecodeEntry(data_ + region_offset, data_ + restarts_,
|
||||
&shared, &non_shared, &value_length);
|
||||
if (key_ptr == nullptr || (shared != 0)) {
|
||||
CorruptionError();
|
||||
return 1; // Return target is smaller
|
||||
}
|
||||
Slice block_key(key_ptr, non_shared);
|
||||
return Compare(block_key, target);
|
||||
}
|
||||
|
||||
// Binary search in block_ids to find the first block
|
||||
// with a key >= target
|
||||
bool BinaryBlockIndexSeek(const Slice& target, uint32_t* block_ids,
|
||||
uint32_t left, uint32_t right,
|
||||
uint32_t* index) {
|
||||
assert(left <= right);
|
||||
uint32_t left_bound = left;
|
||||
|
||||
while (left <= right) {
|
||||
uint32_t mid = (left + right) / 2;
|
||||
uint32_t region_offset = GetRestartPoint(block_ids[mid]);
|
||||
uint32_t shared, non_shared, value_length;
|
||||
const char* key_ptr =
|
||||
DecodeEntry(data_ + region_offset, data_ + restarts_, &shared,
|
||||
&non_shared, &value_length);
|
||||
if (key_ptr == nullptr || (shared != 0)) {
|
||||
CorruptionError();
|
||||
|
||||
int cmp = CompareBlockKey(block_ids[mid], target);
|
||||
if (!status_.ok()) {
|
||||
return false;
|
||||
}
|
||||
Slice mid_key(key_ptr, non_shared);
|
||||
int cmp = Compare(mid_key, target);
|
||||
if (cmp < 0) {
|
||||
// Key at "target" is larger than "mid". Therefore all
|
||||
// blocks before or at "mid" are uninteresting.
|
||||
@ -318,6 +328,19 @@ class Block::Iter : public Iterator {
|
||||
}
|
||||
|
||||
if (left == right) {
|
||||
// In one of the two following cases:
|
||||
// (1) left is the first one of block_ids
|
||||
// (2) there is a gap of blocks between block of `left` and `left-1`.
|
||||
// we can further distinguish the case of key in the block or key not
|
||||
// existing, by comparing the target key and the key of the previous
|
||||
// block to the left of the block found.
|
||||
if (block_ids[left] > 0 &&
|
||||
(left == left_bound || block_ids[left - 1] != block_ids[left] - 1) &&
|
||||
CompareBlockKey(block_ids[left] - 1, target) > 0) {
|
||||
current_ = restarts_;
|
||||
return false;
|
||||
}
|
||||
|
||||
*index = block_ids[left];
|
||||
return true;
|
||||
} else {
|
||||
|
Loading…
Reference in New Issue
Block a user