VersionSet: optmize GetOverlappingInputsRangeBinarySearch (#4987)

Summary:
`GetOverlappingInputsRangeBinarySearch` firstly use binary search
to find a index in the given range `[begin, end]`. But after find
the index, then use linear search to find the `start_index` and
`end_index`. So the search process degraded to linear time.

Here optmize the search process with below changes:

- use `std::lower_bound` and `std::upper_bound` to get
  `lg(n)` search complexity.
- use uniformed lambda for search process.
- simplify process for `within_interval` true or false.
- remove function `ExtendFileRangeWithinInterval`
  and `ExtendFileRangeOverlappingInterval`.

Signed-off-by: JiYou <jiyou09@gmail.com>
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4987

Differential Revision: D14984192

Pulled By: riversand963

fbshipit-source-id: fae4b8e59a21b7e350718d60cdc94dd55ac81e89
This commit is contained in:
JiYou 2019-04-17 18:12:20 -07:00 committed by Facebook Github Bot
parent 248b6b551e
commit 5b7e09bd6f
2 changed files with 66 additions and 195 deletions

View File

@ -2772,14 +2772,6 @@ void VersionStorageInfo::GetCleanInputsWithinInterval(
return;
}
const auto& level_files = level_files_brief_[level];
if (begin == nullptr) {
begin = &level_files.files[0].file_metadata->smallest;
}
if (end == nullptr) {
end = &level_files.files[level_files.num_files - 1].file_metadata->largest;
}
GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs,
hint_index, file_index,
true /* within_interval */);
@ -2797,67 +2789,94 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
bool within_interval, InternalKey** next_smallest) const {
assert(level > 0);
int min = 0;
int mid = 0;
int max = static_cast<int>(files_[level].size()) - 1;
bool foundOverlap = false;
auto user_cmp = user_comparator_;
const FdWithKeyRange* files = level_files_brief_[level].files;
const int num_files = static_cast<int>(level_files_brief_[level].num_files);
// if the caller already knows the index of a file that has overlap,
// then we can skip the binary search.
if (hint_index != -1) {
mid = hint_index;
foundOverlap = true;
}
// begin to use binary search to find lower bound
// and upper bound.
int start_index = 0;
int end_index = num_files;
while (!foundOverlap && min <= max) {
mid = (min + max)/2;
FdWithKeyRange* f = &(level_files_brief_[level].files[mid]);
auto& smallest = f->file_metadata->smallest;
auto& largest = f->file_metadata->largest;
if ((!within_interval && sstableKeyCompare(user_cmp, begin, largest) > 0) ||
(within_interval && sstableKeyCompare(user_cmp, begin, smallest) > 0)) {
min = mid + 1;
} else if ((!within_interval &&
sstableKeyCompare(user_cmp, smallest, end) > 0) ||
(within_interval &&
sstableKeyCompare(user_cmp, largest, end) > 0)) {
max = mid - 1;
} else {
foundOverlap = true;
break;
if (begin != nullptr) {
// if within_interval is true, with file_key would find
// not overlapping ranges in std::lower_bound.
auto cmp = [&user_cmp, &within_interval](const FdWithKeyRange& f,
const InternalKey* k) {
auto& file_key = within_interval ? f.file_metadata->smallest
: f.file_metadata->largest;
return sstableKeyCompare(user_cmp, file_key, *k) < 0;
};
start_index = static_cast<int>(
std::lower_bound(files,
files + (hint_index == -1 ? num_files : hint_index),
begin, cmp) -
files);
if (start_index > 0 && within_interval) {
bool is_overlapping = true;
while (is_overlapping && start_index < num_files) {
auto& pre_limit = files[start_index - 1].file_metadata->largest;
auto& cur_start = files[start_index].file_metadata->smallest;
is_overlapping = sstableKeyCompare(user_cmp, pre_limit, cur_start) == 0;
start_index += is_overlapping;
}
}
}
if (end != nullptr) {
// if within_interval is true, with file_key would find
// not overlapping ranges in std::upper_bound.
auto cmp = [&user_cmp, &within_interval](const InternalKey* k,
const FdWithKeyRange& f) {
auto& file_key = within_interval ? f.file_metadata->largest
: f.file_metadata->smallest;
return sstableKeyCompare(user_cmp, *k, file_key) < 0;
};
end_index = static_cast<int>(
std::upper_bound(files + start_index, files + num_files, end, cmp) -
files);
if (end_index < num_files && within_interval) {
bool is_overlapping = true;
while (is_overlapping && end_index > start_index) {
auto& next_start = files[end_index].file_metadata->smallest;
auto& cur_limit = files[end_index - 1].file_metadata->largest;
is_overlapping =
sstableKeyCompare(user_cmp, cur_limit, next_start) == 0;
end_index -= is_overlapping;
}
}
}
assert(start_index <= end_index);
// If there were no overlapping files, return immediately.
if (!foundOverlap) {
if (start_index == end_index) {
if (next_smallest) {
next_smallest = nullptr;
}
return;
}
assert(start_index < end_index);
// returns the index where an overlap is found
if (file_index) {
*file_index = mid;
*file_index = start_index;
}
int start_index, end_index;
if (within_interval) {
ExtendFileRangeWithinInterval(level, begin, end, mid,
&start_index, &end_index);
} else {
ExtendFileRangeOverlappingInterval(level, begin, end, mid,
&start_index, &end_index);
assert(end_index >= start_index);
}
// insert overlapping files into vector
for (int i = start_index; i <= end_index; i++) {
for (int i = start_index; i < end_index; i++) {
inputs->push_back(files_[level][i]);
}
if (next_smallest != nullptr) {
// Provide the next key outside the range covered by inputs
if (++end_index < static_cast<int>(files_[level].size())) {
if (end_index < static_cast<int>(files_[level].size())) {
**next_smallest = files_[level][end_index]->smallest;
} else {
*next_smallest = nullptr;
@ -2865,136 +2884,6 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
}
}
// Store in *start_index and *end_index the range of all files in
// "level" that overlap [begin,end]
// The mid_index specifies the index of at least one file that
// overlaps the specified range. From that file, iterate backward
// and forward to find all overlapping files.
// Use FileLevel in searching, make it faster
void VersionStorageInfo::ExtendFileRangeOverlappingInterval(
int level, const InternalKey* begin, const InternalKey* end,
unsigned int mid_index, int* start_index, int* end_index) const {
auto user_cmp = user_comparator_;
const FdWithKeyRange* files = level_files_brief_[level].files;
#ifndef NDEBUG
{
// assert that the file at mid_index overlaps with the range
assert(mid_index < level_files_brief_[level].num_files);
const FdWithKeyRange* f = &files[mid_index];
auto& smallest = f->file_metadata->smallest;
auto& largest = f->file_metadata->largest;
if (sstableKeyCompare(user_cmp, begin, smallest) <= 0) {
assert(sstableKeyCompare(user_cmp, smallest, end) <= 0);
} else {
// fprintf(stderr, "ExtendFileRangeOverlappingInterval\n%s - %s\n%s - %s\n%d %d\n",
// begin ? begin->DebugString().c_str() : "(null)",
// end ? end->DebugString().c_str() : "(null)",
// smallest->DebugString().c_str(),
// largest->DebugString().c_str(),
// sstableKeyCompare(user_cmp, smallest, begin),
// sstableKeyCompare(user_cmp, largest, begin));
assert(sstableKeyCompare(user_cmp, begin, largest) <= 0);
}
}
#endif
*start_index = mid_index + 1;
*end_index = mid_index;
int count __attribute__((__unused__));
count = 0;
// check backwards from 'mid' to lower indices
for (int i = mid_index; i >= 0 ; i--) {
const FdWithKeyRange* f = &files[i];
auto& largest = f->file_metadata->largest;
if (sstableKeyCompare(user_cmp, begin, largest) <= 0) {
*start_index = i;
assert((count++, true));
} else {
break;
}
}
// check forward from 'mid+1' to higher indices
for (unsigned int i = mid_index+1;
i < level_files_brief_[level].num_files; i++) {
const FdWithKeyRange* f = &files[i];
auto& smallest = f->file_metadata->smallest;
if (sstableKeyCompare(user_cmp, smallest, end) <= 0) {
assert((count++, true));
*end_index = i;
} else {
break;
}
}
assert(count == *end_index - *start_index + 1);
}
// Store in *start_index and *end_index the clean range of all files in
// "level" within [begin,end]
// The mid_index specifies the index of at least one file within
// the specified range. From that file, iterate backward
// and forward to find all overlapping files and then "shrink" to
// the clean range required.
// Use FileLevel in searching, make it faster
void VersionStorageInfo::ExtendFileRangeWithinInterval(
int level, const InternalKey* begin, const InternalKey* end,
unsigned int mid_index, int* start_index, int* end_index) const {
assert(level != 0);
auto* user_cmp = user_comparator_;
const FdWithKeyRange* files = level_files_brief_[level].files;
#ifndef NDEBUG
{
// assert that the file at mid_index is within the range
assert(mid_index < level_files_brief_[level].num_files);
const FdWithKeyRange* f = &files[mid_index];
auto& smallest = f->file_metadata->smallest;
auto& largest = f->file_metadata->largest;
assert(sstableKeyCompare(user_cmp, begin, smallest) <= 0 &&
sstableKeyCompare(user_cmp, largest, end) <= 0);
}
#endif
ExtendFileRangeOverlappingInterval(level, begin, end, mid_index,
start_index, end_index);
int left = *start_index;
int right = *end_index;
// shrink from left to right
while (left <= right) {
auto& smallest = files[left].file_metadata->smallest;
if (sstableKeyCompare(user_cmp, begin, smallest) > 0) {
left++;
continue;
}
if (left > 0) { // If not first file
auto& largest = files[left - 1].file_metadata->largest;
if (sstableKeyCompare(user_cmp, smallest, largest) == 0) {
left++;
continue;
}
}
break;
}
// shrink from right to left
while (left <= right) {
auto& largest = files[right].file_metadata->largest;
if (sstableKeyCompare(user_cmp, largest, end) > 0) {
right--;
continue;
}
if (right < static_cast<int>(level_files_brief_[level].num_files) -
1) { // If not the last file
auto& smallest = files[right + 1].file_metadata->smallest;
if (sstableKeyCompare(user_cmp, smallest, largest) == 0) {
// The last user key in range overlaps with the next file's first key
right--;
continue;
}
}
break;
}
*start_index = left;
*end_index = right;
}
uint64_t VersionStorageInfo::NumLevelBytes(int level) const {
assert(level >= 0);
assert(level < num_levels());

View File

@ -220,24 +220,6 @@ class VersionStorageInfo {
InternalKey** next_smallest = nullptr) // if non-null, returns the
const; // smallest key of next file not included
void ExtendFileRangeOverlappingInterval(
int level,
const InternalKey* begin, // nullptr means before all keys
const InternalKey* end, // nullptr means after all keys
unsigned int index, // start extending from this index
int* startIndex, // return the startIndex of input range
int* endIndex) // return the endIndex of input range
const;
void ExtendFileRangeWithinInterval(
int level,
const InternalKey* begin, // nullptr means before all keys
const InternalKey* end, // nullptr means after all keys
unsigned int index, // start extending from this index
int* startIndex, // return the startIndex of input range
int* endIndex) // return the endIndex of input range
const;
// Returns true iff some file in the specified level overlaps
// some part of [*smallest_user_key,*largest_user_key].
// smallest_user_key==NULL represents a key smaller than all keys in the DB.