Refactoring Version::Get()
Summary: Refactoring Version::Get() method to move file picker logic to a separate class. Test Plan: make check all Reviewers: igor, sdong, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D19713
This commit is contained in:
parent
c11d604ab3
commit
0418e66e2a
@ -40,6 +40,263 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
namespace {
|
||||
|
||||
// Find File in FileLevel data structure
|
||||
// Within an index range defined by left and right
|
||||
int FindFileInRange(const InternalKeyComparator& icmp,
|
||||
const FileLevel& file_level,
|
||||
const Slice& key,
|
||||
uint32_t left,
|
||||
uint32_t right) {
|
||||
while (left < right) {
|
||||
uint32_t mid = (left + right) / 2;
|
||||
const FdWithKeyRange& f = file_level.files[mid];
|
||||
if (icmp.InternalKeyComparator::Compare(f.largest_key, key) < 0) {
|
||||
// Key at "mid.largest" is < "target". Therefore all
|
||||
// files at or before "mid" are uninteresting.
|
||||
left = mid + 1;
|
||||
} else {
|
||||
// Key at "mid.largest" is >= "target". Therefore all files
|
||||
// after "mid" are uninteresting.
|
||||
right = mid;
|
||||
}
|
||||
}
|
||||
return right;
|
||||
}
|
||||
|
||||
// Ordering predicate: returns true when file `a` should be placed before
// file `b` in a newest-first ordering. Files are ranked by descending
// smallest_seqno, then by descending largest_seqno, and finally by
// descending file number so that the order is total.
bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) {
  if (a->smallest_seqno == b->smallest_seqno) {
    if (a->largest_seqno == b->largest_seqno) {
      // Both sequence-number bounds tie: fall back to the file number.
      return a->fd.GetNumber() > b->fd.GetNumber();
    }
    return a->largest_seqno > b->largest_seqno;
  }
  return a->smallest_seqno > b->smallest_seqno;
}
|
||||
|
||||
// Ordering predicate: returns true when file `a` should be placed before
// file `b` in an ordering by smallest internal key, as judged by `cmp`.
// Files whose smallest keys compare equal are ordered by ascending file
// number.
bool BySmallestKey(FileMetaData* a, FileMetaData* b,
                   const InternalKeyComparator* cmp) {
  const int key_order = cmp->Compare(a->smallest, b->smallest);
  if (key_order == 0) {
    // Equal smallest keys: the lower file number goes first.
    return a->fd.GetNumber() < b->fd.GetNumber();
  }
  return key_order < 0;
}
|
||||
|
||||
// Class to help choose the next file to search for the particular key.
|
||||
// Searches and returns files level by level.
|
||||
// We can search level-by-level since entries never hop across
|
||||
// levels. Therefore we are guaranteed that if we find data
|
||||
// in a smaller level, later levels are irrelevant (unless we
|
||||
// are MergeInProgress).
|
||||
class FilePicker {
|
||||
public:
|
||||
FilePicker(
|
||||
std::vector<FileMetaData*>* files,
|
||||
const Slice& user_key,
|
||||
const Slice& ikey,
|
||||
autovector<FileLevel>* file_levels,
|
||||
unsigned int num_levels,
|
||||
FileIndexer* file_indexer,
|
||||
const Comparator* user_comparator,
|
||||
const InternalKeyComparator* internal_comparator)
|
||||
: num_levels_(num_levels),
|
||||
curr_level_(-1),
|
||||
search_left_bound_(0),
|
||||
search_right_bound_(FileIndexer::kLevelMaxIndex),
|
||||
files_(files),
|
||||
file_levels_(file_levels),
|
||||
user_key_(user_key),
|
||||
ikey_(ikey),
|
||||
file_indexer_(file_indexer),
|
||||
user_comparator_(user_comparator),
|
||||
internal_comparator_(internal_comparator) {
|
||||
// Setup member variables to search first level.
|
||||
search_ended_ = !PrepareNextLevel();
|
||||
if (!search_ended_) {
|
||||
// Prefetch Level 0 table data to avoid cache miss if possible.
|
||||
for (unsigned int i = 0; i < (*file_levels_)[0].num_files; ++i) {
|
||||
auto* r = (*file_levels_)[0].files[i].fd.table_reader;
|
||||
if (r) {
|
||||
r->Prepare(ikey);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FdWithKeyRange* GetNextFile() {
|
||||
while (!search_ended_) { // Loops over different levels.
|
||||
while (curr_index_in_curr_level_ < curr_file_level_->num_files) {
|
||||
// Loops over all files in current level.
|
||||
FdWithKeyRange* f = &curr_file_level_->files[curr_index_in_curr_level_];
|
||||
int cmp_largest = -1;
|
||||
|
||||
// Do key range filtering of files or/and fractional cascading if:
|
||||
// (1) not all the files are in level 0, or
|
||||
// (2) there are more than 3 Level 0 files
|
||||
// If there are only 3 or less level 0 files in the system, we skip
|
||||
// the key range filtering. In this case, more likely, the system is
|
||||
// highly tuned to minimize number of tables queried by each query,
|
||||
// so it is unlikely that key range filtering is more efficient than
|
||||
// querying the files.
|
||||
if (num_levels_ > 1 || curr_file_level_->num_files > 3) {
|
||||
// Check if key is within a file's range. If search left bound and
|
||||
// right bound point to the same find, we are sure key falls in
|
||||
// range.
|
||||
assert(
|
||||
curr_level_ == 0 ||
|
||||
curr_index_in_curr_level_ == start_index_in_curr_level_ ||
|
||||
user_comparator_->Compare(user_key_,
|
||||
ExtractUserKey(f->smallest_key)) <= 0);
|
||||
|
||||
int cmp_smallest = user_comparator_->Compare(user_key_,
|
||||
ExtractUserKey(f->smallest_key));
|
||||
if (cmp_smallest >= 0) {
|
||||
cmp_largest = user_comparator_->Compare(user_key_,
|
||||
ExtractUserKey(f->largest_key));
|
||||
}
|
||||
|
||||
// Setup file search bound for the next level based on the
|
||||
// comparison results
|
||||
if (curr_level_ > 0) {
|
||||
file_indexer_->GetNextLevelIndex(curr_level_,
|
||||
curr_index_in_curr_level_,
|
||||
cmp_smallest, cmp_largest,
|
||||
&search_left_bound_,
|
||||
&search_right_bound_);
|
||||
}
|
||||
// Key falls out of current file's range
|
||||
if (cmp_smallest < 0 || cmp_largest > 0) {
|
||||
if (curr_level_ == 0) {
|
||||
++curr_index_in_curr_level_;
|
||||
continue;
|
||||
} else {
|
||||
// Search next level.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifndef NDEBUG
|
||||
// Sanity check to make sure that the files are correctly sorted
|
||||
if (prev_file_) {
|
||||
if (curr_level_ != 0) {
|
||||
int comp_sign = internal_comparator_->Compare(
|
||||
prev_file_->largest_key, f->smallest_key);
|
||||
assert(comp_sign < 0);
|
||||
} else {
|
||||
// level == 0, the current file cannot be newer than the previous
|
||||
// one. Use compressed data structure, has no attribute seqNo
|
||||
assert(curr_index_in_curr_level_ > 0);
|
||||
assert(!NewestFirstBySeqNo(files_[0][curr_index_in_curr_level_],
|
||||
files_[0][curr_index_in_curr_level_-1]));
|
||||
}
|
||||
}
|
||||
prev_file_ = f;
|
||||
#endif
|
||||
if (curr_level_ > 0 && cmp_largest < 0) {
|
||||
// No more files to search in this level.
|
||||
search_ended_ = !PrepareNextLevel();
|
||||
} else {
|
||||
++curr_index_in_curr_level_;
|
||||
}
|
||||
return f;
|
||||
}
|
||||
// Start searching next level.
|
||||
search_ended_ = !PrepareNextLevel();
|
||||
}
|
||||
// Search ended.
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
unsigned int num_levels_;
|
||||
unsigned int curr_level_;
|
||||
int search_left_bound_;
|
||||
int search_right_bound_;
|
||||
std::vector<FileMetaData*>* files_;
|
||||
autovector<FileLevel>* file_levels_;
|
||||
bool search_ended_;
|
||||
FileLevel* curr_file_level_;
|
||||
unsigned int curr_index_in_curr_level_;
|
||||
unsigned int start_index_in_curr_level_;
|
||||
Slice user_key_;
|
||||
Slice ikey_;
|
||||
FileIndexer* file_indexer_;
|
||||
const Comparator* user_comparator_;
|
||||
const InternalKeyComparator* internal_comparator_;
|
||||
#ifndef NDEBUG
|
||||
FdWithKeyRange* prev_file_;
|
||||
#endif
|
||||
|
||||
// Setup local variables to search next level.
|
||||
// Returns false if there are no more levels to search.
|
||||
bool PrepareNextLevel() {
|
||||
curr_level_++;
|
||||
while (curr_level_ < num_levels_) {
|
||||
curr_file_level_ = &(*file_levels_)[curr_level_];
|
||||
if (curr_file_level_->num_files == 0) {
|
||||
// When current level is empty, the search bound generated from upper
|
||||
// level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is
|
||||
// also empty.
|
||||
assert(search_left_bound_ == 0);
|
||||
assert(search_right_bound_ == -1 ||
|
||||
search_right_bound_ == FileIndexer::kLevelMaxIndex);
|
||||
// Since current level is empty, it will need to search all files in
|
||||
// the next level
|
||||
search_left_bound_ = 0;
|
||||
search_right_bound_ = FileIndexer::kLevelMaxIndex;
|
||||
curr_level_++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Some files may overlap each other. We find
|
||||
// all files that overlap user_key and process them in order from
|
||||
// newest to oldest. In the context of merge-operator, this can occur at
|
||||
// any level. Otherwise, it only occurs at Level-0 (since Put/Deletes
|
||||
// are always compacted into a single entry).
|
||||
int32_t start_index;
|
||||
if (curr_level_ == 0) {
|
||||
// On Level-0, we read through all files to check for overlap.
|
||||
start_index = 0;
|
||||
} else {
|
||||
// On Level-n (n>=1), files are sorted. Binary search to find the
|
||||
// earliest file whose largest key >= ikey. Search left bound and
|
||||
// right bound are used to narrow the range.
|
||||
if (search_left_bound_ == search_right_bound_) {
|
||||
start_index = search_left_bound_;
|
||||
} else if (search_left_bound_ < search_right_bound_) {
|
||||
if (search_right_bound_ == FileIndexer::kLevelMaxIndex) {
|
||||
search_right_bound_ = curr_file_level_->num_files - 1;
|
||||
}
|
||||
start_index = FindFileInRange(*internal_comparator_,
|
||||
*curr_file_level_, ikey_,
|
||||
search_left_bound_, search_right_bound_);
|
||||
} else {
|
||||
// search_left_bound > search_right_bound, key does not exist in
|
||||
// this level. Since no comparision is done in this level, it will
|
||||
// need to search all files in the next level.
|
||||
search_left_bound_ = 0;
|
||||
search_right_bound_ = FileIndexer::kLevelMaxIndex;
|
||||
curr_level_++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
start_index_in_curr_level_ = start_index;
|
||||
curr_index_in_curr_level_ = start_index;
|
||||
#ifndef NDEBUG
|
||||
prev_file_ = nullptr;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
// curr_level_ = num_levels_. So, no more levels to search.
|
||||
return false;
|
||||
}
|
||||
};
|
||||
} // anonymous namespace
|
||||
|
||||
static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
|
||||
uint64_t sum = 0;
|
||||
for (size_t i = 0; i < files.size() && files[i]; i++) {
|
||||
@ -82,29 +339,6 @@ Version::~Version() {
|
||||
delete[] files_;
|
||||
}
|
||||
|
||||
// Find File in FileLevel data structure
|
||||
// Within an index range defined by left and right
|
||||
int FindFileInRange(const InternalKeyComparator& icmp,
|
||||
const FileLevel& file_level,
|
||||
const Slice& key,
|
||||
uint32_t left,
|
||||
uint32_t right) {
|
||||
while (left < right) {
|
||||
uint32_t mid = (left + right) / 2;
|
||||
const FdWithKeyRange& f = file_level.files[mid];
|
||||
if (icmp.InternalKeyComparator::Compare(f.largest_key, key) < 0) {
|
||||
// Key at "mid.largest" is < "target". Therefore all
|
||||
// files at or before "mid" are uninteresting.
|
||||
left = mid + 1;
|
||||
} else {
|
||||
// Key at "mid.largest" is >= "target". Therefore all files
|
||||
// after "mid" are uninteresting.
|
||||
right = mid;
|
||||
}
|
||||
}
|
||||
return right;
|
||||
}
|
||||
|
||||
int FindFile(const InternalKeyComparator& icmp,
|
||||
const FileLevel& file_level,
|
||||
const Slice& key) {
|
||||
@ -507,28 +741,6 @@ static bool SaveValue(void* arg, const ParsedInternalKey& parsed_key,
|
||||
return false;
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Ordering predicate: returns true when file `a` should be placed before
// file `b` in a newest-first ordering. Files are ranked by descending
// smallest_seqno, then by descending largest_seqno, and finally by
// descending file number so that the order is total.
bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) {
  if (a->smallest_seqno == b->smallest_seqno) {
    if (a->largest_seqno == b->largest_seqno) {
      // Both sequence-number bounds tie: fall back to the file number.
      return a->fd.GetNumber() > b->fd.GetNumber();
    }
    return a->largest_seqno > b->largest_seqno;
  }
  return a->smallest_seqno > b->smallest_seqno;
}
|
||||
// Ordering predicate: returns true when file `a` should be placed before
// file `b` in an ordering by smallest internal key, as judged by `cmp`.
// Files whose smallest keys compare equal are ordered by ascending file
// number.
bool BySmallestKey(FileMetaData* a, FileMetaData* b,
                   const InternalKeyComparator* cmp) {
  const int key_order = cmp->Compare(a->smallest, b->smallest);
  if (key_order == 0) {
    // Equal smallest keys: the lower file number goes first.
    return a->fd.GetNumber() < b->fd.GetNumber();
  }
  return key_order < 0;
}
|
||||
} // anonymous namespace
|
||||
|
||||
Version::Version(ColumnFamilyData* cfd, VersionSet* vset,
|
||||
uint64_t version_number)
|
||||
: cfd_(cfd),
|
||||
@ -591,166 +803,32 @@ void Version::Get(const ReadOptions& options,
|
||||
saver.logger = info_log_;
|
||||
saver.statistics = db_statistics_;
|
||||
|
||||
// We can search level-by-level since entries never hop across
|
||||
// levels. Therefore we are guaranteed that if we find data
|
||||
// in a smaller level, later levels are irrelevant (unless we
|
||||
// are MergeInProgress).
|
||||
|
||||
int32_t search_left_bound = 0;
|
||||
int32_t search_right_bound = FileIndexer::kLevelMaxIndex;
|
||||
for (int level = 0; level < num_non_empty_levels_; ++level) {
|
||||
int num_files = file_levels_[level].num_files;
|
||||
if (num_files == 0) {
|
||||
// When current level is empty, the search bound generated from upper
|
||||
// level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is
|
||||
// also empty.
|
||||
assert(search_left_bound == 0);
|
||||
assert(search_right_bound == -1 ||
|
||||
search_right_bound == FileIndexer::kLevelMaxIndex);
|
||||
// Since current level is empty, it will need to search all files in the
|
||||
// next level
|
||||
search_left_bound = 0;
|
||||
search_right_bound = FileIndexer::kLevelMaxIndex;
|
||||
continue;
|
||||
FilePicker fp(files_, user_key, ikey, &file_levels_, num_non_empty_levels_,
|
||||
&file_indexer_, user_comparator_, internal_comparator_);
|
||||
FdWithKeyRange* f = fp.GetNextFile();
|
||||
while (f != nullptr) {
|
||||
*status = table_cache_->Get(options, *internal_comparator_, f->fd, ikey,
|
||||
&saver, SaveValue, MarkKeyMayExist);
|
||||
// TODO: examine the behavior for corrupted key
|
||||
if (!status->ok()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Prefetch table data to avoid cache miss if possible
|
||||
if (level == 0) {
|
||||
for (int i = 0; i < num_files; ++i) {
|
||||
auto* r = file_levels_[0].files[i].fd.table_reader;
|
||||
if (r) {
|
||||
r->Prepare(ikey);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get the list of files to search in this level
|
||||
FdWithKeyRange* files = file_levels_[level].files;
|
||||
|
||||
// Some files may overlap each other. We find
|
||||
// all files that overlap user_key and process them in order from
|
||||
// newest to oldest. In the context of merge-operator,
|
||||
// this can occur at any level. Otherwise, it only occurs
|
||||
// at Level-0 (since Put/Deletes are always compacted into a single entry).
|
||||
int32_t start_index;
|
||||
if (level == 0) {
|
||||
// On Level-0, we read through all files to check for overlap.
|
||||
start_index = 0;
|
||||
} else {
|
||||
// On Level-n (n>=1), files are sorted. Binary search to find the earliest
|
||||
// file whose largest key >= ikey. Search left bound and right bound are
|
||||
// used to narrow the range.
|
||||
if (search_left_bound == search_right_bound) {
|
||||
start_index = search_left_bound;
|
||||
} else if (search_left_bound < search_right_bound) {
|
||||
if (search_right_bound == FileIndexer::kLevelMaxIndex) {
|
||||
search_right_bound = num_files - 1;
|
||||
}
|
||||
start_index = FindFileInRange(cfd_->internal_comparator(),
|
||||
file_levels_[level], ikey,
|
||||
search_left_bound, search_right_bound);
|
||||
} else {
|
||||
// search_left_bound > search_right_bound, key does not exist in this
|
||||
// level. Since no comparison is done in this level, it will need to
|
||||
// search all files in the next level.
|
||||
search_left_bound = 0;
|
||||
search_right_bound = FileIndexer::kLevelMaxIndex;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// Traverse each relevant file to find the desired key
|
||||
#ifndef NDEBUG
|
||||
FdWithKeyRange* prev_file = nullptr;
|
||||
#endif
|
||||
|
||||
for (int32_t i = start_index; i < num_files;) {
|
||||
FdWithKeyRange* f = &files[i];
|
||||
assert(f->fd.GetNumber() == files_[level][i]->fd.GetNumber());
|
||||
int cmp_largest = -1;
|
||||
|
||||
// Do key range filtering of files or/and fractional cascading if:
|
||||
// (1) not all the files are in level 0, or
|
||||
// (2) there are more than 3 Level 0 files
|
||||
// If there are only 3 or less level 0 files in the system, we skip the
|
||||
// key range filtering. In this case, more likely, the system is highly
|
||||
// tuned to minimize number of tables queried by each query, so it is
|
||||
// unlikely that key range filtering is more efficient than querying the
|
||||
// files.
|
||||
if (num_non_empty_levels_ > 1 || num_files > 3) {
|
||||
// Check if key is within a file's range. If search left bound and right
|
||||
// bound point to the same file, we are sure key falls in range.
|
||||
assert(
|
||||
level == 0 || i == start_index || user_comparator_->Compare(
|
||||
user_key, ExtractUserKey(f->smallest_key)) <= 0);
|
||||
|
||||
int cmp_smallest = user_comparator_->Compare(user_key,
|
||||
ExtractUserKey(f->smallest_key));
|
||||
if (cmp_smallest >= 0) {
|
||||
cmp_largest = user_comparator_->Compare(user_key,
|
||||
ExtractUserKey(f->largest_key));
|
||||
}
|
||||
|
||||
// Setup file search bound for the next level based on the comparison
|
||||
// results
|
||||
if (level > 0) {
|
||||
file_indexer_.GetNextLevelIndex(level, i, cmp_smallest, cmp_largest,
|
||||
&search_left_bound,
|
||||
&search_right_bound);
|
||||
}
|
||||
// Key falls out of current file's range
|
||||
if (cmp_smallest < 0 || cmp_largest > 0) {
|
||||
if (level == 0) {
|
||||
++i;
|
||||
continue;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Sanity check to make sure that the files are correctly sorted
|
||||
if (prev_file) {
|
||||
if (level != 0) {
|
||||
int comp_sign = internal_comparator_->Compare(prev_file->largest_key,
|
||||
f->smallest_key);
|
||||
assert(comp_sign < 0);
|
||||
} else {
|
||||
// level == 0, the current file cannot be newer than the previous one.
|
||||
// Use compressed data structure, has no attribute seqNo
|
||||
assert(i > 0);
|
||||
assert(!NewestFirstBySeqNo(files_[0][i], files_[0][i-1]));
|
||||
}
|
||||
}
|
||||
prev_file = f;
|
||||
#endif
|
||||
*status = table_cache_->Get(options, *internal_comparator_, f->fd, ikey,
|
||||
&saver, SaveValue, MarkKeyMayExist);
|
||||
// TODO: examine the behavior for corrupted key
|
||||
if (!status->ok()) {
|
||||
switch (saver.state) {
|
||||
case kNotFound:
|
||||
break; // Keep searching in other files
|
||||
case kFound:
|
||||
return;
|
||||
}
|
||||
|
||||
switch (saver.state) {
|
||||
case kNotFound:
|
||||
break; // Keep searching in other files
|
||||
case kFound:
|
||||
return;
|
||||
case kDeleted:
|
||||
*status = Status::NotFound(); // Use empty error message for speed
|
||||
return;
|
||||
case kCorrupt:
|
||||
*status = Status::Corruption("corrupted key for ", user_key);
|
||||
return;
|
||||
case kMerge:
|
||||
break;
|
||||
}
|
||||
if (level > 0 && cmp_largest < 0) {
|
||||
case kDeleted:
|
||||
*status = Status::NotFound(); // Use empty error message for speed
|
||||
return;
|
||||
case kCorrupt:
|
||||
*status = Status::Corruption("corrupted key for ", user_key);
|
||||
return;
|
||||
case kMerge:
|
||||
break;
|
||||
} else {
|
||||
++i;
|
||||
}
|
||||
}
|
||||
f = fp.GetNextFile();
|
||||
}
|
||||
|
||||
if (kMerge == saver.state) {
|
||||
|
Loading…
Reference in New Issue
Block a user