Refactoring Version::Get()

Summary: Refactoring Version::Get() method to move file picker logic to a separate class. Test Plan: make check all Reviewers: igor, sdong, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D19713
2014-07-16 13:33:02 -07:00 · 2014-07-16 13:33:02 -07:00 · 0418e66e2a
commit 0418e66e2a
parent c11d604ab3
1 changed files with 278 additions and 200 deletions
--- a/db/version_set.cc
+++ b/db/version_set.cc
@ -40,6 +40,263 @@
 namespace rocksdb {
 namespace {
 // Find File in FileLevel data structure
 // Within an index range defined by left and right.
 //
 // Binary-searches file_level.files over the half-open index range
 // [left, right) and returns the smallest index whose largest_key is >= key
 // according to icmp. If every file examined has largest_key < key, the
 // original value of right is returned; files[right] itself is never compared.
 //
 // The search assumes the files in the range are ordered by largest_key.
 int FindFileInRange(const InternalKeyComparator& icmp,
    const FileLevel& file_level,
    const Slice& key,
    uint32_t left,
    uint32_t right) {
  while (left < right) {
    // Overflow-safe midpoint: (left + right) could wrap around in uint32_t
    // for very large indices and yield an index outside [left, right).
    const uint32_t mid = left + (right - left) / 2;
    const FdWithKeyRange& f = file_level.files[mid];
    // The qualified call pins the comparison to InternalKeyComparator's own
    // Compare implementation.
    if (icmp.InternalKeyComparator::Compare(f.largest_key, key) < 0) {
      // Key at "mid.largest" is < "target".  Therefore all
      // files at or before "mid" are uninteresting.
      left = mid + 1;
    } else {
      // Key at "mid.largest" is >= "target".  Therefore all files
      // after "mid" are uninteresting.
      right = mid;
    }
  }
  return static_cast<int>(right);
 }
 // Ordering predicate that places "newer" files first: a file with a larger
 // smallest_seqno comes first, then a larger largest_seqno, and finally a
 // larger file number decides any remaining tie.
 bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) {
  if (a->smallest_seqno == b->smallest_seqno) {
    if (a->largest_seqno == b->largest_seqno) {
      // Break ties by file number
      return a->fd.GetNumber() > b->fd.GetNumber();
    }
    return a->largest_seqno > b->largest_seqno;
  }
  return a->smallest_seqno > b->smallest_seqno;
 }
 // Ordering predicate that sorts files by ascending smallest key under cmp;
 // files whose smallest keys compare equal are ordered by ascending file
 // number.
 bool BySmallestKey(FileMetaData* a, FileMetaData* b,
                   const InternalKeyComparator* cmp) {
  const int order = cmp->Compare(a->smallest, b->smallest);
  if (order == 0) {
    // Break ties by file number
    return a->fd.GetNumber() < b->fd.GetNumber();
  }
  return order < 0;
 }
 // Class to help choose the next file to search for the particular key.
 // Searches and returns files level by level.
 // We can search level-by-level since entries never hop across
 // levels. Therefore we are guaranteed that if we find data
 // in a smaller level, later levels are irrelevant (unless we
 // are MergeInProgress).
 //
 // Intended use: construct one picker per lookup and call GetNextFile()
 // repeatedly until it returns nullptr. All iteration state (current level,
 // index within that level, and the search bounds carried over to the next
 // level) lives in the picker's members.
 class FilePicker {
 public:
  // Parameters:
  //   files               - array of per-level FileMetaData vectors; only the
  //                         level-0 vector is read, and only by the
  //                         debug-build ordering check in GetNextFile().
  //   user_key            - user key being looked up; used for the per-file
  //                         key range filtering.
  //   ikey                - internal key being looked up; used for the binary
  //                         search within a level and for the level-0
  //                         Prepare() calls.
  //   file_levels         - per-level file arrays that are iterated over.
  //   num_levels          - number of levels to search, starting at level 0.
  //   file_indexer        - supplies the search bounds for the next level.
  //   user_comparator     - compares user keys.
  //   internal_comparator - compares internal keys.
  // The pointer arguments are stored, not copied, so the pointees must
  // outlive the picker.
  // NOTE(review): user_key and ikey are copied as Slice objects; presumably
  // Slice does not own its bytes, so the key buffers must outlive the picker
  // too -- confirm against Slice's definition.
  FilePicker(
      std::vector<FileMetaData*>* files,
      const Slice& user_key,
      const Slice& ikey,
      autovector<FileLevel>* file_levels,
      unsigned int num_levels,
      FileIndexer* file_indexer,
      const Comparator* user_comparator,
      const InternalKeyComparator* internal_comparator)
      : num_levels_(num_levels),
        // curr_level_ is unsigned, so -1 wraps to the maximum value; the
        // first increment in PrepareNextLevel() wraps it back to 0.
        curr_level_(-1),
        search_left_bound_(0),
        search_right_bound_(FileIndexer::kLevelMaxIndex),
        files_(files),
        file_levels_(file_levels),
        user_key_(user_key),
        ikey_(ikey),
        file_indexer_(file_indexer),
        user_comparator_(user_comparator),
        internal_comparator_(internal_comparator) {
    // Setup member variables to search first level.
    search_ended_ = !PrepareNextLevel();
    if (!search_ended_) {
      // Prefetch Level 0 table data to avoid cache miss if possible.
      // If level 0 is empty this loop simply does nothing.
      for (unsigned int i = 0; i < (*file_levels_)[0].num_files; ++i) {
        auto* r = (*file_levels_)[0].files[i].fd.table_reader;
        if (r) {
          r->Prepare(ikey);
        }
      }
    }
  }
  // Returns the next file that should be searched for the key, or nullptr
  // once all levels have been exhausted.
  //
  // On level 0 the files are visited in index order; when key range filtering
  // is active, files whose user-key range does not contain the key are
  // skipped. On levels > 0 iteration starts at the index chosen by
  // PrepareNextLevel(); if the key is outside that file's range the picker
  // moves on to the next level without returning the file.
  //
  // The state for the following call is advanced before returning, so the
  // caller only needs to call this method again to continue the search.
  FdWithKeyRange* GetNextFile() {
    while (!search_ended_) {  // Loops over different levels.
      while (curr_index_in_curr_level_ < curr_file_level_->num_files) {
        // Loops over all files in current level.
        FdWithKeyRange* f = &curr_file_level_->files[curr_index_in_curr_level_];
        // Stays at -1 when filtering is skipped or when the key is smaller
        // than the file's smallest key.
        int cmp_largest = -1;
        // Do key range filtering of files or/and fractional cascading if:
        // (1) not all the files are in level 0, or
        // (2) there are more than 3 Level 0 files
        // If there are only 3 or less level 0 files in the system, we skip
        // the key range filtering. In this case, more likely, the system is
        // highly tuned to minimize number of tables queried by each query,
        // so it is unlikely that key range filtering is more efficient than
        // querying the files.
        if (num_levels_ > 1 || curr_file_level_->num_files > 3) {
          // Check if key is within a file's range. If search left bound and
          // right bound point to the same file, we are sure key falls in
          // range.
          assert(
              curr_level_ == 0 ||
              curr_index_in_curr_level_ == start_index_in_curr_level_ ||
              user_comparator_->Compare(user_key_,
                ExtractUserKey(f->smallest_key)) <= 0);
          int cmp_smallest = user_comparator_->Compare(user_key_,
              ExtractUserKey(f->smallest_key));
          // The comparison against the largest key is only needed when the
          // key is not already below the file's smallest key.
          if (cmp_smallest >= 0) {
            cmp_largest = user_comparator_->Compare(user_key_,
                ExtractUserKey(f->largest_key));
          }
          // Setup file search bound for the next level based on the
          // comparison results
          if (curr_level_ > 0) {
            file_indexer_->GetNextLevelIndex(curr_level_,
                                            curr_index_in_curr_level_,
                                            cmp_smallest, cmp_largest,
                                            &search_left_bound_,
                                            &search_right_bound_);
          }
          // Key falls out of current file's range
          if (cmp_smallest < 0 || cmp_largest > 0) {
            if (curr_level_ == 0) {
              // Level-0 files may overlap, so keep scanning this level.
              ++curr_index_in_curr_level_;
              continue;
            } else {
              // Search next level.
              break;
            }
          }
        }
 #ifndef NDEBUG
        // Sanity check to make sure that the files are correctly sorted
        if (prev_file_) {
          if (curr_level_ != 0) {
            int comp_sign = internal_comparator_->Compare(
                prev_file_->largest_key, f->smallest_key);
            assert(comp_sign < 0);
          } else {
            // level == 0, the current file cannot be newer than the previous
            // one. Use compressed data structure, has no attribute seqNo
            assert(curr_index_in_curr_level_ > 0);
            assert(!NewestFirstBySeqNo(files_[0][curr_index_in_curr_level_],
                  files_[0][curr_index_in_curr_level_-1]));
          }
        }
        prev_file_ = f;
 #endif
        // Advance the state for the next call before handing out f.
        if (curr_level_ > 0 && cmp_largest < 0) {
          // No more files to search in this level.
          search_ended_ = !PrepareNextLevel();
        } else {
          ++curr_index_in_curr_level_;
        }
        return f;
      }
      // Start searching next level.
      search_ended_ = !PrepareNextLevel();
    }
    // Search ended.
    return nullptr;
  }
 private:
  // Number of levels to search.
  unsigned int num_levels_;
  // Level currently being searched (see the constructor for its start value).
  unsigned int curr_level_;
  // Inclusive index bounds for the binary search in the level being prepared;
  // updated by FileIndexer::GetNextLevelIndex() and by PrepareNextLevel().
  int search_left_bound_;
  int search_right_bound_;
  // Per-level FileMetaData vectors; only read by the debug-build check.
  std::vector<FileMetaData*>* files_;
  autovector<FileLevel>* file_levels_;
  // True once there are no more levels to search.
  bool search_ended_;
  // NOTE(review): curr_file_level_, curr_index_in_curr_level_,
  // start_index_in_curr_level_ and (in debug builds) prev_file_ are not in the
  // constructor's initializer list; they are assigned by PrepareNextLevel()
  // and are only read while search_ended_ is false.
  FileLevel* curr_file_level_;
  // Index of the next file to examine in the current level.
  unsigned int curr_index_in_curr_level_;
  // Index at which the search of the current level started.
  unsigned int start_index_in_curr_level_;
  Slice user_key_;
  Slice ikey_;
  FileIndexer* file_indexer_;
  const Comparator* user_comparator_;
  const InternalKeyComparator* internal_comparator_;
 #ifndef NDEBUG
  // File examined most recently in the current level; debug builds only.
  FdWithKeyRange* prev_file_;
 #endif
  // Setup local variables to search next level.
  // Returns false if there are no more levels to search.
  // Levels that are empty, or whose search bounds are inverted
  // (left > right), are skipped and the bounds are reset so that the
  // following level is searched in full.
  bool PrepareNextLevel() {
    curr_level_++;
    while (curr_level_ < num_levels_) {
      curr_file_level_ = &(*file_levels_)[curr_level_];
      if (curr_file_level_->num_files == 0) {
        // When current level is empty, the search bound generated from upper
        // level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is
        // also empty.
        assert(search_left_bound_ == 0);
        assert(search_right_bound_ == -1 ||
               search_right_bound_ == FileIndexer::kLevelMaxIndex);
        // Since current level is empty, it will need to search all files in
        // the next level
        search_left_bound_ = 0;
        search_right_bound_ = FileIndexer::kLevelMaxIndex;
        curr_level_++;
        continue;
      }
      // Some files may overlap each other. We find
      // all files that overlap user_key and process them in order from
      // newest to oldest. In the context of merge-operator, this can occur at
      // any level. Otherwise, it only occurs at Level-0 (since Put/Deletes
      // are always compacted into a single entry).
      int32_t start_index;
      if (curr_level_ == 0) {
        // On Level-0, we read through all files to check for overlap.
        start_index = 0;
      } else {
        // On Level-n (n>=1), files are sorted. Binary search to find the
        // earliest file whose largest key >= ikey. Search left bound and
        // right bound are used to narrow the range.
        if (search_left_bound_ == search_right_bound_) {
          start_index = search_left_bound_;
        } else if (search_left_bound_ < search_right_bound_) {
          // kLevelMaxIndex stands for "up to the last file in this level".
          if (search_right_bound_ == FileIndexer::kLevelMaxIndex) {
            search_right_bound_ = curr_file_level_->num_files - 1;
          }
          start_index = FindFileInRange(*internal_comparator_,
              *curr_file_level_, ikey_,
              search_left_bound_, search_right_bound_);
        } else {
          // search_left_bound > search_right_bound, key does not exist in
          // this level. Since no comparison is done in this level, it will
          // need to search all files in the next level.
          search_left_bound_ = 0;
          search_right_bound_ = FileIndexer::kLevelMaxIndex;
          curr_level_++;
          continue;
        }
      }
      // The signed start index is stored into the unsigned members here.
      start_index_in_curr_level_ = start_index;
      curr_index_in_curr_level_ = start_index;
 #ifndef NDEBUG
      // A new level starts, so there is no previous file to compare against.
      prev_file_ = nullptr;
 #endif
      return true;
    }
    // curr_level_ = num_levels_. So, no more levels to search.
    return false;
  }
 };
 }  // anonymous namespace
 static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
  uint64_t sum = 0;
  for (size_t i = 0; i < files.size() && files[i]; i++) {
@ -82,29 +339,6 @@ Version::~Version() {
  delete[] files_;
 }
 // Find File in FileLevel data structure
 // Within an index range defined by left and right.
 //
 // Binary-searches file_level.files over the half-open index range
 // [left, right) and returns the smallest index whose largest_key is >= key
 // according to icmp. If every file examined has largest_key < key, the
 // original value of right is returned; files[right] itself is never compared.
 //
 // The search assumes the files in the range are ordered by largest_key.
 int FindFileInRange(const InternalKeyComparator& icmp,
    const FileLevel& file_level,
    const Slice& key,
    uint32_t left,
    uint32_t right) {
  while (left < right) {
    // Overflow-safe midpoint: (left + right) could wrap around in uint32_t
    // for very large indices and yield an index outside [left, right).
    const uint32_t mid = left + (right - left) / 2;
    const FdWithKeyRange& f = file_level.files[mid];
    // The qualified call pins the comparison to InternalKeyComparator's own
    // Compare implementation.
    if (icmp.InternalKeyComparator::Compare(f.largest_key, key) < 0) {
      // Key at "mid.largest" is < "target".  Therefore all
      // files at or before "mid" are uninteresting.
      left = mid + 1;
    } else {
      // Key at "mid.largest" is >= "target".  Therefore all files
      // after "mid" are uninteresting.
      right = mid;
    }
  }
  return static_cast<int>(right);
 }
 int FindFile(const InternalKeyComparator& icmp,
             const FileLevel& file_level,
             const Slice& key) {
@ -507,28 +741,6 @@ static bool SaveValue(void* arg, const ParsedInternalKey& parsed_key,
  return false;
 }
 namespace {
 // Ordering predicate that places "newer" files first: a file with a larger
 // smallest_seqno comes first, then a larger largest_seqno, and finally a
 // larger file number decides any remaining tie.
 bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) {
  if (a->smallest_seqno == b->smallest_seqno) {
    if (a->largest_seqno == b->largest_seqno) {
      // Break ties by file number
      return a->fd.GetNumber() > b->fd.GetNumber();
    }
    return a->largest_seqno > b->largest_seqno;
  }
  return a->smallest_seqno > b->smallest_seqno;
 }
 // Ordering predicate that sorts files by ascending smallest key under cmp;
 // files whose smallest keys compare equal are ordered by ascending file
 // number.
 bool BySmallestKey(FileMetaData* a, FileMetaData* b,
                   const InternalKeyComparator* cmp) {
  const int order = cmp->Compare(a->smallest, b->smallest);
  if (order == 0) {
    // Break ties by file number
    return a->fd.GetNumber() < b->fd.GetNumber();
  }
  return order < 0;
 }
 }  // anonymous namespace
 Version::Version(ColumnFamilyData* cfd, VersionSet* vset,
                 uint64_t version_number)
    : cfd_(cfd),
@ -591,139 +803,10 @@ void Version::Get(const ReadOptions& options,
  saver.logger = info_log_;
  saver.statistics = db_statistics_;
-  // We can search level-by-level since entries never hop across
+  FilePicker fp(files_, user_key, ikey, &file_levels_, num_non_empty_levels_,
-  // levels. Therefore we are guaranteed that if we find data
+      &file_indexer_, user_comparator_, internal_comparator_);
-  // in an smaller level, later levels are irrelevant (unless we
+  FdWithKeyRange* f = fp.GetNextFile();
-  // are MergeInProgress).
+  while (f != nullptr) {
  int32_t search_left_bound = 0;
  int32_t search_right_bound = FileIndexer::kLevelMaxIndex;
  for (int level = 0; level < num_non_empty_levels_; ++level) {
    int num_files = file_levels_[level].num_files;
    if (num_files == 0) {
      // When current level is empty, the search bound generated from upper
      // level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is
      // also empty.
      assert(search_left_bound == 0);
      assert(search_right_bound == -1 ||
             search_right_bound == FileIndexer::kLevelMaxIndex);
      // Since current level is empty, it will need to search all files in the
      // next level
      search_left_bound = 0;
      search_right_bound = FileIndexer::kLevelMaxIndex;
      continue;
    }
    // Prefetch table data to avoid cache miss if possible
    if (level == 0) {
      for (int i = 0; i < num_files; ++i) {
        auto* r = file_levels_[0].files[i].fd.table_reader;
        if (r) {
          r->Prepare(ikey);
        }
      }
    }
    // Get the list of files to search in this level
    FdWithKeyRange* files = file_levels_[level].files;
    // Some files may overlap each other. We find
    // all files that overlap user_key and process them in order from
    // newest to oldest. In the context of merge-operator,
    // this can occur at any level. Otherwise, it only occurs
    // at Level-0 (since Put/Deletes are always compacted into a single entry).
    int32_t start_index;
    if (level == 0) {
      // On Level-0, we read through all files to check for overlap.
      start_index = 0;
    } else {
      // On Level-n (n>=1), files are sorted. Binary search to find the earliest
      // file whose largest key >= ikey. Search left bound and right bound are
      // used to narrow the range.
      if (search_left_bound == search_right_bound) {
        start_index = search_left_bound;
      } else if (search_left_bound < search_right_bound) {
        if (search_right_bound == FileIndexer::kLevelMaxIndex) {
          search_right_bound = num_files - 1;
        }
        start_index = FindFileInRange(cfd_->internal_comparator(),
            file_levels_[level], ikey,
            search_left_bound, search_right_bound);
      } else {
        // search_left_bound > search_right_bound, key does not exist in this
        // level. Since no comparison is done in this level, it will need to
        // search all files in the next level.
        search_left_bound = 0;
        search_right_bound = FileIndexer::kLevelMaxIndex;
        continue;
      }
    }
    // Traverse each relevant file to find the desired key
 #ifndef NDEBUG
    FdWithKeyRange* prev_file = nullptr;
 #endif
    for (int32_t i = start_index; i < num_files;) {
      FdWithKeyRange* f = &files[i];
      assert(f->fd.GetNumber() == files_[level][i]->fd.GetNumber());
      int cmp_largest = -1;
      // Do key range filtering of files or/and fractional cascading if:
      // (1) not all the files are in level 0, or
      // (2) there are more than 3 Level 0 files
      // If there are only 3 or less level 0 files in the system, we skip the
      // key range filtering. In this case, more likely, the system is highly
      // tuned to minimize number of tables queried by each query, so it is
      // unlikely that key range filtering is more efficient than querying the
      // files.
      if (num_non_empty_levels_ > 1 || num_files > 3) {
        // Check if key is within a file's range. If search left bound and right
        // bound point to the same file, we are sure key falls in range.
        assert(
            level == 0 || i == start_index || user_comparator_->Compare(
                user_key, ExtractUserKey(f->smallest_key)) <= 0);
        int cmp_smallest = user_comparator_->Compare(user_key,
                                        ExtractUserKey(f->smallest_key));
        if (cmp_smallest >= 0) {
          cmp_largest = user_comparator_->Compare(user_key,
                                        ExtractUserKey(f->largest_key));
        }
        // Setup file search bound for the next level based on the comparison
        // results
        if (level > 0) {
          file_indexer_.GetNextLevelIndex(level, i, cmp_smallest, cmp_largest,
                                          &search_left_bound,
                                          &search_right_bound);
        }
        // Key falls out of current file's range
        if (cmp_smallest < 0 || cmp_largest > 0) {
          if (level == 0) {
            ++i;
            continue;
          } else {
            break;
          }
        }
      }
 #ifndef NDEBUG
      // Sanity check to make sure that the files are correctly sorted
      if (prev_file) {
        if (level != 0) {
          int comp_sign = internal_comparator_->Compare(prev_file->largest_key,
               f->smallest_key);
          assert(comp_sign < 0);
        } else {
          // level == 0, the current file cannot be newer than the previous one.
          // Use compressed data structure, has no attribute seqNo
          assert(i > 0);
          assert(!NewestFirstBySeqNo(files_[0][i], files_[0][i-1]));
        }
      }
      prev_file = f;
 #endif
    *status = table_cache_->Get(options, *internal_comparator_, f->fd, ikey,
                                &saver, SaveValue, MarkKeyMayExist);
    // TODO: examine the behavior for corrupted key
@ -745,12 +828,7 @@ void Version::Get(const ReadOptions& options,
      case kMerge:
        break;
    }
-      if (level > 0 && cmp_largest < 0) {
+    f = fp.GetNextFile();
        break;
      } else {
        ++i;
      }
    }
  }
  if (kMerge == saver.state) {