Refactoring Version::Get()

Summary: Refactoring Version::Get() method to move file picker logic to a separate class. Test Plan: make check all Reviewers: igor, sdong, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D19713
2014-07-16 13:33:02 -07:00 · 2014-07-16 13:33:02 -07:00 · 0418e66e2a
commit 0418e66e2a
parent c11d604ab3
1 changed files with 278 additions and 200 deletions
--- a/db/version_set.cc
+++ b/db/version_set.cc
@ -40,6 +40,263 @@

 namespace rocksdb {

+namespace {
+
+// Binary-search file_level.files[left, right) for the first file whose
+// largest_key is >= key. Returns right if no file in the range qualifies.
+int FindFileInRange(const InternalKeyComparator& icmp,
+    const FileLevel& file_level,
+    const Slice& key,
+    uint32_t left,
+    uint32_t right) {
+  while (left < right) {
+    uint32_t mid = (left + right) / 2;
+    const FdWithKeyRange& f = file_level.files[mid];
+    if (icmp.InternalKeyComparator::Compare(f.largest_key, key) < 0) {
+      // Key at "mid.largest" is < "key".  Therefore all
+      // files at or before "mid" are uninteresting.
+      left = mid + 1;
+    } else {
+      // Key at "mid.largest" is >= "key".  Therefore all files
+      // after "mid" are uninteresting.
+      right = mid;
+    }
+  }
+  return right;
+}
+
+bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) {
+  if (a->smallest_seqno != b->smallest_seqno) {
+    return a->smallest_seqno > b->smallest_seqno;
+  }
+  if (a->largest_seqno != b->largest_seqno) {
+    return a->largest_seqno > b->largest_seqno;
+  }
+  // Break ties by file number; the larger file number sorts first
+  return a->fd.GetNumber() > b->fd.GetNumber();
+}
+
+bool BySmallestKey(FileMetaData* a, FileMetaData* b,
+                   const InternalKeyComparator* cmp) {
+  int r = cmp->Compare(a->smallest, b->smallest);
+  if (r != 0) {
+    return (r < 0);
+  }
+  // Break ties by file number; the smaller file number sorts first
+  return (a->fd.GetNumber() < b->fd.GetNumber());
+}
+
+// Helper that yields, one at a time, the next file to search for a key.
+// Files are visited level by level, starting at level 0.
+// We can search level-by-level since entries never hop across
+// levels. Therefore we are guaranteed that if we find data
+// in a smaller level, later levels are irrelevant (unless we
+// are MergeInProgress).
+class FilePicker {
+ public:
+  FilePicker(
+      std::vector<FileMetaData*>* files,
+      const Slice& user_key,
+      const Slice& ikey,
+      autovector<FileLevel>* file_levels,
+      unsigned int num_levels,
+      FileIndexer* file_indexer,
+      const Comparator* user_comparator,
+      const InternalKeyComparator* internal_comparator)
+      : num_levels_(num_levels),
+        curr_level_(-1),
+        search_left_bound_(0),
+        search_right_bound_(FileIndexer::kLevelMaxIndex),
+        files_(files),
+        file_levels_(file_levels),
+        user_key_(user_key),
+        ikey_(ikey),
+        file_indexer_(file_indexer),
+        user_comparator_(user_comparator),
+        internal_comparator_(internal_comparator) {
+    // Set up member variables to search the first level.
+    search_ended_ = !PrepareNextLevel();
+    if (!search_ended_) {
+      // Prefetch Level 0 table data to avoid cache miss if possible.
+      for (unsigned int i = 0; i < (*file_levels_)[0].num_files; ++i) {
+        auto* r = (*file_levels_)[0].files[i].fd.table_reader;
+        if (r) {
+          r->Prepare(ikey);
+        }
+      }
+    }
+  }
+
+  FdWithKeyRange* GetNextFile() {
+    while (!search_ended_) {  // Loops over different levels.
+      while (curr_index_in_curr_level_ < curr_file_level_->num_files) {
+        // Loops over all files in current level.
+        FdWithKeyRange* f = &curr_file_level_->files[curr_index_in_curr_level_];
+        int cmp_largest = -1;
+
+        // Do key range filtering of files or/and fractional cascading if:
+        // (1) not all the files are in level 0, or
+        // (2) there are more than 3 Level 0 files
+        // If there are only 3 or fewer level 0 files in the system, we skip
+        // the key range filtering. In this case, more likely, the system is
+        // highly tuned to minimize number of tables queried by each query,
+        // so it is unlikely that key range filtering is more efficient than
+        // querying the files.
+        if (num_levels_ > 1 || curr_file_level_->num_files > 3) {
+          // Check if key is within a file's range. If search left bound and
+          // right bound point to the same file, we are sure key falls in
+          // range.
+          assert(
+              curr_level_ == 0 ||
+              curr_index_in_curr_level_ == start_index_in_curr_level_ ||
+              user_comparator_->Compare(user_key_,
+                ExtractUserKey(f->smallest_key)) <= 0);
+
+          int cmp_smallest = user_comparator_->Compare(user_key_,
+              ExtractUserKey(f->smallest_key));
+          if (cmp_smallest >= 0) {
+            cmp_largest = user_comparator_->Compare(user_key_,
+                ExtractUserKey(f->largest_key));
+          }
+
+          // Set up file search bound for the next level based on the
+          // comparison results
+          if (curr_level_ > 0) {
+            file_indexer_->GetNextLevelIndex(curr_level_,
+                                            curr_index_in_curr_level_,
+                                            cmp_smallest, cmp_largest,
+                                            &search_left_bound_,
+                                            &search_right_bound_);
+          }
+          // Key falls out of current file's range
+          if (cmp_smallest < 0 || cmp_largest > 0) {
+            if (curr_level_ == 0) {
+              ++curr_index_in_curr_level_;
+              continue;
+            } else {
+              // Search next level.
+              break;
+            }
+          }
+        }
+#ifndef NDEBUG
+        // Sanity check to make sure that the files are correctly sorted
+        if (prev_file_) {
+          if (curr_level_ != 0) {
+            int comp_sign = internal_comparator_->Compare(
+                prev_file_->largest_key, f->smallest_key);
+            assert(comp_sign < 0);
+          } else {
+            // level == 0, the current file cannot be newer than the previous
+            // one. FdWithKeyRange carries no seqno, so compare via files_.
+            assert(curr_index_in_curr_level_ > 0);
+            assert(!NewestFirstBySeqNo(files_[0][curr_index_in_curr_level_],
+                  files_[0][curr_index_in_curr_level_-1]));
+          }
+        }
+        prev_file_ = f;
+#endif
+        if (curr_level_ > 0 && cmp_largest < 0) {
+          // No more files to search in this level.
+          search_ended_ = !PrepareNextLevel();
+        } else {
+          ++curr_index_in_curr_level_;
+        }
+        return f;
+      }
+      // Start searching next level.
+      search_ended_ = !PrepareNextLevel();
+    }
+    // Search ended.
+    return nullptr;
+  }
+
+ private:
+  unsigned int num_levels_;  // number of levels to search
+  unsigned int curr_level_;  // level currently being searched
+  int search_left_bound_;  // left index bound used when entering a level
+  int search_right_bound_;  // right index bound used when entering a level
+  std::vector<FileMetaData*>* files_;  // used only in debug sanity checks
+  autovector<FileLevel>* file_levels_;  // per-level file arrays to search
+  bool search_ended_;  // true once PrepareNextLevel() has returned false
+  FileLevel* curr_file_level_;  // &(*file_levels_)[curr_level_]
+  unsigned int curr_index_in_curr_level_;  // next file index to examine
+  unsigned int start_index_in_curr_level_;  // first index tried in level
+  Slice user_key_;  // compared against the files' user keys
+  Slice ikey_;  // key used for the binary search within a level
+  FileIndexer* file_indexer_;
+  const Comparator* user_comparator_;
+  const InternalKeyComparator* internal_comparator_;
+#ifndef NDEBUG
+  FdWithKeyRange* prev_file_;  // last file returned from the current level
+#endif
+
+  // Set up member variables to search the next level.
+  // Returns false if there are no more levels to search.
+  bool PrepareNextLevel() {
+    curr_level_++;
+    while (curr_level_ < num_levels_) {
+      curr_file_level_ = &(*file_levels_)[curr_level_];
+      if (curr_file_level_->num_files == 0) {
+        // When current level is empty, the search bound generated from upper
+        // level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is
+        // also empty.
+        assert(search_left_bound_ == 0);
+        assert(search_right_bound_ == -1 ||
+               search_right_bound_ == FileIndexer::kLevelMaxIndex);
+        // Since current level is empty, it will need to search all files in
+        // the next level
+        search_left_bound_ = 0;
+        search_right_bound_ = FileIndexer::kLevelMaxIndex;
+        curr_level_++;
+        continue;
+      }
+
+      // Some files may overlap each other. We find
+      // all files that overlap user_key and process them in order from
+      // newest to oldest. In the context of merge-operator, this can occur at
+      // any level. Otherwise, it only occurs at Level-0 (since Put/Deletes
+      // are always compacted into a single entry).
+      int32_t start_index;
+      if (curr_level_ == 0) {
+        // On Level-0, we read through all files to check for overlap.
+        start_index = 0;
+      } else {
+        // On Level-n (n>=1), files are sorted. Binary search to find the
+        // earliest file whose largest key >= ikey. Search left bound and
+        // right bound are used to narrow the range.
+        if (search_left_bound_ == search_right_bound_) {
+          start_index = search_left_bound_;
+        } else if (search_left_bound_ < search_right_bound_) {
+          if (search_right_bound_ == FileIndexer::kLevelMaxIndex) {
+            search_right_bound_ = curr_file_level_->num_files - 1;
+          }
+          start_index = FindFileInRange(*internal_comparator_,
+              *curr_file_level_, ikey_,
+              search_left_bound_, search_right_bound_);
+        } else {
+          // search_left_bound > search_right_bound, key does not exist in
+          // this level. Since no comparison is done in this level, it will
+          // need to search all files in the next level.
+          search_left_bound_ = 0;
+          search_right_bound_ = FileIndexer::kLevelMaxIndex;
+          curr_level_++;
+          continue;
+        }
+      }
+      start_index_in_curr_level_ = start_index;
+      curr_index_in_curr_level_ = start_index;
+#ifndef NDEBUG
+      prev_file_ = nullptr;
+#endif
+      return true;
+    }
+    // curr_level_ == num_levels_, so there are no more levels to search.
+    return false;
+  }
+};
+}  // anonymous namespace
+
 static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
  uint64_t sum = 0;
  for (size_t i = 0; i < files.size() && files[i]; i++) {
@ -82,29 +339,6 @@ Version::~Version() {
  delete[] files_;
 }

-// Find File in FileLevel data structure
-// Within an index range defined by left and right
-int FindFileInRange(const InternalKeyComparator& icmp,
-    const FileLevel& file_level,
-    const Slice& key,
-    uint32_t left,
-    uint32_t right) {
-  while (left < right) {
-    uint32_t mid = (left + right) / 2;
-    const FdWithKeyRange& f = file_level.files[mid];
-    if (icmp.InternalKeyComparator::Compare(f.largest_key, key) < 0) {
-      // Key at "mid.largest" is < "target".  Therefore all
-      // files at or before "mid" are uninteresting.
-      left = mid + 1;
-    } else {
-      // Key at "mid.largest" is >= "target".  Therefore all files
-      // after "mid" are uninteresting.
-      right = mid;
-    }
-  }
-  return right;
-}
-
 int FindFile(const InternalKeyComparator& icmp,
             const FileLevel& file_level,
             const Slice& key) {
@ -507,28 +741,6 @@ static bool SaveValue(void* arg, const ParsedInternalKey& parsed_key,
  return false;
 }

-namespace {
-bool NewestFirstBySeqNo(FileMetaData* a, FileMetaData* b) {
-  if (a->smallest_seqno != b->smallest_seqno) {
-    return a->smallest_seqno > b->smallest_seqno;
-  }
-  if (a->largest_seqno != b->largest_seqno) {
-    return a->largest_seqno > b->largest_seqno;
-  }
-  // Break ties by file number
-  return a->fd.GetNumber() > b->fd.GetNumber();
-}
-bool BySmallestKey(FileMetaData* a, FileMetaData* b,
-                   const InternalKeyComparator* cmp) {
-  int r = cmp->Compare(a->smallest, b->smallest);
-  if (r != 0) {
-    return (r < 0);
-  }
-  // Break ties by file number
-  return (a->fd.GetNumber() < b->fd.GetNumber());
-}
-}  // anonymous namespace
-
 Version::Version(ColumnFamilyData* cfd, VersionSet* vset,
                 uint64_t version_number)
    : cfd_(cfd),
@ -591,166 +803,32 @@ void Version::Get(const ReadOptions& options,
  saver.logger = info_log_;
  saver.statistics = db_statistics_;

-  // We can search level-by-level since entries never hop across
-  // levels. Therefore we are guaranteed that if we find data
-  // in an smaller level, later levels are irrelevant (unless we
-  // are MergeInProgress).
-
-  int32_t search_left_bound = 0;
-  int32_t search_right_bound = FileIndexer::kLevelMaxIndex;
-  for (int level = 0; level < num_non_empty_levels_; ++level) {
-    int num_files = file_levels_[level].num_files;
-    if (num_files == 0) {
-      // When current level is empty, the search bound generated from upper
-      // level must be [0, -1] or [0, FileIndexer::kLevelMaxIndex] if it is
-      // also empty.
-      assert(search_left_bound == 0);
-      assert(search_right_bound == -1 ||
-             search_right_bound == FileIndexer::kLevelMaxIndex);
-      // Since current level is empty, it will need to search all files in the
-      // next level
-      search_left_bound = 0;
-      search_right_bound = FileIndexer::kLevelMaxIndex;
-      continue;
+  FilePicker fp(files_, user_key, ikey, &file_levels_, num_non_empty_levels_,
+      &file_indexer_, user_comparator_, internal_comparator_);
+  FdWithKeyRange* f = fp.GetNextFile();
+  while (f != nullptr) {
+    *status = table_cache_->Get(options, *internal_comparator_, f->fd, ikey,
+                                &saver, SaveValue, MarkKeyMayExist);
+    // TODO: examine the behavior for corrupted key
+    if (!status->ok()) {
+      return;
    }

-    // Prefetch table data to avoid cache miss if possible
-    if (level == 0) {
-      for (int i = 0; i < num_files; ++i) {
-        auto* r = file_levels_[0].files[i].fd.table_reader;
-        if (r) {
-          r->Prepare(ikey);
-        }
-      }
-    }
-
-    // Get the list of files to search in this level
-    FdWithKeyRange* files = file_levels_[level].files;
-
-    // Some files may overlap each other. We find
-    // all files that overlap user_key and process them in order from
-    // newest to oldest. In the context of merge-operator,
-    // this can occur at any level. Otherwise, it only occurs
-    // at Level-0 (since Put/Deletes are always compacted into a single entry).
-    int32_t start_index;
-    if (level == 0) {
-      // On Level-0, we read through all files to check for overlap.
-      start_index = 0;
-    } else {
-      // On Level-n (n>=1), files are sorted. Binary search to find the earliest
-      // file whose largest key >= ikey. Search left bound and right bound are
-      // used to narrow the range.
-      if (search_left_bound == search_right_bound) {
-        start_index = search_left_bound;
-      } else if (search_left_bound < search_right_bound) {
-        if (search_right_bound == FileIndexer::kLevelMaxIndex) {
-          search_right_bound = num_files - 1;
-        }
-        start_index = FindFileInRange(cfd_->internal_comparator(),
-            file_levels_[level], ikey,
-            search_left_bound, search_right_bound);
-      } else {
-        // search_left_bound > search_right_bound, key does not exist in this
-        // level. Since no comparision is done in this level, it will need to
-        // search all files in the next level.
-        search_left_bound = 0;
-        search_right_bound = FileIndexer::kLevelMaxIndex;
-        continue;
-      }
-    }
-    // Traverse each relevant file to find the desired key
-#ifndef NDEBUG
-    FdWithKeyRange* prev_file = nullptr;
-#endif
-
-    for (int32_t i = start_index; i < num_files;) {
-      FdWithKeyRange* f = &files[i];
-      assert(f->fd.GetNumber() == files_[level][i]->fd.GetNumber());
-      int cmp_largest = -1;
-
-      // Do key range filtering of files or/and fractional cascading if:
-      // (1) not all the files are in level 0, or
-      // (2) there are more than 3 Level 0 files
-      // If there are only 3 or less level 0 files in the system, we skip the
-      // key range filtering. In this case, more likely, the system is highly
-      // tuned to minimize number of tables queried by each query, so it is
-      // unlikely that key range filtering is more efficient than querying the
-      // files.
-      if (num_non_empty_levels_ > 1 || num_files > 3) {
-        // Check if key is within a file's range. If search left bound and right
-        // bound point to the same find, we are sure key falls in range.
-        assert(
-            level == 0 || i == start_index || user_comparator_->Compare(
-                user_key, ExtractUserKey(f->smallest_key)) <= 0);
-
-        int cmp_smallest = user_comparator_->Compare(user_key,
-                                        ExtractUserKey(f->smallest_key));
-        if (cmp_smallest >= 0) {
-          cmp_largest = user_comparator_->Compare(user_key,
-                                        ExtractUserKey(f->largest_key));
-        }
-
-        // Setup file search bound for the next level based on the comparison
-        // results
-        if (level > 0) {
-          file_indexer_.GetNextLevelIndex(level, i, cmp_smallest, cmp_largest,
-                                          &search_left_bound,
-                                          &search_right_bound);
-        }
-        // Key falls out of current file's range
-        if (cmp_smallest < 0 || cmp_largest > 0) {
-          if (level == 0) {
-            ++i;
-            continue;
-          } else {
-            break;
-          }
-        }
-      }
-
-#ifndef NDEBUG
-      // Sanity check to make sure that the files are correctly sorted
-      if (prev_file) {
-        if (level != 0) {
-          int comp_sign = internal_comparator_->Compare(prev_file->largest_key,
-               f->smallest_key);
-          assert(comp_sign < 0);
-        } else {
-          // level == 0, the current file cannot be newer than the previous one.
-          // Use compressed data structure, has no attribute seqNo
-          assert(i > 0);
-          assert(!NewestFirstBySeqNo(files_[0][i], files_[0][i-1]));
-        }
-      }
-      prev_file = f;
-#endif
-      *status = table_cache_->Get(options, *internal_comparator_, f->fd, ikey,
-                                  &saver, SaveValue, MarkKeyMayExist);
-      // TODO: examine the behavior for corrupted key
-      if (!status->ok()) {
+    switch (saver.state) {
+      case kNotFound:
+        break;      // Keep searching in other files
+      case kFound:
        return;
-      }
-
-      switch (saver.state) {
-        case kNotFound:
-          break;      // Keep searching in other files
-        case kFound:
-          return;
-        case kDeleted:
-          *status = Status::NotFound();  // Use empty error message for speed
-          return;
-        case kCorrupt:
-          *status = Status::Corruption("corrupted key for ", user_key);
-          return;
-        case kMerge:
-          break;
-      }
-      if (level > 0 && cmp_largest < 0) {
+      case kDeleted:
+        *status = Status::NotFound();  // Use empty error message for speed
+        return;
+      case kCorrupt:
+        *status = Status::Corruption("corrupted key for ", user_key);
+        return;
+      case kMerge:
        break;
-      } else {
-        ++i;
-      }
    }
+    f = fp.GetNextFile();
  }

  if (kMerge == saver.state) {