Implement key shortening functions in ReverseBytewiseComparator

Summary: Right now ReverseBytewiseComparator::FindShortestSeparator() doesn't really shorten key, and ReverseBytewiseComparator::FindShortestSuccessor() seems to return wrong results. The code is confusing too as it uses BytewiseComparatorImpl::FindShortestSeparator() but the function actually won't do anything if the the first key is larger than the second. Implement ReverseBytewiseComparator::FindShortestSeparator() and override ReverseBytewiseComparator::FindShortestSuccessor() to be empty. Closes https://github.com/facebook/rocksdb/pull/3836 Differential Revision: D7959762 Pulled By: siying fbshipit-source-id: 93acb621c16ce6f23e087ae4e19f7d84d1254683
2018-05-17 18:24:20 -07:00 · 2018-05-17 18:24:20 -07:00 · 17af09fcce
commit 17af09fcce
parent 1d7ca20f29
3 changed files with 192 additions and 1 deletions
--- a/HISTORY.md
+++ b/HISTORY.md
@ -26,6 +26,7 @@
 * Fix `BackupableDBOptions::max_valid_backups_to_open` to not delete backup files when refcount cannot be accurately determined.
 * Fix memory leak when pin_l0_filter_and_index_blocks_in_cache is used with partitioned filters
 * Disable rollback of merge operands in WritePrepared transactions to work around an issue in MyRocks. It can be enabled back by setting TransactionDBOptions::rollback_merge_operands to true.
+* Fix wrong results by ReverseBytewiseComparator::FindShortSuccessor()

 ### Java API Changes
 * Add `BlockBasedTableConfig.setBlockCache` to allow sharing a block cache across DB instances.
--- a/db/comparator_db_test.cc
+++ b/db/comparator_db_test.cc
@ -2,6 +2,7 @@
 //  This source code is licensed under both the GPLv2 (found in the
 //  COPYING file in the root directory) and Apache 2.0 License
 //  (found in the LICENSE.Apache file in the root directory).
+#include <array>
 #include <map>
 #include <string>

@ -433,6 +434,145 @@ TEST_F(ComparatorDBTest, TwoStrComparator) {
  }
 }

+TEST_F(ComparatorDBTest, FindShortestSeparator) {
+  std::string s1 = "abc1xyz";
+  std::string s2 = "abc3xy";
+
+  BytewiseComparator()->FindShortestSeparator(&s1, s2);
+  ASSERT_EQ("abc2", s1);
+
+  s1 = "abc5xyztt";
+
+  ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
+  ASSERT_EQ("abc5", s1);
+
+  s1 = "abc3";
+  s2 = "abc2xy";
+  ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
+  ASSERT_EQ("abc3", s1);
+
+  s1 = "abc3xyz";
+  s2 = "abc2xy";
+  ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
+  ASSERT_EQ("abc3", s1);
+
+  s1 = "abc3xyz";
+  s2 = "abc2";
+  ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
+  ASSERT_EQ("abc3", s1);
+
+  std::string old_s1 = s1 = "abc2xy";
+  s2 = "abc2";
+  ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
+  ASSERT_TRUE(old_s1 >= s1);
+  ASSERT_TRUE(s1 > s2);
+}
+
+TEST_F(ComparatorDBTest, SeparatorSuccessorRandomizeTest) {
+  // Char list for boundary cases.
+  std::array<unsigned char, 6> char_list{{0, 1, 2, 253, 254, 255}};
+  Random rnd(301);
+
+  for (int attempts = 0; attempts < 1000; attempts++) {
+    uint32_t size1 = rnd.Skewed(4);
+    uint32_t size2;
+
+    if (rnd.OneIn(2)) {
+      // size2 to be random size
+      size2 = rnd.Skewed(4);
+    } else {
+      // size1 is within [-2, +2] of size1
+      int diff = static_cast<int>(rnd.Uniform(5)) - 2;
+      int tmp_size2 = static_cast<int>(size1) + diff;
+      if (tmp_size2 < 0) {
+        tmp_size2 = 0;
+      }
+      size2 = static_cast<uint32_t>(tmp_size2);
+    }
+
+    std::string s1;
+    std::string s2;
+    for (uint32_t i = 0; i < size1; i++) {
+      if (rnd.OneIn(2)) {
+        // Use random byte
+        s1 += static_cast<char>(rnd.Uniform(256));
+      } else {
+        // Use one byte in char_list
+        char c = static_cast<char>(char_list[rnd.Uniform(sizeof(char_list))]);
+        s1 += c;
+      }
+    }
+
+    // First set s2 to be the same as s1, and then modify s2.
+    s2 = s1;
+    s2.resize(size2);
+    // We start from the back of the string
+    if (size2 > 0) {
+      uint32_t pos = size2 - 1;
+      do {
+        if (pos >= size1 || rnd.OneIn(4)) {
+          // For 1/4 chance, use random byte
+          s2[pos] = static_cast<char>(rnd.Uniform(256));
+        } else if (rnd.OneIn(4)) {
+          // In 1/4 chance, stop here.
+          break;
+        } else {
+          // Create a char within [-2, +2] of the matching char of s1.
+          int diff = static_cast<int>(rnd.Uniform(5)) - 2;
+          // char may be signed or unsigned based on platform.
+          int s1_char = static_cast<int>(static_cast<unsigned char>(s1[pos]));
+          int s2_char = s1_char + diff;
+          if (s2_char < 0) {
+            s2_char = 0;
+          }
+          if (s2_char > 255) {
+            s2_char = 255;
+          }
+          s2[pos] = static_cast<char>(s2_char);
+        }
+      } while (pos-- != 0);
+    }
+
+    // Test separators
+    for (int rev = 0; rev < 2; rev++) {
+      if (rev == 1) {
+        // switch s1 and s2
+        std::string t = s1;
+        s1 = s2;
+        s2 = t;
+      }
+      std::string separator = s1;
+      BytewiseComparator()->FindShortestSeparator(&separator, s2);
+      std::string rev_separator = s1;
+      ReverseBytewiseComparator()->FindShortestSeparator(&rev_separator, s2);
+
+      if (s1 == s2) {
+        ASSERT_EQ(s1, separator);
+        ASSERT_EQ(s2, rev_separator);
+      } else if (s1 < s2) {
+        ASSERT_TRUE(s1 <= separator);
+        ASSERT_TRUE(s2 > separator);
+        ASSERT_LE(separator.size(), std::max(s1.size(), s2.size()));
+        ASSERT_EQ(s1, rev_separator);
+      } else {
+        ASSERT_TRUE(s1 >= rev_separator);
+        ASSERT_TRUE(s2 < rev_separator);
+        ASSERT_LE(rev_separator.size(), std::max(s1.size(), s2.size()));
+        ASSERT_EQ(s1, separator);
+      }
+    }
+
+    // Test successors
+    std::string succ = s1;
+    BytewiseComparator()->FindShortSuccessor(&succ);
+    ASSERT_TRUE(succ >= s1);
+
+    succ = s1;
+    ReverseBytewiseComparator()->FindShortSuccessor(&succ);
+    ASSERT_TRUE(succ <= s1);
+  }
+}
+
 }  // namespace rocksdb

 int main(int argc, char** argv) {
--- a/util/comparator.cc
+++ b/util/comparator.cc
@ -111,8 +111,58 @@ class ReverseBytewiseComparatorImpl : public BytewiseComparatorImpl {
  virtual int Compare(const Slice& a, const Slice& b) const override {
    return -a.compare(b);
  }
-};

+  void FindShortestSeparator(std::string* start,
+                             const Slice& limit) const override {
+    // Find length of common prefix
+    size_t min_length = std::min(start->size(), limit.size());
+    size_t diff_index = 0;
+    while ((diff_index < min_length) &&
+           ((*start)[diff_index] == limit[diff_index])) {
+      diff_index++;
+    }
+
+    assert(diff_index <= min_length);
+    if (diff_index == min_length) {
+      // Do not shorten if one string is a prefix of the other
+      //
+      // We could handle cases like:
+      //     V
+      // A A 2 X Y
+      // A A 2
+      // in a similar way as BytewiseComparator::FindShortestSeparator().
+      // We keep it simple by not implementing it. We can come back to it
+      // later when needed.
+    } else {
+      uint8_t start_byte = static_cast<uint8_t>((*start)[diff_index]);
+      uint8_t limit_byte = static_cast<uint8_t>(limit[diff_index]);
+      if (start_byte > limit_byte && diff_index < start->size() - 1) {
+        // Case like
+        //     V
+        // A A 3 A A
+        // A A 1 B B
+        //
+        // or
+        //     v
+        // A A 2 A A
+        // A A 1 B B
+        // In this case "AA2" will be good.
+#ifndef NDEBUG
+        std::string old_start = *start;
+#endif
+        start->resize(diff_index + 1);
+#ifndef NDEBUG
+        assert(old_start >= *start);
+#endif
+        assert(Slice(*start).compare(limit) > 0);
+      }
+    }
+  }
+
+  void FindShortSuccessor(std::string* /*key*/) const override {
+    // Don't do anything for simplicity.
+  }
+};
 }// namespace

 const Comparator* BytewiseComparator() {