Implement key shortening functions in ReverseBytewiseComparator

Summary:
Right now ReverseBytewiseComparator::FindShortestSeparator() doesn't really shorten key, and ReverseBytewiseComparator::FindShortestSuccessor() seems to return wrong results. The code is confusing too as it uses BytewiseComparatorImpl::FindShortestSeparator() but the function actually won't do anything if the the first key is larger than the second.

Implement ReverseBytewiseComparator::FindShortestSeparator() and override ReverseBytewiseComparator::FindShortestSuccessor() to be empty.
Closes https://github.com/facebook/rocksdb/pull/3836

Differential Revision: D7959762

Pulled By: siying

fbshipit-source-id: 93acb621c16ce6f23e087ae4e19f7d84d1254683
This commit is contained in:
Siying Dong 2018-05-17 18:24:20 -07:00 committed by Facebook Github Bot
parent 1d7ca20f29
commit 17af09fcce
3 changed files with 192 additions and 1 deletions

View File

@ -26,6 +26,7 @@
* Fix `BackupableDBOptions::max_valid_backups_to_open` to not delete backup files when refcount cannot be accurately determined.
* Fix memory leak when pin_l0_filter_and_index_blocks_in_cache is used with partitioned filters
* Disable rollback of merge operands in WritePrepared transactions to work around an issue in MyRocks. It can be enabled back by setting TransactionDBOptions::rollback_merge_operands to true.
* Fix wrong results by ReverseBytewiseComparator::FindShortSuccessor()
### Java API Changes
* Add `BlockBasedTableConfig.setBlockCache` to allow sharing a block cache across DB instances.

View File

@ -2,6 +2,7 @@
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include <array>
#include <map>
#include <string>
@ -433,6 +434,145 @@ TEST_F(ComparatorDBTest, TwoStrComparator) {
}
}
TEST_F(ComparatorDBTest, FindShortestSeparator) {
std::string s1 = "abc1xyz";
std::string s2 = "abc3xy";
BytewiseComparator()->FindShortestSeparator(&s1, s2);
ASSERT_EQ("abc2", s1);
s1 = "abc5xyztt";
ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
ASSERT_EQ("abc5", s1);
s1 = "abc3";
s2 = "abc2xy";
ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
ASSERT_EQ("abc3", s1);
s1 = "abc3xyz";
s2 = "abc2xy";
ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
ASSERT_EQ("abc3", s1);
s1 = "abc3xyz";
s2 = "abc2";
ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
ASSERT_EQ("abc3", s1);
std::string old_s1 = s1 = "abc2xy";
s2 = "abc2";
ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
ASSERT_TRUE(old_s1 >= s1);
ASSERT_TRUE(s1 > s2);
}
TEST_F(ComparatorDBTest, SeparatorSuccessorRandomizeTest) {
// Char list for boundary cases.
std::array<unsigned char, 6> char_list{{0, 1, 2, 253, 254, 255}};
Random rnd(301);
for (int attempts = 0; attempts < 1000; attempts++) {
uint32_t size1 = rnd.Skewed(4);
uint32_t size2;
if (rnd.OneIn(2)) {
// size2 to be random size
size2 = rnd.Skewed(4);
} else {
// size1 is within [-2, +2] of size1
int diff = static_cast<int>(rnd.Uniform(5)) - 2;
int tmp_size2 = static_cast<int>(size1) + diff;
if (tmp_size2 < 0) {
tmp_size2 = 0;
}
size2 = static_cast<uint32_t>(tmp_size2);
}
std::string s1;
std::string s2;
for (uint32_t i = 0; i < size1; i++) {
if (rnd.OneIn(2)) {
// Use random byte
s1 += static_cast<char>(rnd.Uniform(256));
} else {
// Use one byte in char_list
char c = static_cast<char>(char_list[rnd.Uniform(sizeof(char_list))]);
s1 += c;
}
}
// First set s2 to be the same as s1, and then modify s2.
s2 = s1;
s2.resize(size2);
// We start from the back of the string
if (size2 > 0) {
uint32_t pos = size2 - 1;
do {
if (pos >= size1 || rnd.OneIn(4)) {
// For 1/4 chance, use random byte
s2[pos] = static_cast<char>(rnd.Uniform(256));
} else if (rnd.OneIn(4)) {
// In 1/4 chance, stop here.
break;
} else {
// Create a char within [-2, +2] of the matching char of s1.
int diff = static_cast<int>(rnd.Uniform(5)) - 2;
// char may be signed or unsigned based on platform.
int s1_char = static_cast<int>(static_cast<unsigned char>(s1[pos]));
int s2_char = s1_char + diff;
if (s2_char < 0) {
s2_char = 0;
}
if (s2_char > 255) {
s2_char = 255;
}
s2[pos] = static_cast<char>(s2_char);
}
} while (pos-- != 0);
}
// Test separators
for (int rev = 0; rev < 2; rev++) {
if (rev == 1) {
// switch s1 and s2
std::string t = s1;
s1 = s2;
s2 = t;
}
std::string separator = s1;
BytewiseComparator()->FindShortestSeparator(&separator, s2);
std::string rev_separator = s1;
ReverseBytewiseComparator()->FindShortestSeparator(&rev_separator, s2);
if (s1 == s2) {
ASSERT_EQ(s1, separator);
ASSERT_EQ(s2, rev_separator);
} else if (s1 < s2) {
ASSERT_TRUE(s1 <= separator);
ASSERT_TRUE(s2 > separator);
ASSERT_LE(separator.size(), std::max(s1.size(), s2.size()));
ASSERT_EQ(s1, rev_separator);
} else {
ASSERT_TRUE(s1 >= rev_separator);
ASSERT_TRUE(s2 < rev_separator);
ASSERT_LE(rev_separator.size(), std::max(s1.size(), s2.size()));
ASSERT_EQ(s1, separator);
}
}
// Test successors
std::string succ = s1;
BytewiseComparator()->FindShortSuccessor(&succ);
ASSERT_TRUE(succ >= s1);
succ = s1;
ReverseBytewiseComparator()->FindShortSuccessor(&succ);
ASSERT_TRUE(succ <= s1);
}
}
} // namespace rocksdb
int main(int argc, char** argv) {

View File

@ -111,8 +111,58 @@ class ReverseBytewiseComparatorImpl : public BytewiseComparatorImpl {
virtual int Compare(const Slice& a, const Slice& b) const override {
return -a.compare(b);
}
};
void FindShortestSeparator(std::string* start,
const Slice& limit) const override {
// Find length of common prefix
size_t min_length = std::min(start->size(), limit.size());
size_t diff_index = 0;
while ((diff_index < min_length) &&
((*start)[diff_index] == limit[diff_index])) {
diff_index++;
}
assert(diff_index <= min_length);
if (diff_index == min_length) {
// Do not shorten if one string is a prefix of the other
//
// We could handle cases like:
// V
// A A 2 X Y
// A A 2
// in a similar way as BytewiseComparator::FindShortestSeparator().
// We keep it simple by not implementing it. We can come back to it
// later when needed.
} else {
uint8_t start_byte = static_cast<uint8_t>((*start)[diff_index]);
uint8_t limit_byte = static_cast<uint8_t>(limit[diff_index]);
if (start_byte > limit_byte && diff_index < start->size() - 1) {
// Case like
// V
// A A 3 A A
// A A 1 B B
//
// or
// v
// A A 2 A A
// A A 1 B B
// In this case "AA2" will be good.
#ifndef NDEBUG
std::string old_start = *start;
#endif
start->resize(diff_index + 1);
#ifndef NDEBUG
assert(old_start >= *start);
#endif
assert(Slice(*start).compare(limit) > 0);
}
}
}
void FindShortSuccessor(std::string* /*key*/) const override {
// Don't do anything for simplicity.
}
};
}// namespace
const Comparator* BytewiseComparator() {