Implement key shortening functions in ReverseBytewiseComparator
Summary: Right now ReverseBytewiseComparator::FindShortestSeparator() doesn't really shorten key, and ReverseBytewiseComparator::FindShortestSuccessor() seems to return wrong results. The code is confusing too as it uses BytewiseComparatorImpl::FindShortestSeparator() but the function actually won't do anything if the the first key is larger than the second. Implement ReverseBytewiseComparator::FindShortestSeparator() and override ReverseBytewiseComparator::FindShortestSuccessor() to be empty. Closes https://github.com/facebook/rocksdb/pull/3836 Differential Revision: D7959762 Pulled By: siying fbshipit-source-id: 93acb621c16ce6f23e087ae4e19f7d84d1254683
This commit is contained in:
parent
1d7ca20f29
commit
17af09fcce
@ -26,6 +26,7 @@
|
||||
* Fix `BackupableDBOptions::max_valid_backups_to_open` to not delete backup files when refcount cannot be accurately determined.
|
||||
* Fix memory leak when pin_l0_filter_and_index_blocks_in_cache is used with partitioned filters
|
||||
* Disable rollback of merge operands in WritePrepared transactions to work around an issue in MyRocks. It can be enabled back by setting TransactionDBOptions::rollback_merge_operands to true.
|
||||
* Fix wrong results by ReverseBytewiseComparator::FindShortSuccessor()
|
||||
|
||||
### Java API Changes
|
||||
* Add `BlockBasedTableConfig.setBlockCache` to allow sharing a block cache across DB instances.
|
||||
|
@ -2,6 +2,7 @@
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
#include <array>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
@ -433,6 +434,145 @@ TEST_F(ComparatorDBTest, TwoStrComparator) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(ComparatorDBTest, FindShortestSeparator) {
|
||||
std::string s1 = "abc1xyz";
|
||||
std::string s2 = "abc3xy";
|
||||
|
||||
BytewiseComparator()->FindShortestSeparator(&s1, s2);
|
||||
ASSERT_EQ("abc2", s1);
|
||||
|
||||
s1 = "abc5xyztt";
|
||||
|
||||
ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
|
||||
ASSERT_EQ("abc5", s1);
|
||||
|
||||
s1 = "abc3";
|
||||
s2 = "abc2xy";
|
||||
ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
|
||||
ASSERT_EQ("abc3", s1);
|
||||
|
||||
s1 = "abc3xyz";
|
||||
s2 = "abc2xy";
|
||||
ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
|
||||
ASSERT_EQ("abc3", s1);
|
||||
|
||||
s1 = "abc3xyz";
|
||||
s2 = "abc2";
|
||||
ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
|
||||
ASSERT_EQ("abc3", s1);
|
||||
|
||||
std::string old_s1 = s1 = "abc2xy";
|
||||
s2 = "abc2";
|
||||
ReverseBytewiseComparator()->FindShortestSeparator(&s1, s2);
|
||||
ASSERT_TRUE(old_s1 >= s1);
|
||||
ASSERT_TRUE(s1 > s2);
|
||||
}
|
||||
|
||||
TEST_F(ComparatorDBTest, SeparatorSuccessorRandomizeTest) {
|
||||
// Char list for boundary cases.
|
||||
std::array<unsigned char, 6> char_list{{0, 1, 2, 253, 254, 255}};
|
||||
Random rnd(301);
|
||||
|
||||
for (int attempts = 0; attempts < 1000; attempts++) {
|
||||
uint32_t size1 = rnd.Skewed(4);
|
||||
uint32_t size2;
|
||||
|
||||
if (rnd.OneIn(2)) {
|
||||
// size2 to be random size
|
||||
size2 = rnd.Skewed(4);
|
||||
} else {
|
||||
// size1 is within [-2, +2] of size1
|
||||
int diff = static_cast<int>(rnd.Uniform(5)) - 2;
|
||||
int tmp_size2 = static_cast<int>(size1) + diff;
|
||||
if (tmp_size2 < 0) {
|
||||
tmp_size2 = 0;
|
||||
}
|
||||
size2 = static_cast<uint32_t>(tmp_size2);
|
||||
}
|
||||
|
||||
std::string s1;
|
||||
std::string s2;
|
||||
for (uint32_t i = 0; i < size1; i++) {
|
||||
if (rnd.OneIn(2)) {
|
||||
// Use random byte
|
||||
s1 += static_cast<char>(rnd.Uniform(256));
|
||||
} else {
|
||||
// Use one byte in char_list
|
||||
char c = static_cast<char>(char_list[rnd.Uniform(sizeof(char_list))]);
|
||||
s1 += c;
|
||||
}
|
||||
}
|
||||
|
||||
// First set s2 to be the same as s1, and then modify s2.
|
||||
s2 = s1;
|
||||
s2.resize(size2);
|
||||
// We start from the back of the string
|
||||
if (size2 > 0) {
|
||||
uint32_t pos = size2 - 1;
|
||||
do {
|
||||
if (pos >= size1 || rnd.OneIn(4)) {
|
||||
// For 1/4 chance, use random byte
|
||||
s2[pos] = static_cast<char>(rnd.Uniform(256));
|
||||
} else if (rnd.OneIn(4)) {
|
||||
// In 1/4 chance, stop here.
|
||||
break;
|
||||
} else {
|
||||
// Create a char within [-2, +2] of the matching char of s1.
|
||||
int diff = static_cast<int>(rnd.Uniform(5)) - 2;
|
||||
// char may be signed or unsigned based on platform.
|
||||
int s1_char = static_cast<int>(static_cast<unsigned char>(s1[pos]));
|
||||
int s2_char = s1_char + diff;
|
||||
if (s2_char < 0) {
|
||||
s2_char = 0;
|
||||
}
|
||||
if (s2_char > 255) {
|
||||
s2_char = 255;
|
||||
}
|
||||
s2[pos] = static_cast<char>(s2_char);
|
||||
}
|
||||
} while (pos-- != 0);
|
||||
}
|
||||
|
||||
// Test separators
|
||||
for (int rev = 0; rev < 2; rev++) {
|
||||
if (rev == 1) {
|
||||
// switch s1 and s2
|
||||
std::string t = s1;
|
||||
s1 = s2;
|
||||
s2 = t;
|
||||
}
|
||||
std::string separator = s1;
|
||||
BytewiseComparator()->FindShortestSeparator(&separator, s2);
|
||||
std::string rev_separator = s1;
|
||||
ReverseBytewiseComparator()->FindShortestSeparator(&rev_separator, s2);
|
||||
|
||||
if (s1 == s2) {
|
||||
ASSERT_EQ(s1, separator);
|
||||
ASSERT_EQ(s2, rev_separator);
|
||||
} else if (s1 < s2) {
|
||||
ASSERT_TRUE(s1 <= separator);
|
||||
ASSERT_TRUE(s2 > separator);
|
||||
ASSERT_LE(separator.size(), std::max(s1.size(), s2.size()));
|
||||
ASSERT_EQ(s1, rev_separator);
|
||||
} else {
|
||||
ASSERT_TRUE(s1 >= rev_separator);
|
||||
ASSERT_TRUE(s2 < rev_separator);
|
||||
ASSERT_LE(rev_separator.size(), std::max(s1.size(), s2.size()));
|
||||
ASSERT_EQ(s1, separator);
|
||||
}
|
||||
}
|
||||
|
||||
// Test successors
|
||||
std::string succ = s1;
|
||||
BytewiseComparator()->FindShortSuccessor(&succ);
|
||||
ASSERT_TRUE(succ >= s1);
|
||||
|
||||
succ = s1;
|
||||
ReverseBytewiseComparator()->FindShortSuccessor(&succ);
|
||||
ASSERT_TRUE(succ <= s1);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
@ -111,8 +111,58 @@ class ReverseBytewiseComparatorImpl : public BytewiseComparatorImpl {
|
||||
virtual int Compare(const Slice& a, const Slice& b) const override {
|
||||
return -a.compare(b);
|
||||
}
|
||||
};
|
||||
|
||||
void FindShortestSeparator(std::string* start,
|
||||
const Slice& limit) const override {
|
||||
// Find length of common prefix
|
||||
size_t min_length = std::min(start->size(), limit.size());
|
||||
size_t diff_index = 0;
|
||||
while ((diff_index < min_length) &&
|
||||
((*start)[diff_index] == limit[diff_index])) {
|
||||
diff_index++;
|
||||
}
|
||||
|
||||
assert(diff_index <= min_length);
|
||||
if (diff_index == min_length) {
|
||||
// Do not shorten if one string is a prefix of the other
|
||||
//
|
||||
// We could handle cases like:
|
||||
// V
|
||||
// A A 2 X Y
|
||||
// A A 2
|
||||
// in a similar way as BytewiseComparator::FindShortestSeparator().
|
||||
// We keep it simple by not implementing it. We can come back to it
|
||||
// later when needed.
|
||||
} else {
|
||||
uint8_t start_byte = static_cast<uint8_t>((*start)[diff_index]);
|
||||
uint8_t limit_byte = static_cast<uint8_t>(limit[diff_index]);
|
||||
if (start_byte > limit_byte && diff_index < start->size() - 1) {
|
||||
// Case like
|
||||
// V
|
||||
// A A 3 A A
|
||||
// A A 1 B B
|
||||
//
|
||||
// or
|
||||
// v
|
||||
// A A 2 A A
|
||||
// A A 1 B B
|
||||
// In this case "AA2" will be good.
|
||||
#ifndef NDEBUG
|
||||
std::string old_start = *start;
|
||||
#endif
|
||||
start->resize(diff_index + 1);
|
||||
#ifndef NDEBUG
|
||||
assert(old_start >= *start);
|
||||
#endif
|
||||
assert(Slice(*start).compare(limit) > 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FindShortSuccessor(std::string* /*key*/) const override {
|
||||
// Don't do anything for simplicity.
|
||||
}
|
||||
};
|
||||
}// namespace
|
||||
|
||||
const Comparator* BytewiseComparator() {
|
||||
|
Loading…
Reference in New Issue
Block a user