Improve BytewiseComparatorImpl::FindShortestSeparator
Summary: The current implementation finds the first differing byte and tries to increment it; if it cannot, it returns the original key. We can improve this by continuing past the first differing byte to find the first non-0xFF byte and incrementing that instead. After trying this patch on some logdevice SST files, I see a decrease in their index block size of 8.5%. Test Plan: existing tests and updated test. Reviewers: yhchiang, andrewkr, sdong. Reviewed By: sdong. Subscribers: andrewkr, dhruba. Differential Revision: https://reviews.facebook.net/D56241
This commit is contained in:
parent
f3eb0b5b8c
commit
7c14abf2c7
@ -224,7 +224,7 @@ void GetExpectedTableProperties(TableProperties* expected_tp,
|
|||||||
const int kBloomBitsPerKey,
|
const int kBloomBitsPerKey,
|
||||||
const size_t kBlockSize) {
|
const size_t kBlockSize) {
|
||||||
const int kKeyCount = kTableCount * kKeysPerTable;
|
const int kKeyCount = kTableCount * kKeysPerTable;
|
||||||
const int kAvgSuccessorSize = kKeySize / 2;
|
const int kAvgSuccessorSize = kKeySize / 5;
|
||||||
const int kEncodingSavePerKey = kKeySize / 4;
|
const int kEncodingSavePerKey = kKeySize / 4;
|
||||||
expected_tp->raw_key_size = kKeyCount * (kKeySize + 8);
|
expected_tp->raw_key_size = kKeyCount * (kKeySize + 8);
|
||||||
expected_tp->raw_value_size = kKeyCount * kValueSize;
|
expected_tp->raw_value_size = kKeyCount * kValueSize;
|
||||||
|
@ -92,6 +92,30 @@ TEST_F(FormatTest, InternalKeyShortSeparator) {
|
|||||||
Shorten(IKey("foo", 100, kTypeValue),
|
Shorten(IKey("foo", 100, kTypeValue),
|
||||||
IKey("hello", 200, kTypeValue)));
|
IKey("hello", 200, kTypeValue)));
|
||||||
|
|
||||||
|
ASSERT_EQ(IKey("ABC2", kMaxSequenceNumber, kValueTypeForSeek),
|
||||||
|
Shorten(IKey("ABC1AAAAA", 100, kTypeValue),
|
||||||
|
IKey("ABC2ABB", 200, kTypeValue)));
|
||||||
|
|
||||||
|
ASSERT_EQ(IKey("AAA2", kMaxSequenceNumber, kValueTypeForSeek),
|
||||||
|
Shorten(IKey("AAA1AAA", 100, kTypeValue),
|
||||||
|
IKey("AAA2AA", 200, kTypeValue)));
|
||||||
|
|
||||||
|
ASSERT_EQ(
|
||||||
|
IKey("AAA2", kMaxSequenceNumber, kValueTypeForSeek),
|
||||||
|
Shorten(IKey("AAA1AAA", 100, kTypeValue), IKey("AAA4", 200, kTypeValue)));
|
||||||
|
|
||||||
|
ASSERT_EQ(
|
||||||
|
IKey("AAA1B", kMaxSequenceNumber, kValueTypeForSeek),
|
||||||
|
Shorten(IKey("AAA1AAA", 100, kTypeValue), IKey("AAA2", 200, kTypeValue)));
|
||||||
|
|
||||||
|
ASSERT_EQ(IKey("AAA2", kMaxSequenceNumber, kValueTypeForSeek),
|
||||||
|
Shorten(IKey("AAA1AAA", 100, kTypeValue),
|
||||||
|
IKey("AAA2A", 200, kTypeValue)));
|
||||||
|
|
||||||
|
ASSERT_EQ(
|
||||||
|
IKey("AAA1", 100, kTypeValue),
|
||||||
|
Shorten(IKey("AAA1", 100, kTypeValue), IKey("AAA2", 200, kTypeValue)));
|
||||||
|
|
||||||
// When start user key is prefix of limit user key
|
// When start user key is prefix of limit user key
|
||||||
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
ASSERT_EQ(IKey("foo", 100, kTypeValue),
|
||||||
Shorten(IKey("foo", 100, kTypeValue),
|
Shorten(IKey("foo", 100, kTypeValue),
|
||||||
|
@ -49,13 +49,41 @@ class BytewiseComparatorImpl : public Comparator {
|
|||||||
if (diff_index >= min_length) {
|
if (diff_index >= min_length) {
|
||||||
// Do not shorten if one string is a prefix of the other
|
// Do not shorten if one string is a prefix of the other
|
||||||
} else {
|
} else {
|
||||||
uint8_t diff_byte = static_cast<uint8_t>((*start)[diff_index]);
|
uint8_t start_byte = static_cast<uint8_t>((*start)[diff_index]);
|
||||||
if (diff_byte < static_cast<uint8_t>(0xff) &&
|
uint8_t limit_byte = static_cast<uint8_t>(limit[diff_index]);
|
||||||
diff_byte + 1 < static_cast<uint8_t>(limit[diff_index])) {
|
if (start_byte >= limit_byte || (diff_index == start->size() - 1)) {
|
||||||
|
// Cannot shorten since limit is smaller than start or start is
|
||||||
|
// already the shortest possible.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
assert(start_byte < limit_byte);
|
||||||
|
|
||||||
|
if (diff_index < limit.size() - 1 || start_byte + 1 < limit_byte) {
|
||||||
(*start)[diff_index]++;
|
(*start)[diff_index]++;
|
||||||
start->resize(diff_index + 1);
|
start->resize(diff_index + 1);
|
||||||
assert(Compare(*start, limit) < 0);
|
} else {
|
||||||
|
// v
|
||||||
|
// A A 1 A A A
|
||||||
|
// A A 2
|
||||||
|
//
|
||||||
|
// Incrementing the current byte will make start bigger than limit, we
|
||||||
|
// will skip this byte, and find the first non 0xFF byte in start and
|
||||||
|
// increment it.
|
||||||
|
diff_index++;
|
||||||
|
|
||||||
|
while (diff_index < start->size()) {
|
||||||
|
// Keep moving until we find the first non 0xFF byte to
|
||||||
|
// increment it
|
||||||
|
if (static_cast<uint8_t>((*start)[diff_index]) <
|
||||||
|
static_cast<uint8_t>(0xff)) {
|
||||||
|
(*start)[diff_index]++;
|
||||||
|
start->resize(diff_index + 1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
diff_index++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
assert(Compare(*start, limit) < 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user