Fix the sorting of KeyContexts for batched MultiGet (#8633)

Summary:
`CompareKeyContext::operator()` on the trunk has a bug: when comparing
column family IDs, `lhs` is used for both sides of the comparison. This
results in the `KeyContext`s getting sorted solely based on key, which
in turn means that keys with the same column family do not necessarily
form a single range in the sorted list. This violates an assumption of the
batched `MultiGet` logic, leading to the same column family
showing up multiple times in the list of `MultiGetColumnFamilyData`.
The end result is the code attempting to check out the thread-local
`SuperVersion` for the same CF multiple times, causing an
assertion violation in debug builds and memory corruption/crash in
release builds.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8633

Test Plan: `make check`

Reviewed By: riversand963

Differential Revision: D30169182

Pulled By: ltamasi

fbshipit-source-id: a47710652df7e95b14b40fb710924c11a8478023
This commit is contained in:
Levi Tamasi 2021-08-06 16:26:04 -07:00 committed by Peter Dillinger
parent 9d95050ca7
commit cc4c9e80e4
3 changed files with 44 additions and 35 deletions

View File

@ -1,4 +1,8 @@
# Rocksdb Change Log # Rocksdb Change Log
## Unreleased
### Bug Fixes
* Fixed a bug affecting the batched `MultiGet` API when used with keys spanning multiple column families and `sorted_input == false`.
## 6.16.4 (03/30/2021) ## 6.16.4 (03/30/2021)
### Bug Fixes ### Bug Fixes
* Fix build on ppc64 and musl build. * Fix build on ppc64 and musl build.

View File

@ -1361,6 +1361,28 @@ TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFSnapshot) {
} }
} }
TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFUnsorted) {
Options options = CurrentOptions();
CreateAndReopenWithCF({"one", "two"}, options);
ASSERT_OK(Put(1, "foo", "bar"));
ASSERT_OK(Put(2, "baz", "xyz"));
ASSERT_OK(Put(1, "abc", "def"));
// Note: keys for the same CF do not form a consecutive range
std::vector<int> cfs{1, 2, 1};
std::vector<std::string> keys{"foo", "baz", "abc"};
std::vector<std::string> values;
values =
MultiGet(cfs, keys, /* snapshot */ nullptr, /* batched */ GetParam());
ASSERT_EQ(values.size(), 3);
ASSERT_EQ(values[0], "bar");
ASSERT_EQ(values[1], "xyz");
ASSERT_EQ(values[2], "def");
}
INSTANTIATE_TEST_CASE_P(DBMultiGetTestWithParam, DBMultiGetTestWithParam, INSTANTIATE_TEST_CASE_P(DBMultiGetTestWithParam, DBMultiGetTestWithParam,
testing::Bool()); testing::Bool());

View File

@ -2164,20 +2164,18 @@ void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys,
multiget_cf_data; multiget_cf_data;
size_t cf_start = 0; size_t cf_start = 0;
ColumnFamilyHandle* cf = sorted_keys[0]->column_family; ColumnFamilyHandle* cf = sorted_keys[0]->column_family;
for (size_t i = 0; i < num_keys; ++i) { for (size_t i = 0; i < num_keys; ++i) {
KeyContext* key_ctx = sorted_keys[i]; KeyContext* key_ctx = sorted_keys[i];
if (key_ctx->column_family != cf) { if (key_ctx->column_family != cf) {
multiget_cf_data.emplace_back( multiget_cf_data.emplace_back(cf, cf_start, i - cf_start, nullptr);
MultiGetColumnFamilyData(cf, cf_start, i - cf_start, nullptr));
cf_start = i; cf_start = i;
cf = key_ctx->column_family; cf = key_ctx->column_family;
} }
} }
{
// multiget_cf_data.emplace_back( multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr);
// MultiGetColumnFamilyData(cf, cf_start, num_keys - cf_start, nullptr));
multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr);
}
std::function<MultiGetColumnFamilyData*( std::function<MultiGetColumnFamilyData*(
autovector<MultiGetColumnFamilyData, autovector<MultiGetColumnFamilyData,
MultiGetContext::MAX_BATCH_SIZE>::iterator&)> MultiGetContext::MAX_BATCH_SIZE>::iterator&)>
@ -2237,7 +2235,7 @@ struct CompareKeyContext {
static_cast<ColumnFamilyHandleImpl*>(lhs->column_family); static_cast<ColumnFamilyHandleImpl*>(lhs->column_family);
uint32_t cfd_id1 = cfh->cfd()->GetID(); uint32_t cfd_id1 = cfh->cfd()->GetID();
const Comparator* comparator = cfh->cfd()->user_comparator(); const Comparator* comparator = cfh->cfd()->user_comparator();
cfh = static_cast<ColumnFamilyHandleImpl*>(lhs->column_family); cfh = static_cast<ColumnFamilyHandleImpl*>(rhs->column_family);
uint32_t cfd_id2 = cfh->cfd()->GetID(); uint32_t cfd_id2 = cfh->cfd()->GetID();
if (cfd_id1 < cfd_id2) { if (cfd_id1 < cfd_id2) {
@ -2261,39 +2259,24 @@ struct CompareKeyContext {
void DBImpl::PrepareMultiGetKeys( void DBImpl::PrepareMultiGetKeys(
size_t num_keys, bool sorted_input, size_t num_keys, bool sorted_input,
autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE>* sorted_keys) { autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE>* sorted_keys) {
#ifndef NDEBUG
if (sorted_input) { if (sorted_input) {
for (size_t index = 0; index < sorted_keys->size(); ++index) { #ifndef NDEBUG
if (index > 0) { CompareKeyContext key_context_less;
KeyContext* lhs = (*sorted_keys)[index - 1];
KeyContext* rhs = (*sorted_keys)[index];
ColumnFamilyHandleImpl* cfh =
static_cast_with_check<ColumnFamilyHandleImpl>(lhs->column_family);
uint32_t cfd_id1 = cfh->cfd()->GetID();
const Comparator* comparator = cfh->cfd()->user_comparator();
cfh =
static_cast_with_check<ColumnFamilyHandleImpl>(lhs->column_family);
uint32_t cfd_id2 = cfh->cfd()->GetID();
assert(cfd_id1 <= cfd_id2); for (size_t index = 1; index < sorted_keys->size(); ++index) {
if (cfd_id1 < cfd_id2) { const KeyContext* const lhs = (*sorted_keys)[index - 1];
continue; const KeyContext* const rhs = (*sorted_keys)[index];
}
// Both keys are from the same column family // lhs should be <= rhs, or in other words, rhs should NOT be < lhs
int cmp = comparator->CompareWithoutTimestamp( assert(!key_context_less(rhs, lhs));
*(lhs->key), /*a_has_ts=*/false, *(rhs->key), /*b_has_ts=*/false);
assert(cmp <= 0);
}
index++;
} }
}
#endif #endif
if (!sorted_input) {
CompareKeyContext sort_comparator; return;
std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys,
sort_comparator);
} }
std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys,
CompareKeyContext());
} }
void DBImpl::MultiGet(const ReadOptions& read_options, void DBImpl::MultiGet(const ReadOptions& read_options,