Fix the sorting of KeyContexts for batched MultiGet (#8633)
Summary: `CompareKeyContext::operator()` on the trunk has a bug: when comparing column family IDs, `lhs` is used for both sides of the comparison. This results in the `KeyContext`s getting sorted solely based on key, which in turn means that keys with the same column family do not necessarily form a single range in the sorted list. This violates an assumption of the batched `MultiGet` logic, leading to the same column family showing up multiple times in the list of `MultiGetColumnFamilyData`. The end result is the code attempting to check out the thread-local `SuperVersion` for the same CF multiple times, causing an assertion violation in debug builds and memory corruption/crash in release builds. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8633 Test Plan: `make check` Reviewed By: riversand963 Differential Revision: D30169182 Pulled By: ltamasi fbshipit-source-id: a47710652df7e95b14b40fb710924c11a8478023
This commit is contained in:
parent
9d95050ca7
commit
cc4c9e80e4
@ -1,4 +1,8 @@
|
||||
# Rocksdb Change Log
|
||||
## Unreleased
|
||||
### Bug Fixes
|
||||
* Fixed a bug affecting the batched `MultiGet` API when used with keys spanning multiple column families and `sorted_input == false`.
|
||||
|
||||
## 6.16.4 (03/30/2021)
|
||||
### Bug Fixes
|
||||
* Fix build on ppc64 and musl build.
|
||||
|
@ -1361,6 +1361,28 @@ TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFSnapshot) {
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFUnsorted) {
|
||||
Options options = CurrentOptions();
|
||||
CreateAndReopenWithCF({"one", "two"}, options);
|
||||
|
||||
ASSERT_OK(Put(1, "foo", "bar"));
|
||||
ASSERT_OK(Put(2, "baz", "xyz"));
|
||||
ASSERT_OK(Put(1, "abc", "def"));
|
||||
|
||||
// Note: keys for the same CF do not form a consecutive range
|
||||
std::vector<int> cfs{1, 2, 1};
|
||||
std::vector<std::string> keys{"foo", "baz", "abc"};
|
||||
std::vector<std::string> values;
|
||||
|
||||
values =
|
||||
MultiGet(cfs, keys, /* snapshot */ nullptr, /* batched */ GetParam());
|
||||
|
||||
ASSERT_EQ(values.size(), 3);
|
||||
ASSERT_EQ(values[0], "bar");
|
||||
ASSERT_EQ(values[1], "xyz");
|
||||
ASSERT_EQ(values[2], "def");
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(DBMultiGetTestWithParam, DBMultiGetTestWithParam,
|
||||
testing::Bool());
|
||||
|
||||
|
@ -2164,20 +2164,18 @@ void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys,
|
||||
multiget_cf_data;
|
||||
size_t cf_start = 0;
|
||||
ColumnFamilyHandle* cf = sorted_keys[0]->column_family;
|
||||
|
||||
for (size_t i = 0; i < num_keys; ++i) {
|
||||
KeyContext* key_ctx = sorted_keys[i];
|
||||
if (key_ctx->column_family != cf) {
|
||||
multiget_cf_data.emplace_back(
|
||||
MultiGetColumnFamilyData(cf, cf_start, i - cf_start, nullptr));
|
||||
multiget_cf_data.emplace_back(cf, cf_start, i - cf_start, nullptr);
|
||||
cf_start = i;
|
||||
cf = key_ctx->column_family;
|
||||
}
|
||||
}
|
||||
{
|
||||
// multiget_cf_data.emplace_back(
|
||||
// MultiGetColumnFamilyData(cf, cf_start, num_keys - cf_start, nullptr));
|
||||
multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr);
|
||||
}
|
||||
|
||||
multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr);
|
||||
|
||||
std::function<MultiGetColumnFamilyData*(
|
||||
autovector<MultiGetColumnFamilyData,
|
||||
MultiGetContext::MAX_BATCH_SIZE>::iterator&)>
|
||||
@ -2237,7 +2235,7 @@ struct CompareKeyContext {
|
||||
static_cast<ColumnFamilyHandleImpl*>(lhs->column_family);
|
||||
uint32_t cfd_id1 = cfh->cfd()->GetID();
|
||||
const Comparator* comparator = cfh->cfd()->user_comparator();
|
||||
cfh = static_cast<ColumnFamilyHandleImpl*>(lhs->column_family);
|
||||
cfh = static_cast<ColumnFamilyHandleImpl*>(rhs->column_family);
|
||||
uint32_t cfd_id2 = cfh->cfd()->GetID();
|
||||
|
||||
if (cfd_id1 < cfd_id2) {
|
||||
@ -2261,39 +2259,24 @@ struct CompareKeyContext {
|
||||
void DBImpl::PrepareMultiGetKeys(
|
||||
size_t num_keys, bool sorted_input,
|
||||
autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE>* sorted_keys) {
|
||||
#ifndef NDEBUG
|
||||
if (sorted_input) {
|
||||
for (size_t index = 0; index < sorted_keys->size(); ++index) {
|
||||
if (index > 0) {
|
||||
KeyContext* lhs = (*sorted_keys)[index - 1];
|
||||
KeyContext* rhs = (*sorted_keys)[index];
|
||||
ColumnFamilyHandleImpl* cfh =
|
||||
static_cast_with_check<ColumnFamilyHandleImpl>(lhs->column_family);
|
||||
uint32_t cfd_id1 = cfh->cfd()->GetID();
|
||||
const Comparator* comparator = cfh->cfd()->user_comparator();
|
||||
cfh =
|
||||
static_cast_with_check<ColumnFamilyHandleImpl>(lhs->column_family);
|
||||
uint32_t cfd_id2 = cfh->cfd()->GetID();
|
||||
#ifndef NDEBUG
|
||||
CompareKeyContext key_context_less;
|
||||
|
||||
assert(cfd_id1 <= cfd_id2);
|
||||
if (cfd_id1 < cfd_id2) {
|
||||
continue;
|
||||
}
|
||||
for (size_t index = 1; index < sorted_keys->size(); ++index) {
|
||||
const KeyContext* const lhs = (*sorted_keys)[index - 1];
|
||||
const KeyContext* const rhs = (*sorted_keys)[index];
|
||||
|
||||
// Both keys are from the same column family
|
||||
int cmp = comparator->CompareWithoutTimestamp(
|
||||
*(lhs->key), /*a_has_ts=*/false, *(rhs->key), /*b_has_ts=*/false);
|
||||
assert(cmp <= 0);
|
||||
}
|
||||
index++;
|
||||
// lhs should be <= rhs, or in other words, rhs should NOT be < lhs
|
||||
assert(!key_context_less(rhs, lhs));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (!sorted_input) {
|
||||
CompareKeyContext sort_comparator;
|
||||
std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys,
|
||||
sort_comparator);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys,
|
||||
CompareKeyContext());
|
||||
}
|
||||
|
||||
void DBImpl::MultiGet(const ReadOptions& read_options,
|
||||
|
Loading…
Reference in New Issue
Block a user