Fix the sorting of KeyContexts for batched MultiGet (#8633)
Summary: `CompareKeyContext::operator()` on the trunk has a bug: when comparing column family IDs, `lhs` is used for both sides of the comparison. This results in the `KeyContext`s getting sorted solely based on key, which in turn means that keys with the same column family do not necessarily form a single range in the sorted list. This violates an assumption of the batched `MultiGet` logic, leading to the same column family showing up multiple times in the list of `MultiGetColumnFamilyData`. The end result is the code attempting to check out the thread-local `SuperVersion` for the same CF multiple times, causing an assertion violation in debug builds and memory corruption/crash in release builds. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8633 Test Plan: `make check` Reviewed By: riversand963 Differential Revision: D30169182 Pulled By: ltamasi fbshipit-source-id: a47710652df7e95b14b40fb710924c11a8478023
This commit is contained in:
parent
53da604580
commit
37c4996282
@ -2,6 +2,7 @@
|
|||||||
## 6.23.3 (2021-08-04)
|
## 6.23.3 (2021-08-04)
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
* Removed a call to `RenameFile()` on a non-existent info log file ("LOG") when opening a new DB. Such a call was guaranteed to fail though did not impact applications since we swallowed the error. Now we also stopped swallowing errors in renaming "LOG" file.
|
* Removed a call to `RenameFile()` on a non-existent info log file ("LOG") when opening a new DB. Such a call was guaranteed to fail though did not impact applications since we swallowed the error. Now we also stopped swallowing errors in renaming "LOG" file.
|
||||||
|
* Fixed a bug affecting the batched `MultiGet` API when used with keys spanning multiple column families and `sorted_input == false`.
|
||||||
|
|
||||||
## 6.23.2 (2021-08-04)
|
## 6.23.2 (2021-08-04)
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
|
@ -1362,6 +1362,28 @@ TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFSnapshot) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFUnsorted) {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
CreateAndReopenWithCF({"one", "two"}, options);
|
||||||
|
|
||||||
|
ASSERT_OK(Put(1, "foo", "bar"));
|
||||||
|
ASSERT_OK(Put(2, "baz", "xyz"));
|
||||||
|
ASSERT_OK(Put(1, "abc", "def"));
|
||||||
|
|
||||||
|
// Note: keys for the same CF do not form a consecutive range
|
||||||
|
std::vector<int> cfs{1, 2, 1};
|
||||||
|
std::vector<std::string> keys{"foo", "baz", "abc"};
|
||||||
|
std::vector<std::string> values;
|
||||||
|
|
||||||
|
values =
|
||||||
|
MultiGet(cfs, keys, /* snapshot */ nullptr, /* batched */ GetParam());
|
||||||
|
|
||||||
|
ASSERT_EQ(values.size(), 3);
|
||||||
|
ASSERT_EQ(values[0], "bar");
|
||||||
|
ASSERT_EQ(values[1], "xyz");
|
||||||
|
ASSERT_EQ(values[2], "def");
|
||||||
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(DBMultiGetTestWithParam, DBMultiGetTestWithParam,
|
INSTANTIATE_TEST_CASE_P(DBMultiGetTestWithParam, DBMultiGetTestWithParam,
|
||||||
testing::Bool());
|
testing::Bool());
|
||||||
|
|
||||||
|
@ -2276,20 +2276,18 @@ void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys,
|
|||||||
multiget_cf_data;
|
multiget_cf_data;
|
||||||
size_t cf_start = 0;
|
size_t cf_start = 0;
|
||||||
ColumnFamilyHandle* cf = sorted_keys[0]->column_family;
|
ColumnFamilyHandle* cf = sorted_keys[0]->column_family;
|
||||||
|
|
||||||
for (size_t i = 0; i < num_keys; ++i) {
|
for (size_t i = 0; i < num_keys; ++i) {
|
||||||
KeyContext* key_ctx = sorted_keys[i];
|
KeyContext* key_ctx = sorted_keys[i];
|
||||||
if (key_ctx->column_family != cf) {
|
if (key_ctx->column_family != cf) {
|
||||||
multiget_cf_data.emplace_back(
|
multiget_cf_data.emplace_back(cf, cf_start, i - cf_start, nullptr);
|
||||||
MultiGetColumnFamilyData(cf, cf_start, i - cf_start, nullptr));
|
|
||||||
cf_start = i;
|
cf_start = i;
|
||||||
cf = key_ctx->column_family;
|
cf = key_ctx->column_family;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
{
|
|
||||||
// multiget_cf_data.emplace_back(
|
|
||||||
// MultiGetColumnFamilyData(cf, cf_start, num_keys - cf_start, nullptr));
|
|
||||||
multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr);
|
multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr);
|
||||||
}
|
|
||||||
std::function<MultiGetColumnFamilyData*(
|
std::function<MultiGetColumnFamilyData*(
|
||||||
autovector<MultiGetColumnFamilyData,
|
autovector<MultiGetColumnFamilyData,
|
||||||
MultiGetContext::MAX_BATCH_SIZE>::iterator&)>
|
MultiGetContext::MAX_BATCH_SIZE>::iterator&)>
|
||||||
@ -2349,7 +2347,7 @@ struct CompareKeyContext {
|
|||||||
static_cast<ColumnFamilyHandleImpl*>(lhs->column_family);
|
static_cast<ColumnFamilyHandleImpl*>(lhs->column_family);
|
||||||
uint32_t cfd_id1 = cfh->cfd()->GetID();
|
uint32_t cfd_id1 = cfh->cfd()->GetID();
|
||||||
const Comparator* comparator = cfh->cfd()->user_comparator();
|
const Comparator* comparator = cfh->cfd()->user_comparator();
|
||||||
cfh = static_cast<ColumnFamilyHandleImpl*>(lhs->column_family);
|
cfh = static_cast<ColumnFamilyHandleImpl*>(rhs->column_family);
|
||||||
uint32_t cfd_id2 = cfh->cfd()->GetID();
|
uint32_t cfd_id2 = cfh->cfd()->GetID();
|
||||||
|
|
||||||
if (cfd_id1 < cfd_id2) {
|
if (cfd_id1 < cfd_id2) {
|
||||||
@ -2373,39 +2371,24 @@ struct CompareKeyContext {
|
|||||||
void DBImpl::PrepareMultiGetKeys(
|
void DBImpl::PrepareMultiGetKeys(
|
||||||
size_t num_keys, bool sorted_input,
|
size_t num_keys, bool sorted_input,
|
||||||
autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE>* sorted_keys) {
|
autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE>* sorted_keys) {
|
||||||
#ifndef NDEBUG
|
|
||||||
if (sorted_input) {
|
if (sorted_input) {
|
||||||
for (size_t index = 0; index < sorted_keys->size(); ++index) {
|
#ifndef NDEBUG
|
||||||
if (index > 0) {
|
CompareKeyContext key_context_less;
|
||||||
KeyContext* lhs = (*sorted_keys)[index - 1];
|
|
||||||
KeyContext* rhs = (*sorted_keys)[index];
|
|
||||||
ColumnFamilyHandleImpl* cfh =
|
|
||||||
static_cast_with_check<ColumnFamilyHandleImpl>(lhs->column_family);
|
|
||||||
uint32_t cfd_id1 = cfh->cfd()->GetID();
|
|
||||||
const Comparator* comparator = cfh->cfd()->user_comparator();
|
|
||||||
cfh =
|
|
||||||
static_cast_with_check<ColumnFamilyHandleImpl>(lhs->column_family);
|
|
||||||
uint32_t cfd_id2 = cfh->cfd()->GetID();
|
|
||||||
|
|
||||||
assert(cfd_id1 <= cfd_id2);
|
for (size_t index = 1; index < sorted_keys->size(); ++index) {
|
||||||
if (cfd_id1 < cfd_id2) {
|
const KeyContext* const lhs = (*sorted_keys)[index - 1];
|
||||||
continue;
|
const KeyContext* const rhs = (*sorted_keys)[index];
|
||||||
}
|
|
||||||
|
|
||||||
// Both keys are from the same column family
|
// lhs should be <= rhs, or in other words, rhs should NOT be < lhs
|
||||||
int cmp = comparator->CompareWithoutTimestamp(
|
assert(!key_context_less(rhs, lhs));
|
||||||
*(lhs->key), /*a_has_ts=*/false, *(rhs->key), /*b_has_ts=*/false);
|
|
||||||
assert(cmp <= 0);
|
|
||||||
}
|
|
||||||
index++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (!sorted_input) {
|
|
||||||
CompareKeyContext sort_comparator;
|
return;
|
||||||
std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys,
|
|
||||||
sort_comparator);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys,
|
||||||
|
CompareKeyContext());
|
||||||
}
|
}
|
||||||
|
|
||||||
void DBImpl::MultiGet(const ReadOptions& read_options,
|
void DBImpl::MultiGet(const ReadOptions& read_options,
|
||||||
|
Loading…
Reference in New Issue
Block a user