From 37c499628217daddf90b033293af5682f52a5faf Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Fri, 6 Aug 2021 16:26:04 -0700 Subject: [PATCH] Fix the sorting of KeyContexts for batched MultiGet (#8633) Summary: `CompareKeyContext::operator()` on the trunk has a bug: when comparing column family IDs, `lhs` is used for both sides of the comparison. This results in the `KeyContext`s getting sorted solely based on key, which in turn means that keys with the same column family do not necessarily form a single range in the sorted list. This violates an assumption of the batched `MultiGet` logic, leading to the same column family showing up multiple times in the list of `MultiGetColumnFamilyData`. The end result is the code attempting to check out the thread-local `SuperVersion` for the same CF multiple times, causing an assertion violation in debug builds and memory corruption/crash in release builds. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8633 Test Plan: `make check` Reviewed By: riversand963 Differential Revision: D30169182 Pulled By: ltamasi fbshipit-source-id: a47710652df7e95b14b40fb710924c11a8478023 --- HISTORY.md | 1 + db/db_basic_test.cc | 22 ++++++++++++++++++ db/db_impl/db_impl.cc | 53 +++++++++++++++---------------------------- 3 files changed, 41 insertions(+), 35 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index c4cdd6dfd..29a775755 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,7 @@ ## 6.23.3 (2021-08-04) ### Bug Fixes * Removed a call to `RenameFile()` on a non-existent info log file ("LOG") when opening a new DB. Such a call was guaranteed to fail though did not impact applications since we swallowed the error. Now we also stopped swallowing errors in renaming "LOG" file. +* Fixed a bug affecting the batched `MultiGet` API when used with keys spanning multiple column families and `sorted_input == false`. ## 6.23.2 (2021-08-04) ### Bug Fixes diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 34f27b809..a78362260 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -1362,6 +1362,28 @@ TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFSnapshot) { } } +TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFUnsorted) { + Options options = CurrentOptions(); + CreateAndReopenWithCF({"one", "two"}, options); + + ASSERT_OK(Put(1, "foo", "bar")); + ASSERT_OK(Put(2, "baz", "xyz")); + ASSERT_OK(Put(1, "abc", "def")); + + // Note: keys for the same CF do not form a consecutive range + std::vector cfs{1, 2, 1}; + std::vector keys{"foo", "baz", "abc"}; + std::vector values; + + values = + MultiGet(cfs, keys, /* snapshot */ nullptr, /* batched */ GetParam()); + + ASSERT_EQ(values.size(), 3); + ASSERT_EQ(values[0], "bar"); + ASSERT_EQ(values[1], "xyz"); + ASSERT_EQ(values[2], "def"); +} + INSTANTIATE_TEST_CASE_P(DBMultiGetTestWithParam, DBMultiGetTestWithParam, testing::Bool()); diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 20ebbfed2..482c2a3f1 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -2276,20 +2276,18 @@ void DBImpl::MultiGet(const ReadOptions& read_options, const size_t num_keys, multiget_cf_data; size_t cf_start = 0; ColumnFamilyHandle* cf = sorted_keys[0]->column_family; + for (size_t i = 0; i < num_keys; ++i) { KeyContext* key_ctx = sorted_keys[i]; if (key_ctx->column_family != cf) { - multiget_cf_data.emplace_back( - MultiGetColumnFamilyData(cf, cf_start, i - cf_start, nullptr)); + multiget_cf_data.emplace_back(cf, cf_start, i - cf_start, nullptr); cf_start = i; cf = key_ctx->column_family; } } - { - // multiget_cf_data.emplace_back( - // MultiGetColumnFamilyData(cf, cf_start, num_keys - cf_start, nullptr)); - multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr); - } + + multiget_cf_data.emplace_back(cf, cf_start, num_keys - cf_start, nullptr); + std::function::iterator&)> @@ -2349,7 +2347,7 @@ struct CompareKeyContext { static_cast(lhs->column_family); uint32_t cfd_id1 = cfh->cfd()->GetID(); const Comparator* comparator = cfh->cfd()->user_comparator(); - cfh = static_cast(lhs->column_family); + cfh = static_cast(rhs->column_family); uint32_t cfd_id2 = cfh->cfd()->GetID(); if (cfd_id1 < cfd_id2) { @@ -2373,39 +2371,24 @@ struct CompareKeyContext { void DBImpl::PrepareMultiGetKeys( size_t num_keys, bool sorted_input, autovector* sorted_keys) { -#ifndef NDEBUG if (sorted_input) { - for (size_t index = 0; index < sorted_keys->size(); ++index) { - if (index > 0) { - KeyContext* lhs = (*sorted_keys)[index - 1]; - KeyContext* rhs = (*sorted_keys)[index]; - ColumnFamilyHandleImpl* cfh = - static_cast_with_check(lhs->column_family); - uint32_t cfd_id1 = cfh->cfd()->GetID(); - const Comparator* comparator = cfh->cfd()->user_comparator(); - cfh = - static_cast_with_check(lhs->column_family); - uint32_t cfd_id2 = cfh->cfd()->GetID(); +#ifndef NDEBUG + CompareKeyContext key_context_less; - assert(cfd_id1 <= cfd_id2); - if (cfd_id1 < cfd_id2) { - continue; - } + for (size_t index = 1; index < sorted_keys->size(); ++index) { + const KeyContext* const lhs = (*sorted_keys)[index - 1]; + const KeyContext* const rhs = (*sorted_keys)[index]; - // Both keys are from the same column family - int cmp = comparator->CompareWithoutTimestamp( - *(lhs->key), /*a_has_ts=*/false, *(rhs->key), /*b_has_ts=*/false); - assert(cmp <= 0); - } - index++; + // lhs should be <= rhs, or in other words, rhs should NOT be < lhs + assert(!key_context_less(rhs, lhs)); } - } #endif - if (!sorted_input) { - CompareKeyContext sort_comparator; - std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys, - sort_comparator); + + return; } + + std::sort(sorted_keys->begin(), sorted_keys->begin() + num_keys, + CompareKeyContext()); } void DBImpl::MultiGet(const ReadOptions& read_options,