From 2ba7f1e57431752bc5a89f22c3452d0d20d74112 Mon Sep 17 00:00:00 2001 From: anand1976 <33647610+anand1976@users.noreply.github.com> Date: Mon, 16 Dec 2019 20:35:11 -0800 Subject: [PATCH] Fix crash in Transaction::MultiGet() when num_keys > 32 Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/6192 Test Plan: Add a unit test that fails without the fix and passes now make check Differential Revision: D19124781 Pulled By: anand1976 fbshipit-source-id: 8c8cb6fa16c3fc23ec011e168561a13f76bbd783 --- HISTORY.md | 4 + utilities/transactions/transaction_test.cc | 98 +++++++++++++++++++ .../write_batch_with_index.cc | 5 +- 3 files changed, 106 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 3f1e7c9ae..6f77477a8 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,8 @@ # Rocksdb Change Log +## 6.6.1 +### Bug Fixes +* Fix a bug in WriteBatchWithIndex::MultiGetFromBatchAndDB, which is called by Transaction::MultiGet, that causes due to stale pointer access when the number of keys is > 32 + ## 6.6.0 (11/25/2019) ### Bug Fixes * Fix data corruption casued by output of intra-L0 compaction on ingested file not being placed in correct order in L0. diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index fd8490119..da1edd185 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -2823,6 +2823,104 @@ TEST_P(TransactionTest, MultiGetBatchedTest) { } } +// This test calls WriteBatchWithIndex::MultiGetFromBatchAndDB with a large +// number of keys, i.e greater than MultiGetContext::MAX_BATCH_SIZE, which is +// is 32. This forces autovector allocations in the MultiGet code paths +// to use std::vector in addition to stack allocations. The MultiGet keys +// includes Merges, which are handled specially in MultiGetFromBatchAndDB by +// allocating an autovector of MergeContexts +TEST_P(TransactionTest, MultiGetLargeBatchedTest) { + WriteOptions write_options; + ReadOptions read_options, snapshot_read_options; + string value; + Status s; + + ColumnFamilyHandle* cf; + ColumnFamilyOptions cf_options; + + std::vector key_str; + for (int i = 0; i < 100; ++i) { + key_str.emplace_back(std::to_string(i)); + } + // Create a new column families + s = db->CreateColumnFamily(cf_options, "CF", &cf); + ASSERT_OK(s); + + delete cf; + delete db; + db = nullptr; + + // open DB with three column families + std::vector column_families; + // have to open default column family + column_families.push_back( + ColumnFamilyDescriptor(kDefaultColumnFamilyName, ColumnFamilyOptions())); + // open the new column families + cf_options.merge_operator = MergeOperators::CreateStringAppendOperator(); + column_families.push_back(ColumnFamilyDescriptor("CF", cf_options)); + + std::vector handles; + + options.merge_operator = MergeOperators::CreateStringAppendOperator(); + ASSERT_OK(ReOpenNoDelete(column_families, &handles)); + assert(db != nullptr); + + // Write some data to the db + WriteBatch batch; + for (int i = 0; i < 3 * MultiGetContext::MAX_BATCH_SIZE; ++i) { + std::string val = "val" + std::to_string(i); + batch.Put(handles[1], key_str[i], val); + } + s = db->Write(write_options, &batch); + ASSERT_OK(s); + + WriteBatchWithIndex wb; + // Write some data to the db + s = wb.Delete(handles[1], std::to_string(1)); + ASSERT_OK(s); + s = wb.Put(handles[1], std::to_string(2), "new_val" + std::to_string(2)); + ASSERT_OK(s); + // Write a lot of merges so when we call MultiGetFromBatchAndDB later on, + // it is forced to use std::vector in rocksdb::autovector to allocate + // MergeContexts. The number of merges needs to be > + // MultiGetContext::MAX_BATCH_SIZE + for (int i = 8; i < MultiGetContext::MAX_BATCH_SIZE + 24; ++i) { + s = wb.Merge(handles[1], std::to_string(i), "merge"); + ASSERT_OK(s); + } + + // MultiGet a lot of keys in order to force std::vector reallocations + std::vector keys; + for (int i = 0; i < MultiGetContext::MAX_BATCH_SIZE + 32; ++i) { + keys.emplace_back(key_str[i]); + } + std::vector values(keys.size()); + std::vector statuses(keys.size()); + + wb.MultiGetFromBatchAndDB(db, snapshot_read_options, handles[1], keys.size(), keys.data(), + values.data(), statuses.data(), false); + for (size_t i =0; i < keys.size(); ++i) { + if (i == 1) { + ASSERT_TRUE(statuses[1].IsNotFound()); + } else if (i == 2) { + ASSERT_TRUE(statuses[2].ok()); + ASSERT_EQ(values[2], "new_val" + std::to_string(2)); + } else if (i >= 8 && i < 56) { + ASSERT_TRUE(statuses[i].ok()); + ASSERT_EQ(values[i], "val" + std::to_string(i) + ",merge"); + } else { + ASSERT_TRUE(statuses[i].ok()); + if (values[i] != "val" + std::to_string(i)) { + ASSERT_EQ(values[i], "val" + std::to_string(i)); + } + } + } + + for (auto handle : handles) { + delete handle; + } +} + TEST_P(TransactionTest, ColumnFamiliesTest2) { WriteOptions write_options; ReadOptions read_options, snapshot_read_options; diff --git a/utilities/write_batch_with_index/write_batch_with_index.cc b/utilities/write_batch_with_index/write_batch_with_index.cc index fe99e43e6..54fb2292c 100644 --- a/utilities/write_batch_with_index/write_batch_with_index.cc +++ b/utilities/write_batch_with_index/write_batch_with_index.cc @@ -1004,10 +1004,13 @@ void WriteBatchWithIndex::MultiGetFromBatchAndDB( assert(result == WriteBatchWithIndexInternal::Result::kMergeInProgress || result == WriteBatchWithIndexInternal::Result::kNotFound); key_context.emplace_back(column_family, keys[i], &values[i], &statuses[i]); - sorted_keys.emplace_back(&key_context.back()); merges.emplace_back(result, std::move(merge_context)); } + for (KeyContext& key : key_context) { + sorted_keys.emplace_back(&key); + } + // Did not find key in batch OR could not resolve Merges. Try DB. static_cast_with_check(db->GetRootDB()) ->PrepareMultiGetKeys(key_context.size(), sorted_input, &sorted_keys);