From d0c1a01c1bcbf91bcfc900d11d7ad341fb08b6e3 Mon Sep 17 00:00:00 2001 From: Hiep Date: Thu, 3 Sep 2020 14:34:58 -0700 Subject: [PATCH] Avoid converting MERGES to PUTS when allow_ingest_behind is true (#7166) Summary: - Closes https://github.com/facebook/rocksdb/issues/6490 - Currently MERGEs are converted to PUTs when compaction is at the bottommost level or has reached the beginning of the key. With `allow_ingest_behind = true`, data may later be ingested underneath the compaction output, so such a converted PUT can wrongly shadow a base PUT that is ingested in the future. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7166 Test Plan: - Automated: `make all check` - Manual: With `allow_ingest_behind = true`, add Merge operations to a key then run compaction. Then ingest external files to make sure the base case is properly compacted with the existing Merges. Reviewed By: cheng-chang Differential Revision: D23325425 Pulled By: ajkr fbshipit-source-id: 3eb415eb7b381b5453e45245393566153b1abb68 --- HISTORY.md | 1 + db/compaction/compaction_iterator.cc | 6 ++- db/compaction/compaction_iterator_test.cc | 58 ++++++++++++++++++++++- 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index c41f9b6bf..27e7cdec3 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -8,6 +8,7 @@ * Fixed a bug in version 6.12 in which BackupEngine::CreateNewBackup could fail intermittently with non-OK status when backing up a read-write DB configured with a DBOptions::file_checksum_gen_factory. * Fix useless no-op compactions scheduled upon snapshot release when options.disable-auto-compactions = true. * Fix a bug when max_write_buffer_size_to_maintain is set, immutable flushed memtable destruction is delayed until the next super version is installed. A memtable is not added to delete list because of its reference hold by super version and super version doesn't switch because of empt delete list. So memory usage keeps on increasing beyond write_buffer_size + max_write_buffer_size_to_maintain. +* Avoid converting MERGES to PUTS when allow_ingest_behind is true. 
### New Features * A new option `std::shared_ptr file_checksum_gen_factory` is added to `BackupableDBOptions`. The default value for this option is `nullptr`. If this option is null, the default backup engine checksum function (crc32c) will be used for creating, verifying, or restoring backups. If it is not null and is set to the DB custom checksum factory, the custom checksum function used in DB will also be used for creating, verifying, or restoring backups, in addition to the default checksum function (crc32c). If it is not null and is set to a custom checksum factory different than the DB custom checksum factory (which may be null), BackupEngine will return `Status::InvalidArgument()`. diff --git a/db/compaction/compaction_iterator.cc b/db/compaction/compaction_iterator.cc index 3a99f19a8..cc1cd6e96 100644 --- a/db/compaction/compaction_iterator.cc +++ b/db/compaction/compaction_iterator.cc @@ -86,8 +86,10 @@ CompactionIterator::CompactionIterator( info_log_(info_log) { assert(compaction_filter_ == nullptr || compaction_ != nullptr); assert(snapshots_ != nullptr); - bottommost_level_ = - compaction_ == nullptr ? false : compaction_->bottommost_level(); + bottommost_level_ = compaction_ == nullptr + ? 
false + : compaction_->bottommost_level() && + !compaction_->allow_ingest_behind(); if (compaction_ != nullptr) { level_ptrs_ = std::vector(compaction_->number_levels(), 0); } diff --git a/db/compaction/compaction_iterator_test.cc b/db/compaction/compaction_iterator_test.cc index 0c50fb9ba..76a207e00 100644 --- a/db/compaction/compaction_iterator_test.cc +++ b/db/compaction/compaction_iterator_test.cc @@ -169,13 +169,15 @@ class FakeCompaction : public CompactionIterator::CompactionProxy { Slice GetLargestUserKey() const override { return "\xff\xff\xff\xff\xff\xff\xff\xff\xff"; } - bool allow_ingest_behind() const override { return false; } + bool allow_ingest_behind() const override { return is_allow_ingest_behind; } bool preserve_deletes() const override { return false; } bool key_not_exists_beyond_output_level = false; bool is_bottommost_level = false; + + bool is_allow_ingest_behind = false; }; // A simplifed snapshot checker which assumes each snapshot has a global @@ -237,6 +239,7 @@ class CompactionIteratorTest : public testing::TestWithParam { if (filter || bottommost_level) { compaction_proxy_ = new FakeCompaction(); compaction_proxy_->is_bottommost_level = bottommost_level; + compaction_proxy_->is_allow_ingest_behind = AllowIngestBehind(); compaction.reset(compaction_proxy_); } bool use_snapshot_checker = UseSnapshotChecker() || GetParam(); @@ -266,6 +269,8 @@ class CompactionIteratorTest : public testing::TestWithParam { virtual bool UseSnapshotChecker() const { return false; } + virtual bool AllowIngestBehind() const { return false; } + void RunTest( const std::vector& input_keys, const std::vector& input_values, @@ -697,6 +702,18 @@ TEST_P(CompactionIteratorTest, RemoveSingleDeletionAtBottomLevel) { nullptr /*compaction_filter*/, true /*bottommost_level*/); } +TEST_P(CompactionIteratorTest, ConvertToPutAtBottom) { + std::shared_ptr merge_op = + MergeOperators::CreateStringAppendOperator(); + RunTest({test::KeyStr("a", 4, kTypeMerge), test::KeyStr("a", 
3, kTypeMerge), + test::KeyStr("a", 2, kTypeMerge), test::KeyStr("b", 1, kTypeValue)}, + {"a4", "a3", "a2", "b1"}, + {test::KeyStr("a", 0, kTypeValue), test::KeyStr("b", 0, kTypeValue)}, + {"a2,a3,a4", "b1"}, kMaxSequenceNumber /*last_committed_seq*/, + merge_op.get(), nullptr /*compaction_filter*/, + true /*bottomost_level*/); +} + INSTANTIATE_TEST_CASE_P(CompactionIteratorTestInstance, CompactionIteratorTest, testing::Values(true, false)); @@ -968,6 +985,45 @@ TEST_F(CompactionIteratorWithSnapshotCheckerTest, CompactionFilter_FullMerge) { compaction_filter.get()); } +// Tests how CompactionIterator work together with AllowIngestBehind. +class CompactionIteratorWithAllowIngestBehindTest + : public CompactionIteratorTest { + public: + bool AllowIngestBehind() const override { return true; } +}; + +// When allow_ingest_behind is set, compaction iterator is not targeting +// the bottommost level since there is no guarantee there won't be further +// data ingested under the compaction output in future. 
+TEST_P(CompactionIteratorWithAllowIngestBehindTest, NoConvertToPutAtBottom) { + std::shared_ptr merge_op = + MergeOperators::CreateStringAppendOperator(); + RunTest({test::KeyStr("a", 4, kTypeMerge), test::KeyStr("a", 3, kTypeMerge), + test::KeyStr("a", 2, kTypeMerge), test::KeyStr("b", 1, kTypeValue)}, + {"a4", "a3", "a2", "b1"}, + {test::KeyStr("a", 4, kTypeMerge), test::KeyStr("b", 1, kTypeValue)}, + {"a2,a3,a4", "b1"}, kMaxSequenceNumber /*last_committed_seq*/, + merge_op.get(), nullptr /*compaction_filter*/, + true /*bottomost_level*/); +} + +TEST_P(CompactionIteratorWithAllowIngestBehindTest, + MergeToPutIfEncounteredPutAtBottom) { + std::shared_ptr merge_op = + MergeOperators::CreateStringAppendOperator(); + RunTest({test::KeyStr("a", 4, kTypeMerge), test::KeyStr("a", 3, kTypeMerge), + test::KeyStr("a", 2, kTypeValue), test::KeyStr("b", 1, kTypeValue)}, + {"a4", "a3", "a2", "b1"}, + {test::KeyStr("a", 4, kTypeValue), test::KeyStr("b", 1, kTypeValue)}, + {"a2,a3,a4", "b1"}, kMaxSequenceNumber /*last_committed_seq*/, + merge_op.get(), nullptr /*compaction_filter*/, + true /*bottomost_level*/); +} + +INSTANTIATE_TEST_CASE_P(CompactionIteratorWithAllowIngestBehindTestInstance, + CompactionIteratorWithAllowIngestBehindTest, + testing::Values(true, false)); + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) {