From ebe89ef9d84cf1a05a47b8d03c7509f9f103ad10 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Tue, 4 Jun 2019 10:17:24 -0700 Subject: [PATCH] Fix merging range tombstone covering put during flush/compaction (#5406) Summary: Flush/compaction use `MergeUntil` which has a special code path to handle a merge ending with a non-`Merge` point key. In particular if that key is a `Put` we forgot to check whether it is covered by a range tombstone. If it is covered then we must not include it in the following call to `TimedFullMerge`. Fixes #5392. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5406 Differential Revision: D15611144 Pulled By: sagar0 fbshipit-source-id: ba6a7863ca2d043f591de78fd0c4f4561f0c500e --- HISTORY.md | 1 + db/db_range_del_test.cc | 24 ++++++++++++++++++++++++ db/merge_helper.cc | 10 +++++++++- 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index b9b6998c6..b3c2ef14a 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -24,6 +24,7 @@ ### Bug Fixes * Fix a bug in WAL replay of secondary instance by skipping write batches with older sequence numbers than the current last sequence number. +* Fix flush's/compaction's merge processing logic which allowed `Put`s covered by range tombstones to reappear. Note `Put`s may exist even if the user only ever called `Merge()` due to an internal conversion during compaction to the bottommost level. ## 6.2.0 (4/30/2019) ### New Features diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index 16d682fc0..e58095b2d 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -491,6 +491,30 @@ TEST_F(DBRangeDelTest, CompactionRemovesCoveredMergeOperands) { ASSERT_EQ(expected, actual); } +TEST_F(DBRangeDelTest, PutDeleteRangeMergeFlush) { + // Test the sequence of operations: (1) Put, (2) DeleteRange, (3) Merge, (4) + // Flush. The `CompactionIterator` previously had a bug where we forgot to + // check for covering range tombstones when processing the (1) Put, causing + // it to reappear after the flush. + Options opts = CurrentOptions(); + opts.merge_operator = MergeOperators::CreateUInt64AddOperator(); + Reopen(opts); + + std::string val; + PutFixed64(&val, 1); + ASSERT_OK(db_->Put(WriteOptions(), "key", val)); + ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), + "key", "key_")); + ASSERT_OK(db_->Merge(WriteOptions(), "key", val)); + ASSERT_OK(db_->Flush(FlushOptions())); + + ReadOptions read_opts; + std::string expected, actual; + ASSERT_OK(db_->Get(read_opts, "key", &actual)); + PutFixed64(&expected, 1); + ASSERT_EQ(expected, actual); +} + // NumTableFilesAtLevel() is not supported in ROCKSDB_LITE #ifndef ROCKSDB_LITE TEST_F(DBRangeDelTest, ObsoleteTombstoneCleanup) { diff --git a/db/merge_helper.cc b/db/merge_helper.cc index 4a4d2fb71..b5ae924ff 100644 --- a/db/merge_helper.cc +++ b/db/merge_helper.cc @@ -201,7 +201,15 @@ Status MergeHelper::MergeUntil(InternalIterator* iter, // want. Also if we're in compaction and it's a put, it would be nice to // run compaction filter on it. const Slice val = iter->value(); - const Slice* val_ptr = (kTypeValue == ikey.type) ? &val : nullptr; + const Slice* val_ptr; + if (kTypeValue == ikey.type && + (range_del_agg == nullptr || + !range_del_agg->ShouldDelete( + ikey, RangeDelPositioningMode::kForwardTraversal))) { + val_ptr = &val; + } else { + val_ptr = nullptr; + } std::string merge_result; s = TimedFullMerge(user_merge_operator_, ikey.user_key, val_ptr, merge_context_.GetOperands(), &merge_result, logger_,