From 9f33284d235b0560c98c340dbb30e76ca7915225 Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 11 Aug 2017 18:01:28 -0700 Subject: [PATCH] fix deletion dropping in intra-L0 Summary: `KeyNotExistsBeyondOutputLevel` didn't consider L0 files' key-ranges. So if a key only was covered by older L0 files' key-ranges, we would incorrectly drop deletions of that key. This PR just skips the deletion-dropping optimization when output level is L0. Closes https://github.com/facebook/rocksdb/pull/2726 Differential Revision: D5617286 Pulled By: ajkr fbshipit-source-id: 4bff1396b06d49a828ba4542f249191052915bce --- db/compaction.cc | 4 +++ db/db_compaction_test.cc | 73 +++++++++++++++++++++++++++++++++++----- 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/db/compaction.cc b/db/compaction.cc index b92a9d728..25195d59c 100644 --- a/db/compaction.cc +++ b/db/compaction.cc @@ -287,6 +287,10 @@ bool Compaction::KeyNotExistsBeyondOutputLevel( if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) { return bottommost_level_; } + if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel && + output_level_ == 0) { + return false; + } // Maybe use binary search to find right entry instead of linear search? const Comparator* user_cmp = cfd_->user_comparator(); for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) { diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index c8866b5a0..8905af9ad 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -2603,13 +2603,11 @@ TEST_P(DBCompactionTestWithParam, IntraL0Compaction) { // Files 6-9 are the longest span of available files for which // work-per-deleted-file decreases (see "score" row above). for (int i = 0; i < 10; ++i) { - for (int j = 0; j < 2; ++j) { - ASSERT_OK(Put(Key(0), "")); // prevents trivial move - if (i == 5) { - ASSERT_OK(Put(Key(i + 1), value + value)); - } else { - ASSERT_OK(Put(Key(i + 1), value)); - } + ASSERT_OK(Put(Key(0), "")); // prevents trivial move + if (i == 5) { + ASSERT_OK(Put(Key(i + 1), value + value)); + } else { + ASSERT_OK(Put(Key(i + 1), value)); } ASSERT_OK(Flush()); } @@ -2624,10 +2622,69 @@ TEST_P(DBCompactionTestWithParam, IntraL0Compaction) { ASSERT_EQ(2, level_to_files[0].size()); ASSERT_GT(level_to_files[1].size(), 0); for (int i = 0; i < 2; ++i) { - ASSERT_GE(level_to_files[0][0].fd.file_size, 1 << 21); + ASSERT_GE(level_to_files[0][i].fd.file_size, 1 << 21); } } +TEST_P(DBCompactionTestWithParam, IntraL0CompactionDoesNotObsoleteDeletions) { + // regression test for issue #2722: L0->L0 compaction can resurrect deleted + // keys from older L0 files if L1+ files' key-ranges do not include the key. + Options options = CurrentOptions(); + options.compression = kNoCompression; + options.level0_file_num_compaction_trigger = 5; + options.max_background_compactions = 2; + options.max_subcompactions = max_subcompactions_; + DestroyAndReopen(options); + + const size_t kValueSize = 1 << 20; + Random rnd(301); + std::string value(RandomString(&rnd, kValueSize)); + + rocksdb::SyncPoint::GetInstance()->LoadDependency( + {{"LevelCompactionPicker::PickCompactionBySize:0", + "CompactionJob::Run():Start"}}); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + // index: 0 1 2 3 4 5 6 7 8 9 + // size: 1MB 1MB 1MB 1MB 1MB 1MB 1MB 1MB 1MB 1MB + // score: 1.25 1.33 1.5 2.0 inf + // + // Files 0-4 will be included in an L0->L1 compaction. + // + // L0->L0 will be triggered since the sync points guarantee compaction to base + // level is still blocked when files 5-9 trigger another compaction. All files + // 5-9 are included in the L0->L0 due to work-per-deleted file decreasing. + // + // Put a key-value in files 0-4. Delete that key in files 5-9. Verify the + // L0->L0 preserves the deletion such that the key remains deleted. + for (int i = 0; i < 10; ++i) { + // key 0 serves both to prevent trivial move and as the key we want to + // verify is not resurrected by L0->L0 compaction. + if (i < 5) { + ASSERT_OK(Put(Key(0), "")); + } else { + ASSERT_OK(Delete(Key(0))); + } + ASSERT_OK(Put(Key(i + 1), value)); + ASSERT_OK(Flush()); + } + dbfull()->TEST_WaitForCompact(); + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); + + std::vector> level_to_files; + dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(), + &level_to_files); + ASSERT_GE(level_to_files.size(), 2); // at least L0 and L1 + // L0 has a single output file from L0->L0 + ASSERT_EQ(1, level_to_files[0].size()); + ASSERT_GT(level_to_files[1].size(), 0); + ASSERT_GE(level_to_files[0][0].fd.file_size, 1 << 22); + + ReadOptions roptions; + std::string result; + ASSERT_TRUE(db_->Get(roptions, Key(0), &result).IsNotFound()); +} + INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam, ::testing::Values(std::make_tuple(1, true), std::make_tuple(1, false),