fix deletion dropping in intra-L0

Summary:
`KeyNotExistsBeyondOutputLevel` didn't consider L0 files' key-ranges. So if a key was covered only by older L0 files' key-ranges, we would incorrectly drop deletions of that key during intra-L0 compaction. This PR simply skips the deletion-dropping optimization when the output level is L0.
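
To make the failure mode concrete, here is a minimal standalone sketch (hypothetical names and a simplified file model, not the actual RocksDB code) of why a check that scans only the levels strictly below the output level misses older L0 files once the output level is itself L0:

#include <iostream>
#include <string>
#include <vector>

// Hypothetical model for illustration: a file covers an inclusive user-key
// range. Levels 1+ are sorted and non-overlapping; L0 files may overlap.
struct FileRange {
  std::string smallest, largest;
};

// Pre-fix logic in spirit: only levels strictly below the output level are
// consulted, so for an intra-L0 compaction (output_level == 0) the older L0
// files outside the compaction are never examined.
bool KeyNotExistsBeyondOutputLevelSketch(
    const std::string& user_key, int output_level,
    const std::vector<std::vector<FileRange>>& levels) {
  for (size_t lvl = output_level + 1; lvl < levels.size(); ++lvl) {
    for (const FileRange& f : levels[lvl]) {
      if (user_key >= f.smallest && user_key <= f.largest) {
        return false;  // key may exist at a lower level; keep the deletion
      }
    }
  }
  return true;  // claims no level below the output can hold the key
}

int main() {
  std::vector<std::vector<FileRange>> levels = {
      {{"a", "c"}},  // L0: an older file that may still contain key "b"
      {{"x", "z"}},  // L1: does not cover "b"
  };
  // With output_level == 0 the scan starts at L1, skips the older L0 file,
  // and wrongly reports that a deletion of "b" is safe to drop.
  std::cout << KeyNotExistsBeyondOutputLevelSketch("b", 0, levels) << "\n";
}

Compiled and run, the sketch prints 1: the check claims the deletion of "b" is droppable even though an older L0 file may still contain "b".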
Closes https://github.com/facebook/rocksdb/pull/2726

Differential Revision: D5617286

Pulled By: ajkr

fbshipit-source-id: 4bff1396b06d49a828ba4542f249191052915bce
Andrew Kryczka authored 2017-08-11 18:01:28 -07:00, committed by Facebook Github Bot
parent 8254e9b57c
commit acf935e40f
3 changed files with 70 additions and 8 deletions

HISTORY.md

@@ -7,6 +7,7 @@
 ### Bug Fixes
 * Fix wrong latencies in `rocksdb.db.get.micros`, `rocksdb.db.write.micros`, and `rocksdb.sst.read.micros`.
+* Fix incorrect dropping of deletions during intra-L0 compaction.
 
 ## 5.7.0 (07/13/2017)
 ### Public API Change

db/compaction.cc

@@ -288,6 +288,10 @@ bool Compaction::KeyNotExistsBeyondOutputLevel(
   if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) {
     return bottommost_level_;
   }
+  if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel &&
+      output_level_ == 0) {
+    return false;
+  }
   // Maybe use binary search to find right entry instead of linear search?
   const Comparator* user_cmp = cfd_->user_comparator();
   for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) {
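
For context, this predicate feeds a tombstone-elision decision of roughly the following shape (a simplified sketch, not the actual CompactionIterator code; `bottommost_level`, `key_not_exists_beyond_output`, and `covered_by_snapshot` are illustrative stand-ins for the real bookkeeping):

// A Delete tombstone may be elided only when no older version of the key
// can survive underneath the compaction output.
bool CanDropDeletionSketch(bool bottommost_level,
                           bool key_not_exists_beyond_output,
                           bool covered_by_snapshot) {
  if (covered_by_snapshot) {
    return false;  // an open snapshot may still need to observe the deletion
  }
  return bottommost_level || key_not_exists_beyond_output;
}

Returning false for an L0 output level is the conservative choice: L0 files may overlap one another, and the level-scanning loop after the new guard never inspects L0 itself, so the patch disables the optimization outright rather than extending it to check sibling L0 files.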

db/db_compaction_test.cc

@@ -2602,13 +2602,11 @@ TEST_P(DBCompactionTestWithParam, IntraL0Compaction) {
   // Files 6-9 are the longest span of available files for which
   // work-per-deleted-file decreases (see "score" row above).
   for (int i = 0; i < 10; ++i) {
-    for (int j = 0; j < 2; ++j) {
-      ASSERT_OK(Put(Key(0), ""));  // prevents trivial move
-      if (i == 5) {
-        ASSERT_OK(Put(Key(i + 1), value + value));
-      } else {
-        ASSERT_OK(Put(Key(i + 1), value));
-      }
+    ASSERT_OK(Put(Key(0), ""));  // prevents trivial move
+    if (i == 5) {
+      ASSERT_OK(Put(Key(i + 1), value + value));
+    } else {
+      ASSERT_OK(Put(Key(i + 1), value));
     }
     ASSERT_OK(Flush());
   }
@@ -2623,10 +2621,69 @@ TEST_P(DBCompactionTestWithParam, IntraL0Compaction) {
   ASSERT_EQ(2, level_to_files[0].size());
   ASSERT_GT(level_to_files[1].size(), 0);
   for (int i = 0; i < 2; ++i) {
-    ASSERT_GE(level_to_files[0][0].fd.file_size, 1 << 21);
+    ASSERT_GE(level_to_files[0][i].fd.file_size, 1 << 21);
   }
 }
 
+TEST_P(DBCompactionTestWithParam, IntraL0CompactionDoesNotObsoleteDeletions) {
+  // regression test for issue #2722: L0->L0 compaction can resurrect deleted
+  // keys from older L0 files if L1+ files' key-ranges do not include the key.
+  Options options = CurrentOptions();
+  options.compression = kNoCompression;
+  options.level0_file_num_compaction_trigger = 5;
+  options.max_background_compactions = 2;
+  options.max_subcompactions = max_subcompactions_;
+  DestroyAndReopen(options);
+
+  const size_t kValueSize = 1 << 20;
+  Random rnd(301);
+  std::string value(RandomString(&rnd, kValueSize));
+
+  rocksdb::SyncPoint::GetInstance()->LoadDependency(
+      {{"LevelCompactionPicker::PickCompactionBySize:0",
+        "CompactionJob::Run():Start"}});
+  rocksdb::SyncPoint::GetInstance()->EnableProcessing();
+
+  // index:   0   1   2   3   4    5    6   7   8   9
+  // size:  1MB 1MB 1MB 1MB 1MB  1MB  1MB 1MB 1MB 1MB
+  // score:                     1.25 1.33 1.5 2.0 inf
+  //
+  // Files 0-4 will be included in an L0->L1 compaction.
+  //
+  // L0->L0 will be triggered since the sync points guarantee compaction to
+  // base level is still blocked when files 5-9 trigger another compaction.
+  // All files 5-9 are included in the L0->L0 due to work-per-deleted-file
+  // decreasing.
+  //
+  // Put a key-value in files 0-4. Delete that key in files 5-9. Verify the
+  // L0->L0 preserves the deletion such that the key remains deleted.
+  for (int i = 0; i < 10; ++i) {
+    // key 0 serves both to prevent trivial move and as the key we want to
+    // verify is not resurrected by L0->L0 compaction.
+    if (i < 5) {
+      ASSERT_OK(Put(Key(0), ""));
+    } else {
+      ASSERT_OK(Delete(Key(0)));
+    }
+    ASSERT_OK(Put(Key(i + 1), value));
+    ASSERT_OK(Flush());
+  }
+  dbfull()->TEST_WaitForCompact();
+  rocksdb::SyncPoint::GetInstance()->DisableProcessing();
+
+  std::vector<std::vector<FileMetaData>> level_to_files;
+  dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
+                                  &level_to_files);
+  ASSERT_GE(level_to_files.size(), 2);  // at least L0 and L1
+  // L0 has a single output file from L0->L0
+  ASSERT_EQ(1, level_to_files[0].size());
+  ASSERT_GT(level_to_files[1].size(), 0);
+  ASSERT_GE(level_to_files[0][0].fd.file_size, 1 << 22);
+
+  ReadOptions roptions;
+  std::string result;
+  ASSERT_TRUE(db_->Get(roptions, Key(0), &result).IsNotFound());
+}
+
 INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam,
                         ::testing::Values(std::make_tuple(1, true),
                                           std::make_tuple(1, false),
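
As a usage-level illustration of the symptom the fix prevents, the sketch below uses only the public RocksDB API (the database path is an assumption, and this snippet by itself does not force an intra-L0 compaction; the regression test above arranges that with sync points):

#include <cassert>
#include <string>
#include "rocksdb/db.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  rocksdb::DB* db = nullptr;
  rocksdb::Status s =
      rocksdb::DB::Open(options, "/tmp/intra_l0_demo", &db);  // path assumed
  assert(s.ok());

  assert(db->Put(rocksdb::WriteOptions(), "k", "v").ok());
  assert(db->Flush(rocksdb::FlushOptions()).ok());  // older L0 file holds "k"
  assert(db->Delete(rocksdb::WriteOptions(), "k").ok());
  assert(db->Flush(rocksdb::FlushOptions()).ok());  // newer L0 file holds the tombstone

  // Before the fix, an intra-L0 compaction that picked only the newer file
  // could drop the tombstone, resurrecting "k" from the older L0 file.
  std::string value;
  s = db->Get(rocksdb::ReadOptions(), "k", &value);
  assert(s.IsNotFound());  // expected: the deletion is preserved

  delete db;
  return 0;
}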