fix deletion dropping in intra-L0
Summary: `KeyNotExistsBeyondOutputLevel` didn't consider L0 files' key-ranges. So if a key only was covered by older L0 files' key-ranges, we would incorrectly drop deletions of that key. This PR just skips the deletion-dropping optimization when output level is L0. Closes https://github.com/facebook/rocksdb/pull/2726 Differential Revision: D5617286 Pulled By: ajkr fbshipit-source-id: 4bff1396b06d49a828ba4542f249191052915bce
This commit is contained in:
parent
8254e9b57c
commit
acf935e40f
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
* Fix wrong latencies in `rocksdb.db.get.micros`, `rocksdb.db.write.micros`, and `rocksdb.sst.read.micros`.
|
* Fix wrong latencies in `rocksdb.db.get.micros`, `rocksdb.db.write.micros`, and `rocksdb.sst.read.micros`.
|
||||||
|
* Fix incorrect dropping of deletions during intra-L0 compaction.
|
||||||
|
|
||||||
## 5.7.0 (07/13/2017)
|
## 5.7.0 (07/13/2017)
|
||||||
### Public API Change
|
### Public API Change
|
||||||
|
@ -288,6 +288,10 @@ bool Compaction::KeyNotExistsBeyondOutputLevel(
|
|||||||
if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) {
|
if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) {
|
||||||
return bottommost_level_;
|
return bottommost_level_;
|
||||||
}
|
}
|
||||||
|
if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel &&
|
||||||
|
output_level_ == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
// Maybe use binary search to find right entry instead of linear search?
|
// Maybe use binary search to find right entry instead of linear search?
|
||||||
const Comparator* user_cmp = cfd_->user_comparator();
|
const Comparator* user_cmp = cfd_->user_comparator();
|
||||||
for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) {
|
for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) {
|
||||||
|
@ -2602,13 +2602,11 @@ TEST_P(DBCompactionTestWithParam, IntraL0Compaction) {
|
|||||||
// Files 6-9 are the longest span of available files for which
|
// Files 6-9 are the longest span of available files for which
|
||||||
// work-per-deleted-file decreases (see "score" row above).
|
// work-per-deleted-file decreases (see "score" row above).
|
||||||
for (int i = 0; i < 10; ++i) {
|
for (int i = 0; i < 10; ++i) {
|
||||||
for (int j = 0; j < 2; ++j) {
|
ASSERT_OK(Put(Key(0), "")); // prevents trivial move
|
||||||
ASSERT_OK(Put(Key(0), "")); // prevents trivial move
|
if (i == 5) {
|
||||||
if (i == 5) {
|
ASSERT_OK(Put(Key(i + 1), value + value));
|
||||||
ASSERT_OK(Put(Key(i + 1), value + value));
|
} else {
|
||||||
} else {
|
ASSERT_OK(Put(Key(i + 1), value));
|
||||||
ASSERT_OK(Put(Key(i + 1), value));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
ASSERT_OK(Flush());
|
ASSERT_OK(Flush());
|
||||||
}
|
}
|
||||||
@ -2623,10 +2621,69 @@ TEST_P(DBCompactionTestWithParam, IntraL0Compaction) {
|
|||||||
ASSERT_EQ(2, level_to_files[0].size());
|
ASSERT_EQ(2, level_to_files[0].size());
|
||||||
ASSERT_GT(level_to_files[1].size(), 0);
|
ASSERT_GT(level_to_files[1].size(), 0);
|
||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
ASSERT_GE(level_to_files[0][0].fd.file_size, 1 << 21);
|
ASSERT_GE(level_to_files[0][i].fd.file_size, 1 << 21);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_P(DBCompactionTestWithParam, IntraL0CompactionDoesNotObsoleteDeletions) {
|
||||||
|
// regression test for issue #2722: L0->L0 compaction can resurrect deleted
|
||||||
|
// keys from older L0 files if L1+ files' key-ranges do not include the key.
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.compression = kNoCompression;
|
||||||
|
options.level0_file_num_compaction_trigger = 5;
|
||||||
|
options.max_background_compactions = 2;
|
||||||
|
options.max_subcompactions = max_subcompactions_;
|
||||||
|
DestroyAndReopen(options);
|
||||||
|
|
||||||
|
const size_t kValueSize = 1 << 20;
|
||||||
|
Random rnd(301);
|
||||||
|
std::string value(RandomString(&rnd, kValueSize));
|
||||||
|
|
||||||
|
rocksdb::SyncPoint::GetInstance()->LoadDependency(
|
||||||
|
{{"LevelCompactionPicker::PickCompactionBySize:0",
|
||||||
|
"CompactionJob::Run():Start"}});
|
||||||
|
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
|
||||||
|
|
||||||
|
// index: 0 1 2 3 4 5 6 7 8 9
|
||||||
|
// size: 1MB 1MB 1MB 1MB 1MB 1MB 1MB 1MB 1MB 1MB
|
||||||
|
// score: 1.25 1.33 1.5 2.0 inf
|
||||||
|
//
|
||||||
|
// Files 0-4 will be included in an L0->L1 compaction.
|
||||||
|
//
|
||||||
|
// L0->L0 will be triggered since the sync points guarantee compaction to base
|
||||||
|
// level is still blocked when files 5-9 trigger another compaction. All files
|
||||||
|
// 5-9 are included in the L0->L0 due to work-per-deleted file decreasing.
|
||||||
|
//
|
||||||
|
// Put a key-value in files 0-4. Delete that key in files 5-9. Verify the
|
||||||
|
// L0->L0 preserves the deletion such that the key remains deleted.
|
||||||
|
for (int i = 0; i < 10; ++i) {
|
||||||
|
// key 0 serves both to prevent trivial move and as the key we want to
|
||||||
|
// verify is not resurrected by L0->L0 compaction.
|
||||||
|
if (i < 5) {
|
||||||
|
ASSERT_OK(Put(Key(0), ""));
|
||||||
|
} else {
|
||||||
|
ASSERT_OK(Delete(Key(0)));
|
||||||
|
}
|
||||||
|
ASSERT_OK(Put(Key(i + 1), value));
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
}
|
||||||
|
dbfull()->TEST_WaitForCompact();
|
||||||
|
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
|
||||||
|
|
||||||
|
std::vector<std::vector<FileMetaData>> level_to_files;
|
||||||
|
dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
|
||||||
|
&level_to_files);
|
||||||
|
ASSERT_GE(level_to_files.size(), 2); // at least L0 and L1
|
||||||
|
// L0 has a single output file from L0->L0
|
||||||
|
ASSERT_EQ(1, level_to_files[0].size());
|
||||||
|
ASSERT_GT(level_to_files[1].size(), 0);
|
||||||
|
ASSERT_GE(level_to_files[0][0].fd.file_size, 1 << 22);
|
||||||
|
|
||||||
|
ReadOptions roptions;
|
||||||
|
std::string result;
|
||||||
|
ASSERT_TRUE(db_->Get(roptions, Key(0), &result).IsNotFound());
|
||||||
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam,
|
INSTANTIATE_TEST_CASE_P(DBCompactionTestWithParam, DBCompactionTestWithParam,
|
||||||
::testing::Values(std::make_tuple(1, true),
|
::testing::Values(std::make_tuple(1, true),
|
||||||
std::make_tuple(1, false),
|
std::make_tuple(1, false),
|
||||||
|
Loading…
Reference in New Issue
Block a user