Preserve overlapping file endpoint invariant
Summary: Fix for #2833. - In `DeleteFilesInRange`, use `GetCleanInputsWithinInterval` instead of `GetOverlappingInputs` to make sure we get a clean cut set of files to delete. - In `GetCleanInputsWithinInterval`, support nullptr as `begin_key` or `end_key`. - In `GetOverlappingInputsRangeBinarySearch`, move the assertion for non-empty range away from `ExtendFileRangeWithinInterval`, which should be allowed to return an empty range (via `end_index < begin_index`). Closes https://github.com/facebook/rocksdb/pull/2843 Differential Revision: D5772387 Pulled By: ajkr fbshipit-source-id: e554e8461823c6be82b21a9262a2da02b3957881
This commit is contained in:
parent
497cd90d6a
commit
8486eab5ad
@ -1517,6 +1517,60 @@ TEST_F(DBCompactionTest, DeleteFileRange) {
|
||||
ASSERT_GT(old_num_files, new_num_files);
|
||||
}
|
||||
|
||||
TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) {
|
||||
// regression test for #2833: groups of files whose user-keys overlap at the
|
||||
// endpoints could be split by `DeleteFilesInRange`. This caused old data to
|
||||
// reappear, either because a new version of the key was removed, or a range
|
||||
// deletion was partially dropped. It could also cause non-overlapping
|
||||
// invariant to be violated if the files dropped by DeleteFilesInRange were
|
||||
// a subset of files that a range deletion spans.
|
||||
const int kNumL0Files = 2;
|
||||
const int kValSize = 8 << 10; // 8KB
|
||||
Options options = CurrentOptions();
|
||||
options.level0_file_num_compaction_trigger = kNumL0Files;
|
||||
options.target_file_size_base = 1 << 10; // 1KB
|
||||
DestroyAndReopen(options);
|
||||
|
||||
// The snapshot prevents key 1 from having its old version dropped. The low
|
||||
// `target_file_size_base` ensures two keys will be in each output file.
|
||||
const Snapshot* snapshot = nullptr;
|
||||
Random rnd(301);
|
||||
// The value indicates which flush the key belonged to, which is enough
|
||||
// for us to determine the keys' relative ages. After L0 flushes finish,
|
||||
// files look like:
|
||||
//
|
||||
// File 0: 0 -> vals[0], 1 -> vals[0]
|
||||
// File 1: 1 -> vals[1], 2 -> vals[1]
|
||||
//
|
||||
// Then L0->L1 compaction happens, which outputs keys as follows:
|
||||
//
|
||||
// File 0: 0 -> vals[0], 1 -> vals[1]
|
||||
// File 1: 1 -> vals[0], 2 -> vals[1]
|
||||
//
|
||||
// DeleteFilesInRange shouldn't be allowed to drop just file 0, as that
|
||||
// would cause `1 -> vals[0]` (an older key) to reappear.
|
||||
std::string vals[kNumL0Files];
|
||||
for (int i = 0; i < kNumL0Files; ++i) {
|
||||
vals[i] = RandomString(&rnd, kValSize);
|
||||
Put(Key(i), vals[i]);
|
||||
Put(Key(i + 1), vals[i]);
|
||||
Flush();
|
||||
if (i == 0) {
|
||||
snapshot = db_->GetSnapshot();
|
||||
}
|
||||
}
|
||||
dbfull()->TEST_WaitForCompact();
|
||||
|
||||
// Verify `DeleteFilesInRange` can't drop only file 0 which would cause
|
||||
// "1 -> vals[0]" to reappear.
|
||||
Slice begin = Key(0);
|
||||
Slice end = Key(1);
|
||||
ASSERT_OK(DeleteFilesInRange(db_, db_->DefaultColumnFamily(), &begin, &end));
|
||||
ASSERT_EQ(vals[1], Get(Key(1)));
|
||||
|
||||
db_->ReleaseSnapshot(snapshot);
|
||||
}
|
||||
|
||||
TEST_P(DBCompactionTestWithParam, TrivialMoveToLastLevelWithFiles) {
|
||||
int32_t trivial_move = 0;
|
||||
int32_t non_trivial_move = 0;
|
||||
|
@ -2158,25 +2158,19 @@ Status DBImpl::DeleteFilesInRange(ColumnFamilyHandle* column_family,
|
||||
end_key = &end_storage;
|
||||
}
|
||||
|
||||
vstorage->GetOverlappingInputs(i, begin_key, end_key, &level_files, -1,
|
||||
nullptr, false);
|
||||
vstorage->GetCleanInputsWithinInterval(i, begin_key, end_key,
|
||||
&level_files, -1 /* hint_index */,
|
||||
nullptr /* file_index */);
|
||||
FileMetaData* level_file;
|
||||
for (uint32_t j = 0; j < level_files.size(); j++) {
|
||||
level_file = level_files[j];
|
||||
if (((begin == nullptr) ||
|
||||
(cfd->internal_comparator().user_comparator()->Compare(
|
||||
level_file->smallest.user_key(), *begin) >= 0)) &&
|
||||
((end == nullptr) ||
|
||||
(cfd->internal_comparator().user_comparator()->Compare(
|
||||
level_file->largest.user_key(), *end) <= 0))) {
|
||||
if (level_file->being_compacted) {
|
||||
continue;
|
||||
}
|
||||
edit.SetColumnFamily(cfd->GetID());
|
||||
edit.DeleteFile(i, level_file->fd.GetNumber());
|
||||
deleted_files.push_back(level_file);
|
||||
level_file->being_compacted = true;
|
||||
if (level_file->being_compacted) {
|
||||
continue;
|
||||
}
|
||||
edit.SetColumnFamily(cfd->GetID());
|
||||
edit.DeleteFile(i, level_file->fd.GetNumber());
|
||||
deleted_files.push_back(level_file);
|
||||
level_file->being_compacted = true;
|
||||
}
|
||||
}
|
||||
if (edit.GetDeletedFiles().empty()) {
|
||||
|
@ -1851,27 +1851,33 @@ void VersionStorageInfo::GetOverlappingInputs(
|
||||
void VersionStorageInfo::GetCleanInputsWithinInterval(
|
||||
int level, const InternalKey* begin, const InternalKey* end,
|
||||
std::vector<FileMetaData*>* inputs, int hint_index, int* file_index) const {
|
||||
if (level >= num_non_empty_levels_) {
|
||||
// this level is empty, no inputs within range
|
||||
return;
|
||||
}
|
||||
|
||||
inputs->clear();
|
||||
Slice user_begin, user_end;
|
||||
if (begin != nullptr) {
|
||||
user_begin = begin->user_key();
|
||||
}
|
||||
if (end != nullptr) {
|
||||
user_end = end->user_key();
|
||||
}
|
||||
if (file_index) {
|
||||
*file_index = -1;
|
||||
}
|
||||
if (begin != nullptr && end != nullptr && level > 0) {
|
||||
GetOverlappingInputsRangeBinarySearch(level, user_begin, user_end, inputs,
|
||||
hint_index, file_index,
|
||||
true /* within_interval */);
|
||||
if (level >= num_non_empty_levels_ || level == 0 ||
|
||||
level_files_brief_[level].num_files == 0) {
|
||||
// this level is empty, no inputs within range
|
||||
// also don't support clean input interval within L0
|
||||
return;
|
||||
}
|
||||
|
||||
Slice user_begin, user_end;
|
||||
const auto& level_files = level_files_brief_[level];
|
||||
if (begin == nullptr) {
|
||||
user_begin = ExtractUserKey(level_files.files[0].smallest_key);
|
||||
} else {
|
||||
user_begin = begin->user_key();
|
||||
}
|
||||
if (end == nullptr) {
|
||||
user_end = ExtractUserKey(
|
||||
level_files.files[level_files.num_files - 1].largest_key);
|
||||
} else {
|
||||
user_end = end->user_key();
|
||||
}
|
||||
GetOverlappingInputsRangeBinarySearch(level, user_begin, user_end, inputs,
|
||||
hint_index, file_index,
|
||||
true /* within_interval */);
|
||||
}
|
||||
|
||||
// Store in "*inputs" all files in "level" that overlap [begin,end]
|
||||
@ -1934,8 +1940,8 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
|
||||
} else {
|
||||
ExtendFileRangeOverlappingInterval(level, user_begin, user_end, mid,
|
||||
&start_index, &end_index);
|
||||
assert(end_index >= start_index);
|
||||
}
|
||||
assert(end_index >= start_index);
|
||||
// insert overlapping files into vector
|
||||
for (int i = start_index; i <= end_index; i++) {
|
||||
inputs->push_back(files_[level][i]);
|
||||
|
@ -325,7 +325,8 @@ void CancelAllBackgroundWork(DB* db, bool wait = false);
|
||||
|
||||
// Delete files which are entirely in the given range
|
||||
// Could leave some keys in the range which are in files which are not
|
||||
// entirely in the range.
|
||||
// entirely in the range. Also leaves L0 files regardless of whether they're
|
||||
// in the range.
|
||||
// Snapshots before the delete might not see the data in the given range.
|
||||
Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
|
||||
const Slice* begin, const Slice* end);
|
||||
|
Loading…
Reference in New Issue
Block a user