Fix IngestExternalFile overlapping check (#5649)
Summary: Previously, the end key of a range deletion tombstone was considered exclusive for the purposes of deletion, but considered inclusive when checking if two SSTables overlap. For example, an SSTable with a range deletion tombstone [a, b) would be considered overlapping with an SSTable with a range deletion tombstone [b, c). This commit fixes this check. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5649 Differential Revision: D16808765 Pulled By: anand1976 fbshipit-source-id: 5c7ad1c027e4f778d35070e5dae1b8e6037e0d68
This commit is contained in:
parent
d92a59b6f2
commit
d61d4507c0
@ -835,6 +835,48 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) {
|
||||
ASSERT_EQ(2, NumTableFilesAtLevel(options.num_levels - 1));
|
||||
}
|
||||
|
||||
TEST_F(ExternalSSTFileBasicTest, AdjacentRangeDeletionTombstones) {
|
||||
Options options = CurrentOptions();
|
||||
SstFileWriter sst_file_writer(EnvOptions(), options);
|
||||
|
||||
// file8.sst (delete 300 => 400)
|
||||
std::string file8 = sst_files_dir_ + "file8.sst";
|
||||
ASSERT_OK(sst_file_writer.Open(file8));
|
||||
ASSERT_OK(sst_file_writer.DeleteRange(Key(300), Key(400)));
|
||||
ExternalSstFileInfo file8_info;
|
||||
Status s = sst_file_writer.Finish(&file8_info);
|
||||
ASSERT_TRUE(s.ok()) << s.ToString();
|
||||
ASSERT_EQ(file8_info.file_path, file8);
|
||||
ASSERT_EQ(file8_info.num_entries, 0);
|
||||
ASSERT_EQ(file8_info.smallest_key, "");
|
||||
ASSERT_EQ(file8_info.largest_key, "");
|
||||
ASSERT_EQ(file8_info.num_range_del_entries, 1);
|
||||
ASSERT_EQ(file8_info.smallest_range_del_key, Key(300));
|
||||
ASSERT_EQ(file8_info.largest_range_del_key, Key(400));
|
||||
|
||||
// file9.sst (delete 400 => 500)
|
||||
std::string file9 = sst_files_dir_ + "file9.sst";
|
||||
ASSERT_OK(sst_file_writer.Open(file9));
|
||||
ASSERT_OK(sst_file_writer.DeleteRange(Key(400), Key(500)));
|
||||
ExternalSstFileInfo file9_info;
|
||||
s = sst_file_writer.Finish(&file9_info);
|
||||
ASSERT_TRUE(s.ok()) << s.ToString();
|
||||
ASSERT_EQ(file9_info.file_path, file9);
|
||||
ASSERT_EQ(file9_info.num_entries, 0);
|
||||
ASSERT_EQ(file9_info.smallest_key, "");
|
||||
ASSERT_EQ(file9_info.largest_key, "");
|
||||
ASSERT_EQ(file9_info.num_range_del_entries, 1);
|
||||
ASSERT_EQ(file9_info.smallest_range_del_key, Key(400));
|
||||
ASSERT_EQ(file9_info.largest_range_del_key, Key(500));
|
||||
|
||||
// Range deletion tombstones are exclusive on their end key, so these SSTs
|
||||
// should not be considered as overlapping.
|
||||
s = DeprecatedAddFile({file8, file9});
|
||||
ASSERT_TRUE(s.ok()) << s.ToString();
|
||||
ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
|
||||
DestroyAndRecreateExternalSSTFilesDir();
|
||||
}
|
||||
|
||||
TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) {
|
||||
bool change_checksum_called = false;
|
||||
const auto& change_checksum = [&](void* arg) {
|
||||
|
@ -64,13 +64,13 @@ Status ExternalSstFileIngestionJob::Prepare(
|
||||
std::sort(
|
||||
sorted_files.begin(), sorted_files.end(),
|
||||
[&ucmp](const IngestedFileInfo* info1, const IngestedFileInfo* info2) {
|
||||
return ucmp->Compare(info1->smallest_user_key,
|
||||
info2->smallest_user_key) < 0;
|
||||
return sstableKeyCompare(ucmp, info1->smallest_internal_key,
|
||||
info2->smallest_internal_key) < 0;
|
||||
});
|
||||
|
||||
for (size_t i = 0; i < num_files - 1; i++) {
|
||||
if (ucmp->Compare(sorted_files[i]->largest_user_key,
|
||||
sorted_files[i + 1]->smallest_user_key) >= 0) {
|
||||
if (sstableKeyCompare(ucmp, sorted_files[i]->largest_internal_key,
|
||||
sorted_files[i + 1]->smallest_internal_key) >= 0) {
|
||||
return Status::NotSupported("Files have overlapping ranges");
|
||||
}
|
||||
}
|
||||
@ -81,8 +81,8 @@ Status ExternalSstFileIngestionJob::Prepare(
|
||||
return Status::InvalidArgument("File contain no entries");
|
||||
}
|
||||
|
||||
if (!f.smallest_internal_key().Valid() ||
|
||||
!f.largest_internal_key().Valid()) {
|
||||
if (!f.smallest_internal_key.Valid() ||
|
||||
!f.largest_internal_key.Valid()) {
|
||||
return Status::Corruption("Generated table have corrupted keys");
|
||||
}
|
||||
}
|
||||
@ -178,8 +178,8 @@ Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed,
|
||||
SuperVersion* super_version) {
|
||||
autovector<Range> ranges;
|
||||
for (const IngestedFileInfo& file_to_ingest : files_to_ingest_) {
|
||||
ranges.emplace_back(file_to_ingest.smallest_user_key,
|
||||
file_to_ingest.largest_user_key);
|
||||
ranges.emplace_back(file_to_ingest.smallest_internal_key.user_key(),
|
||||
file_to_ingest.largest_internal_key.user_key());
|
||||
}
|
||||
Status status =
|
||||
cfd_->RangesOverlapWithMemtables(ranges, super_version, flush_needed);
|
||||
@ -238,8 +238,8 @@ Status ExternalSstFileIngestionJob::Run() {
|
||||
return status;
|
||||
}
|
||||
edit_.AddFile(f.picked_level, f.fd.GetNumber(), f.fd.GetPathId(),
|
||||
f.fd.GetFileSize(), f.smallest_internal_key(),
|
||||
f.largest_internal_key(), f.assigned_seqno, f.assigned_seqno,
|
||||
f.fd.GetFileSize(), f.smallest_internal_key,
|
||||
f.largest_internal_key, f.assigned_seqno, f.assigned_seqno,
|
||||
false);
|
||||
}
|
||||
return status;
|
||||
@ -414,6 +414,8 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
||||
table_reader->NewRangeTombstoneIterator(ro));
|
||||
|
||||
// Get first (smallest) and last (largest) key from file.
|
||||
file_to_ingest->smallest_internal_key = InternalKey("", 0, ValueType::kTypeValue);
|
||||
file_to_ingest->largest_internal_key = InternalKey("", 0, ValueType::kTypeValue);
|
||||
bool bounds_set = false;
|
||||
iter->SeekToFirst();
|
||||
if (iter->Valid()) {
|
||||
@ -423,7 +425,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
||||
if (key.sequence != 0) {
|
||||
return Status::Corruption("external file have non zero sequence number");
|
||||
}
|
||||
file_to_ingest->smallest_user_key = key.user_key.ToString();
|
||||
file_to_ingest->smallest_internal_key.SetFrom(key);
|
||||
|
||||
iter->SeekToLast();
|
||||
if (!ParseInternalKey(iter->key(), &key)) {
|
||||
@ -432,7 +434,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
||||
if (key.sequence != 0) {
|
||||
return Status::Corruption("external file have non zero sequence number");
|
||||
}
|
||||
file_to_ingest->largest_user_key = key.user_key.ToString();
|
||||
file_to_ingest->largest_internal_key.SetFrom(key);
|
||||
|
||||
bounds_set = true;
|
||||
}
|
||||
@ -448,13 +450,13 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
||||
}
|
||||
RangeTombstone tombstone(key, range_del_iter->value());
|
||||
|
||||
if (!bounds_set || ucmp->Compare(tombstone.start_key_,
|
||||
file_to_ingest->smallest_user_key) < 0) {
|
||||
file_to_ingest->smallest_user_key = tombstone.start_key_.ToString();
|
||||
InternalKey start_key = tombstone.SerializeKey();
|
||||
if (!bounds_set || sstableKeyCompare(ucmp, start_key, file_to_ingest->smallest_internal_key) < 0) {
|
||||
file_to_ingest->smallest_internal_key = start_key;
|
||||
}
|
||||
if (!bounds_set || ucmp->Compare(tombstone.end_key_,
|
||||
file_to_ingest->largest_user_key) > 0) {
|
||||
file_to_ingest->largest_user_key = tombstone.end_key_.ToString();
|
||||
InternalKey end_key = tombstone.SerializeEndKey();
|
||||
if (!bounds_set || sstableKeyCompare(ucmp, end_key, file_to_ingest->largest_internal_key) > 0) {
|
||||
file_to_ingest->largest_internal_key = end_key;
|
||||
}
|
||||
bounds_set = true;
|
||||
}
|
||||
@ -496,7 +498,7 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
|
||||
if (vstorage->NumLevelFiles(lvl) > 0) {
|
||||
bool overlap_with_level = false;
|
||||
status = sv->current->OverlapWithLevelIterator(ro, env_options_,
|
||||
file_to_ingest->smallest_user_key, file_to_ingest->largest_user_key,
|
||||
file_to_ingest->smallest_internal_key.user_key(), file_to_ingest->largest_internal_key.user_key(),
|
||||
lvl, &overlap_with_level);
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
@ -630,8 +632,8 @@ bool ExternalSstFileIngestionJob::IngestedFileFitInLevel(
|
||||
}
|
||||
|
||||
auto* vstorage = cfd_->current()->storage_info();
|
||||
Slice file_smallest_user_key(file_to_ingest->smallest_user_key);
|
||||
Slice file_largest_user_key(file_to_ingest->largest_user_key);
|
||||
Slice file_smallest_user_key(file_to_ingest->smallest_internal_key.user_key());
|
||||
Slice file_largest_user_key(file_to_ingest->largest_internal_key.user_key());
|
||||
|
||||
if (vstorage->OverlapInLevel(level, &file_smallest_user_key,
|
||||
&file_largest_user_key)) {
|
||||
|
@ -25,10 +25,10 @@ class Directories;
|
||||
struct IngestedFileInfo {
|
||||
// External file path
|
||||
std::string external_file_path;
|
||||
// Smallest user key in external file
|
||||
std::string smallest_user_key;
|
||||
// Largest user key in external file
|
||||
std::string largest_user_key;
|
||||
// Smallest internal key in external file
|
||||
InternalKey smallest_internal_key;
|
||||
// Largest internal key in external file
|
||||
InternalKey largest_internal_key;
|
||||
// Sequence number for keys in external file
|
||||
SequenceNumber original_seqno;
|
||||
// Offset of the global sequence number field in the file, will
|
||||
@ -62,15 +62,6 @@ struct IngestedFileInfo {
|
||||
// ingestion_options.move_files is false by default, thus copy_file is true
|
||||
// by default.
|
||||
bool copy_file = true;
|
||||
|
||||
InternalKey smallest_internal_key() const {
|
||||
return InternalKey(smallest_user_key, assigned_seqno,
|
||||
ValueType::kTypeValue);
|
||||
}
|
||||
|
||||
InternalKey largest_internal_key() const {
|
||||
return InternalKey(largest_user_key, assigned_seqno, ValueType::kTypeValue);
|
||||
}
|
||||
};
|
||||
|
||||
class ExternalSstFileIngestionJob {
|
||||
|
@ -696,10 +696,10 @@ TEST_F(ExternalSSTFileTest, AddList) {
|
||||
ASSERT_EQ(file6_info.smallest_range_del_key, Key(0));
|
||||
ASSERT_EQ(file6_info.largest_range_del_key, Key(100));
|
||||
|
||||
// file7.sst (delete 100 => 200)
|
||||
// file7.sst (delete 99 => 201)
|
||||
std::string file7 = sst_files_dir_ + "file7.sst";
|
||||
ASSERT_OK(sst_file_writer.Open(file7));
|
||||
ASSERT_OK(sst_file_writer.DeleteRange(Key(100), Key(200)));
|
||||
ASSERT_OK(sst_file_writer.DeleteRange(Key(99), Key(201)));
|
||||
ExternalSstFileInfo file7_info;
|
||||
s = sst_file_writer.Finish(&file7_info);
|
||||
ASSERT_TRUE(s.ok()) << s.ToString();
|
||||
@ -708,8 +708,8 @@ TEST_F(ExternalSSTFileTest, AddList) {
|
||||
ASSERT_EQ(file7_info.smallest_key, "");
|
||||
ASSERT_EQ(file7_info.largest_key, "");
|
||||
ASSERT_EQ(file7_info.num_range_del_entries, 1);
|
||||
ASSERT_EQ(file7_info.smallest_range_del_key, Key(100));
|
||||
ASSERT_EQ(file7_info.largest_range_del_key, Key(200));
|
||||
ASSERT_EQ(file7_info.smallest_range_del_key, Key(99));
|
||||
ASSERT_EQ(file7_info.largest_range_del_key, Key(201));
|
||||
|
||||
// list 1 has internal key range conflict
|
||||
std::vector<std::string> file_list0({file1, file2});
|
||||
@ -724,9 +724,7 @@ TEST_F(ExternalSSTFileTest, AddList) {
|
||||
// These lists of files have key ranges that overlap with each other
|
||||
s = DeprecatedAddFile(file_list1);
|
||||
ASSERT_FALSE(s.ok()) << s.ToString();
|
||||
// Both of the following overlap on the end key of a range deletion
|
||||
// tombstone. This is a limitation because these tombstones have exclusive
|
||||
// end keys that should not count as overlapping with other keys.
|
||||
// Both of the following overlap on the range deletion tombstone.
|
||||
s = DeprecatedAddFile(file_list4);
|
||||
ASSERT_FALSE(s.ok()) << s.ToString();
|
||||
s = DeprecatedAddFile(file_list5);
|
||||
|
@ -58,13 +58,13 @@ Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number,
|
||||
std::sort(sorted_files.begin(), sorted_files.end(),
|
||||
[&ucmp](const IngestedFileInfo* info1,
|
||||
const IngestedFileInfo* info2) {
|
||||
return ucmp->Compare(info1->smallest_user_key,
|
||||
info2->smallest_user_key) < 0;
|
||||
return sstableKeyCompare(ucmp, info1->smallest_internal_key,
|
||||
info2->smallest_internal_key) < 0;
|
||||
});
|
||||
|
||||
for (size_t i = 0; i < sorted_files.size() - 1; i++) {
|
||||
if (ucmp->Compare(sorted_files[i]->largest_user_key,
|
||||
sorted_files[i + 1]->smallest_user_key) >= 0) {
|
||||
if (sstableKeyCompare(ucmp, sorted_files[i]->largest_internal_key,
|
||||
sorted_files[i + 1]->smallest_internal_key) >= 0) {
|
||||
return Status::InvalidArgument("Files have overlapping ranges");
|
||||
}
|
||||
}
|
||||
@ -76,8 +76,8 @@ Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number,
|
||||
return Status::InvalidArgument("File contain no entries");
|
||||
}
|
||||
|
||||
if (!f.smallest_internal_key().Valid() ||
|
||||
!f.largest_internal_key().Valid()) {
|
||||
if (!f.smallest_internal_key.Valid() ||
|
||||
!f.largest_internal_key.Valid()) {
|
||||
return Status::Corruption("File has corrupted keys");
|
||||
}
|
||||
}
|
||||
@ -137,8 +137,8 @@ Status ImportColumnFamilyJob::Run() {
|
||||
const auto& f = files_to_import_[i];
|
||||
const auto& file_metadata = metadata_[i];
|
||||
edit_.AddFile(file_metadata.level, f.fd.GetNumber(), f.fd.GetPathId(),
|
||||
f.fd.GetFileSize(), f.smallest_internal_key(),
|
||||
f.largest_internal_key(), file_metadata.smallest_seqno,
|
||||
f.fd.GetFileSize(), f.smallest_internal_key,
|
||||
f.largest_internal_key, file_metadata.smallest_seqno,
|
||||
file_metadata.largest_seqno, false);
|
||||
|
||||
// If incoming sequence number is higher, update local sequence number.
|
||||
@ -236,14 +236,14 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo(
|
||||
if (!ParseInternalKey(iter->key(), &key)) {
|
||||
return Status::Corruption("external file have corrupted keys");
|
||||
}
|
||||
file_to_import->smallest_user_key = key.user_key.ToString();
|
||||
file_to_import->smallest_internal_key.SetFrom(key);
|
||||
|
||||
// Get last (largest) key from file
|
||||
iter->SeekToLast();
|
||||
if (!ParseInternalKey(iter->key(), &key)) {
|
||||
return Status::Corruption("external file have corrupted keys");
|
||||
}
|
||||
file_to_import->largest_user_key = key.user_key.ToString();
|
||||
file_to_import->largest_internal_key.SetFrom(key);
|
||||
|
||||
file_to_import->cf_id = static_cast<uint32_t>(props->column_family_id);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user