From d61d4507c0980b544e87fd0aa5ed2990a45dad5e Mon Sep 17 00:00:00 2001 From: Jeffrey Xiao Date: Wed, 14 Aug 2019 20:58:59 -0700 Subject: [PATCH] Fix IngestExternalFile overlapping check (#5649) Summary: Previously, the end key of a range deletion tombstone was considered exclusive for the purposes of deletion, but considered inclusive when checking if two SSTables overlap. For example, an SSTable with a range deletion tombstone [a, b) would be considered overlapping with an SSTable with a range deletion tombstone [b, c). This commit fixes this check. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5649 Differential Revision: D16808765 Pulled By: anand1976 fbshipit-source-id: 5c7ad1c027e4f778d35070e5dae1b8e6037e0d68 --- db/external_sst_file_basic_test.cc | 42 +++++++++++++++++++++++++ db/external_sst_file_ingestion_job.cc | 44 ++++++++++++++------------- db/external_sst_file_ingestion_job.h | 17 +++-------- db/external_sst_file_test.cc | 12 +++----- db/import_column_family_job.cc | 20 ++++++------ 5 files changed, 84 insertions(+), 51 deletions(-) diff --git a/db/external_sst_file_basic_test.cc b/db/external_sst_file_basic_test.cc index ff7da502a..475ec7fe8 100644 --- a/db/external_sst_file_basic_test.cc +++ b/db/external_sst_file_basic_test.cc @@ -835,6 +835,48 @@ TEST_P(ExternalSSTFileBasicTest, IngestionWithRangeDeletions) { ASSERT_EQ(2, NumTableFilesAtLevel(options.num_levels - 1)); } +TEST_F(ExternalSSTFileBasicTest, AdjacentRangeDeletionTombstones) { + Options options = CurrentOptions(); + SstFileWriter sst_file_writer(EnvOptions(), options); + + // file8.sst (delete 300 => 400) + std::string file8 = sst_files_dir_ + "file8.sst"; + ASSERT_OK(sst_file_writer.Open(file8)); + ASSERT_OK(sst_file_writer.DeleteRange(Key(300), Key(400))); + ExternalSstFileInfo file8_info; + Status s = sst_file_writer.Finish(&file8_info); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(file8_info.file_path, file8); + ASSERT_EQ(file8_info.num_entries, 0); + ASSERT_EQ(file8_info.smallest_key, ""); + ASSERT_EQ(file8_info.largest_key, ""); + ASSERT_EQ(file8_info.num_range_del_entries, 1); + ASSERT_EQ(file8_info.smallest_range_del_key, Key(300)); + ASSERT_EQ(file8_info.largest_range_del_key, Key(400)); + + // file9.sst (delete 400 => 500) + std::string file9 = sst_files_dir_ + "file9.sst"; + ASSERT_OK(sst_file_writer.Open(file9)); + ASSERT_OK(sst_file_writer.DeleteRange(Key(400), Key(500))); + ExternalSstFileInfo file9_info; + s = sst_file_writer.Finish(&file9_info); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(file9_info.file_path, file9); + ASSERT_EQ(file9_info.num_entries, 0); + ASSERT_EQ(file9_info.smallest_key, ""); + ASSERT_EQ(file9_info.largest_key, ""); + ASSERT_EQ(file9_info.num_range_del_entries, 1); + ASSERT_EQ(file9_info.smallest_range_del_key, Key(400)); + ASSERT_EQ(file9_info.largest_range_del_key, Key(500)); + + // Range deletion tombstones are exclusive on their end key, so these SSTs + // should not be considered as overlapping. + s = DeprecatedAddFile({file8, file9}); + ASSERT_TRUE(s.ok()) << s.ToString(); + ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U); + DestroyAndRecreateExternalSSTFilesDir(); +} + TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) { bool change_checksum_called = false; const auto& change_checksum = [&](void* arg) { diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index 44b501685..2fce8e01b 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -64,13 +64,13 @@ Status ExternalSstFileIngestionJob::Prepare( std::sort( sorted_files.begin(), sorted_files.end(), [&ucmp](const IngestedFileInfo* info1, const IngestedFileInfo* info2) { - return ucmp->Compare(info1->smallest_user_key, - info2->smallest_user_key) < 0; + return sstableKeyCompare(ucmp, info1->smallest_internal_key, + info2->smallest_internal_key) < 0; }); for (size_t i = 0; i < num_files - 1; i++) { - if (ucmp->Compare(sorted_files[i]->largest_user_key, - sorted_files[i + 1]->smallest_user_key) >= 0) { + if (sstableKeyCompare(ucmp, sorted_files[i]->largest_internal_key, + sorted_files[i + 1]->smallest_internal_key) >= 0) { return Status::NotSupported("Files have overlapping ranges"); } } @@ -81,8 +81,8 @@ Status ExternalSstFileIngestionJob::Prepare( return Status::InvalidArgument("File contain no entries"); } - if (!f.smallest_internal_key().Valid() || - !f.largest_internal_key().Valid()) { + if (!f.smallest_internal_key.Valid() || + !f.largest_internal_key.Valid()) { return Status::Corruption("Generated table have corrupted keys"); } } @@ -178,8 +178,8 @@ Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed, SuperVersion* super_version) { autovector ranges; for (const IngestedFileInfo& file_to_ingest : files_to_ingest_) { - ranges.emplace_back(file_to_ingest.smallest_user_key, - file_to_ingest.largest_user_key); + ranges.emplace_back(file_to_ingest.smallest_internal_key.user_key(), + file_to_ingest.largest_internal_key.user_key()); } Status status = cfd_->RangesOverlapWithMemtables(ranges, super_version, flush_needed); @@ -238,8 +238,8 @@ Status ExternalSstFileIngestionJob::Run() { return status; } edit_.AddFile(f.picked_level, f.fd.GetNumber(), f.fd.GetPathId(), - f.fd.GetFileSize(), f.smallest_internal_key(), - f.largest_internal_key(), f.assigned_seqno, f.assigned_seqno, + f.fd.GetFileSize(), f.smallest_internal_key, + f.largest_internal_key, f.assigned_seqno, f.assigned_seqno, false); } return status; @@ -414,6 +414,8 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( table_reader->NewRangeTombstoneIterator(ro)); // Get first (smallest) and last (largest) key from file. + file_to_ingest->smallest_internal_key = InternalKey("", 0, ValueType::kTypeValue); + file_to_ingest->largest_internal_key = InternalKey("", 0, ValueType::kTypeValue); bool bounds_set = false; iter->SeekToFirst(); if (iter->Valid()) { @@ -423,7 +425,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( if (key.sequence != 0) { return Status::Corruption("external file have non zero sequence number"); } - file_to_ingest->smallest_user_key = key.user_key.ToString(); + file_to_ingest->smallest_internal_key.SetFrom(key); iter->SeekToLast(); if (!ParseInternalKey(iter->key(), &key)) { @@ -432,7 +434,7 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( if (key.sequence != 0) { return Status::Corruption("external file have non zero sequence number"); } - file_to_ingest->largest_user_key = key.user_key.ToString(); + file_to_ingest->largest_internal_key.SetFrom(key); bounds_set = true; } @@ -448,13 +450,13 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo( } RangeTombstone tombstone(key, range_del_iter->value()); - if (!bounds_set || ucmp->Compare(tombstone.start_key_, - file_to_ingest->smallest_user_key) < 0) { - file_to_ingest->smallest_user_key = tombstone.start_key_.ToString(); + InternalKey start_key = tombstone.SerializeKey(); + if (!bounds_set || sstableKeyCompare(ucmp, start_key, file_to_ingest->smallest_internal_key) < 0) { + file_to_ingest->smallest_internal_key = start_key; } - if (!bounds_set || ucmp->Compare(tombstone.end_key_, - file_to_ingest->largest_user_key) > 0) { - file_to_ingest->largest_user_key = tombstone.end_key_.ToString(); + InternalKey end_key = tombstone.SerializeEndKey(); + if (!bounds_set || sstableKeyCompare(ucmp, end_key, file_to_ingest->largest_internal_key) > 0) { + file_to_ingest->largest_internal_key = end_key; } bounds_set = true; } @@ -496,7 +498,7 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile( if (vstorage->NumLevelFiles(lvl) > 0) { bool overlap_with_level = false; status = sv->current->OverlapWithLevelIterator(ro, env_options_, - file_to_ingest->smallest_user_key, file_to_ingest->largest_user_key, + file_to_ingest->smallest_internal_key.user_key(), file_to_ingest->largest_internal_key.user_key(), lvl, &overlap_with_level); if (!status.ok()) { return status; @@ -630,8 +632,8 @@ bool ExternalSstFileIngestionJob::IngestedFileFitInLevel( } auto* vstorage = cfd_->current()->storage_info(); - Slice file_smallest_user_key(file_to_ingest->smallest_user_key); - Slice file_largest_user_key(file_to_ingest->largest_user_key); + Slice file_smallest_user_key(file_to_ingest->smallest_internal_key.user_key()); + Slice file_largest_user_key(file_to_ingest->largest_internal_key.user_key()); if (vstorage->OverlapInLevel(level, &file_smallest_user_key, &file_largest_user_key)) { diff --git a/db/external_sst_file_ingestion_job.h b/db/external_sst_file_ingestion_job.h index 50f394405..4f9fac241 100644 --- a/db/external_sst_file_ingestion_job.h +++ b/db/external_sst_file_ingestion_job.h @@ -25,10 +25,10 @@ class Directories; struct IngestedFileInfo { // External file path std::string external_file_path; - // Smallest user key in external file - std::string smallest_user_key; - // Largest user key in external file - std::string largest_user_key; + // Smallest internal key in external file + InternalKey smallest_internal_key; + // Largest internal key in external file + InternalKey largest_internal_key; // Sequence number for keys in external file SequenceNumber original_seqno; // Offset of the global sequence number field in the file, will @@ -62,15 +62,6 @@ struct IngestedFileInfo { // ingestion_options.move_files is false by default, thus copy_file is true // by default. bool copy_file = true; - - InternalKey smallest_internal_key() const { - return InternalKey(smallest_user_key, assigned_seqno, - ValueType::kTypeValue); - } - - InternalKey largest_internal_key() const { - return InternalKey(largest_user_key, assigned_seqno, ValueType::kTypeValue); - } }; class ExternalSstFileIngestionJob { diff --git a/db/external_sst_file_test.cc b/db/external_sst_file_test.cc index ebd6cb2b1..f5bed1750 100644 --- a/db/external_sst_file_test.cc +++ b/db/external_sst_file_test.cc @@ -696,10 +696,10 @@ TEST_F(ExternalSSTFileTest, AddList) { ASSERT_EQ(file6_info.smallest_range_del_key, Key(0)); ASSERT_EQ(file6_info.largest_range_del_key, Key(100)); - // file7.sst (delete 100 => 200) + // file7.sst (delete 99 => 201) std::string file7 = sst_files_dir_ + "file7.sst"; ASSERT_OK(sst_file_writer.Open(file7)); - ASSERT_OK(sst_file_writer.DeleteRange(Key(100), Key(200))); + ASSERT_OK(sst_file_writer.DeleteRange(Key(99), Key(201))); ExternalSstFileInfo file7_info; s = sst_file_writer.Finish(&file7_info); ASSERT_TRUE(s.ok()) << s.ToString(); @@ -708,8 +708,8 @@ TEST_F(ExternalSSTFileTest, AddList) { ASSERT_EQ(file7_info.smallest_key, ""); ASSERT_EQ(file7_info.largest_key, ""); ASSERT_EQ(file7_info.num_range_del_entries, 1); - ASSERT_EQ(file7_info.smallest_range_del_key, Key(100)); - ASSERT_EQ(file7_info.largest_range_del_key, Key(200)); + ASSERT_EQ(file7_info.smallest_range_del_key, Key(99)); + ASSERT_EQ(file7_info.largest_range_del_key, Key(201)); // list 1 has internal key range conflict std::vector file_list0({file1, file2}); @@ -724,9 +724,7 @@ TEST_F(ExternalSSTFileTest, AddList) { // These lists of files have key ranges that overlap with each other s = DeprecatedAddFile(file_list1); ASSERT_FALSE(s.ok()) << s.ToString(); - // Both of the following overlap on the end key of a range deletion - // tombstone. This is a limitation because these tombstones have exclusive - // end keys that should not count as overlapping with other keys. + // Both of the following overlap on the range deletion tombstone. s = DeprecatedAddFile(file_list4); ASSERT_FALSE(s.ok()) << s.ToString(); s = DeprecatedAddFile(file_list5); diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index 3c00a2591..cd5914069 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -58,13 +58,13 @@ Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number, std::sort(sorted_files.begin(), sorted_files.end(), [&ucmp](const IngestedFileInfo* info1, const IngestedFileInfo* info2) { - return ucmp->Compare(info1->smallest_user_key, - info2->smallest_user_key) < 0; + return sstableKeyCompare(ucmp, info1->smallest_internal_key, + info2->smallest_internal_key) < 0; }); for (size_t i = 0; i < sorted_files.size() - 1; i++) { - if (ucmp->Compare(sorted_files[i]->largest_user_key, - sorted_files[i + 1]->smallest_user_key) >= 0) { + if (sstableKeyCompare(ucmp, sorted_files[i]->largest_internal_key, + sorted_files[i + 1]->smallest_internal_key) >= 0) { return Status::InvalidArgument("Files have overlapping ranges"); } } @@ -76,8 +76,8 @@ Status ImportColumnFamilyJob::Prepare(uint64_t next_file_number, return Status::InvalidArgument("File contain no entries"); } - if (!f.smallest_internal_key().Valid() || - !f.largest_internal_key().Valid()) { + if (!f.smallest_internal_key.Valid() || + !f.largest_internal_key.Valid()) { return Status::Corruption("File has corrupted keys"); } } @@ -137,8 +137,8 @@ Status ImportColumnFamilyJob::Run() { const auto& f = files_to_import_[i]; const auto& file_metadata = metadata_[i]; edit_.AddFile(file_metadata.level, f.fd.GetNumber(), f.fd.GetPathId(), - f.fd.GetFileSize(), f.smallest_internal_key(), - f.largest_internal_key(), file_metadata.smallest_seqno, + f.fd.GetFileSize(), f.smallest_internal_key, + f.largest_internal_key, file_metadata.smallest_seqno, file_metadata.largest_seqno, false); // If incoming sequence number is higher, update local sequence number. @@ -236,14 +236,14 @@ Status ImportColumnFamilyJob::GetIngestedFileInfo( if (!ParseInternalKey(iter->key(), &key)) { return Status::Corruption("external file have corrupted keys"); } - file_to_import->smallest_user_key = key.user_key.ToString(); + file_to_import->smallest_internal_key.SetFrom(key); // Get last (largest) key from file iter->SeekToLast(); if (!ParseInternalKey(iter->key(), &key)) { return Status::Corruption("external file have corrupted keys"); } - file_to_import->largest_user_key = key.user_key.ToString(); + file_to_import->largest_internal_key.SetFrom(key); file_to_import->cf_id = static_cast(props->column_family_id);