From be81609b43fecf47bf12594cee3015b5fb979d7b Mon Sep 17 00:00:00 2001 From: Yanqin Jin Date: Fri, 15 Apr 2022 18:12:06 -0700 Subject: [PATCH] Add a `fail_if_not_bottommost_level` to IngestExternalFileOptions (#9849) Summary: This new options allows application to specify that files must be ingested to bottommost level, otherwise the ingestion will fail instead of silently ingesting to a non-bottommost level. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9849 Test Plan: make check Reviewed By: ajkr Differential Revision: D35680307 Pulled By: riversand963 fbshipit-source-id: 01cf54ef6c76198f7654dc06b5544631dea1be1e --- HISTORY.md | 1 + db/external_sst_file_basic_test.cc | 48 +++++++++++++++++++++++++++ db/external_sst_file_ingestion_job.cc | 15 +++++++++ include/rocksdb/db.h | 5 +++ include/rocksdb/options.h | 8 +++++ 5 files changed, 77 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index fb5fff4d0..28a18bbd8 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -28,6 +28,7 @@ ### Public API changes * Exposed APIs to examine results of block cache stats collections in a structured way. In particular, users of `GetMapProperty()` with property `kBlockCacheEntryStats` can now use the functions in `BlockCacheEntryStatsMapKeys` to find stats in the map. +* Add `fail_if_not_bottommost_level` to IngestExternalFileOptions so that ingestion will fail if the file(s) cannot be ingested to the bottommost level. ## 7.1.0 (03/23/2022) ### New Features diff --git a/db/external_sst_file_basic_test.cc b/db/external_sst_file_basic_test.cc index 549171c2c..b6f05a846 100644 --- a/db/external_sst_file_basic_test.cc +++ b/db/external_sst_file_basic_test.cc @@ -1796,6 +1796,54 @@ TEST_F(ExternalSSTFileBasicTest, IngestWithTemperature) { ASSERT_EQ(std::atoi(prop.c_str()), 0); } +TEST_F(ExternalSSTFileBasicTest, FailIfNotBottommostLevel) { + Options options = GetDefaultOptions(); + + std::string file_path = sst_files_dir_ + ToString(1); + SstFileWriter sfw(EnvOptions(), options); + + ASSERT_OK(sfw.Open(file_path)); + ASSERT_OK(sfw.Put("b", "dontcare")); + ASSERT_OK(sfw.Finish()); + + // Test universal compaction + ingest with snapshot consistency + options.create_if_missing = true; + options.compaction_style = CompactionStyle::kCompactionStyleUniversal; + DestroyAndReopen(options); + { + const Snapshot* snapshot = db_->GetSnapshot(); + ManagedSnapshot snapshot_guard(db_, snapshot); + IngestExternalFileOptions ifo; + ifo.fail_if_not_bottommost_level = true; + ifo.snapshot_consistency = true; + const Status s = db_->IngestExternalFile({file_path}, ifo); + ASSERT_TRUE(s.IsTryAgain()); + } + + // Test level compaction + options.compaction_style = CompactionStyle::kCompactionStyleLevel; + options.num_levels = 2; + DestroyAndReopen(options); + ASSERT_OK(db_->Put(WriteOptions(), "a", "dontcare")); + ASSERT_OK(db_->Put(WriteOptions(), "c", "dontcare")); + ASSERT_OK(db_->Flush(FlushOptions())); + + ASSERT_OK(db_->Put(WriteOptions(), "b", "dontcare")); + ASSERT_OK(db_->Put(WriteOptions(), "d", "dontcare")); + ASSERT_OK(db_->Flush(FlushOptions())); + + { + CompactRangeOptions cro; + cro.bottommost_level_compaction = BottommostLevelCompaction::kForce; + ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr)); + + IngestExternalFileOptions ifo; + ifo.fail_if_not_bottommost_level = true; + const Status s = db_->IngestExternalFile({file_path}, ifo); + ASSERT_TRUE(s.IsTryAgain()); + } +} + INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, ExternalSSTFileBasicTest, testing::Values(std::make_tuple(true, true), std::make_tuple(true, false), diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index 16ec558f2..51b636678 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -739,6 +739,12 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile( if (force_global_seqno) { *assigned_seqno = last_seqno + 1; if (compaction_style == kCompactionStyleUniversal || files_overlap_) { + if (ingestion_options_.fail_if_not_bottommost_level) { + status = Status::TryAgain( + "Files cannot be ingested to Lmax. Please make sure key range of " + "Lmax does not overlap with files to ingest."); + return status; + } file_to_ingest->picked_level = 0; return status; } @@ -808,6 +814,15 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile( target_level = 0; *assigned_seqno = last_seqno + 1; } + + if (ingestion_options_.fail_if_not_bottommost_level && + target_level < cfd_->NumberLevels() - 1) { + status = Status::TryAgain( + "Files cannot be ingested to Lmax. Please make sure key range of Lmax " + "does not overlap with files to ingest."); + return status; + } + TEST_SYNC_POINT_CALLBACK( "ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile", &overlap_with_db); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 59d24c64e..4d8e80013 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1566,6 +1566,11 @@ class DB { // (3) If IngestExternalFileOptions->ingest_behind is set to true, // we always ingest at the bottommost level, which should be reserved // for this purpose (see DBOPtions::allow_ingest_behind flag). + // (4) If IngestExternalFileOptions->fail_if_not_bottommost_level is set to + // true, then this method can return Status:TryAgain() indicating that + // the files cannot be ingested to the bottommost level, and it is the + // user's responsibility to clear the bottommost level in the overlapping + // range before re-attempting the ingestion. virtual Status IngestExternalFile( ColumnFamilyHandle* column_family, const std::vector& external_files, diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index fe76ff2cd..b9bc15753 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -1855,6 +1855,14 @@ struct IngestExternalFileOptions { // ingestion. However, if no checksum information is provided with the // ingested files, DB will generate the checksum and store in the Manifest. bool verify_file_checksum = true; + // Set to TRUE if user wants file to be ingested to the bottommost level. An + // error of Status::TryAgain() will be returned if a file cannot fit in the + // bottommost level when calling + // DB::IngestExternalFile()/DB::IngestExternalFiles(). The user should clear + // the bottommost level in the overlapping range before re-attempt. + // + // ingest_behind takes precedence over fail_if_not_bottommost_level. + bool fail_if_not_bottommost_level = false; }; enum TraceFilterType : uint64_t {