From 18cf0de64085b74388ba4df029ea8d7c83ec428d Mon Sep 17 00:00:00 2001 From: Yanqin Jin Date: Mon, 30 Mar 2020 18:57:28 -0700 Subject: [PATCH] Use flush time for the props.creation_time for FIFO compaction (#6612) Summary: For FIFO compaction, we use flush time instead of oldest key time as the creation time. This is to prevent FIFO compaction dropping files whose oldest key time is older than TTL but which has newer keys than TTL. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6612 Test Plan: make check Reviewed By: siying Differential Revision: D20748217 Pulled By: riversand963 fbshipit-source-id: 3f7b00a847020760537cdddd12f6fe039e5bc663 --- HISTORY.md | 4 ++++ db/compaction/compaction_picker_fifo.cc | 18 ++++++++++++------ db/db_compaction_test.cc | 25 ------------------------- db/flush_job.cc | 8 ++++++-- include/rocksdb/advanced_options.h | 2 ++ 5 files changed, 24 insertions(+), 33 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 00c6bac43..eaba9b86e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,8 @@ # Rocksdb Change Log +## Unreleased +### Behavior changes +* Since RocksDB 6.8, ttl-based FIFO compaction can drop a file whose oldest key becomes older than options.ttl while others have not. This fix reverts this and makes ttl-based FIFO compaction use the file's flush time as the criterion. This fix also requires that max_open_files = -1 and compaction_options_fifo.allow_compaction = false to function properly. + ## 6.9.0 (03/29/2020) ### Public API Change * Fix spelling so that API now has correctly spelled transaction state name `COMMITTED`, while the old misspelled `COMMITED` is still available as an alias. diff --git a/db/compaction/compaction_picker_fifo.cc b/db/compaction/compaction_picker_fifo.cc index 947c40cf6..8a4ea7a8e 100644 --- a/db/compaction/compaction_picker_fifo.cc +++ b/db/compaction/compaction_picker_fifo.cc @@ -71,10 +71,13 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction( if (current_time > mutable_cf_options.ttl) { for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) { FileMetaData* f = *ritr; - uint64_t creation_time = f->TryGetFileCreationTime(); - if (creation_time == kUnknownFileCreationTime || - creation_time >= (current_time - mutable_cf_options.ttl)) { - break; + if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) { + uint64_t creation_time = + f->fd.table_reader->GetTableProperties()->creation_time; + if (creation_time == 0 || + creation_time >= (current_time - mutable_cf_options.ttl)) { + break; + } } total_size -= f->compensated_file_size; inputs[0].files.push_back(f); @@ -92,11 +95,14 @@ Compaction* FIFOCompactionPicker::PickTTLCompaction( } for (const auto& f : inputs[0].files) { + uint64_t creation_time = 0; + if (f && f->fd.table_reader && f->fd.table_reader->GetTableProperties()) { + creation_time = f->fd.table_reader->GetTableProperties()->creation_time; + } ROCKS_LOG_BUFFER(log_buffer, "[%s] FIFO compaction: picking file %" PRIu64 " with creation time %" PRIu64 " for deletion", - cf_name.c_str(), f->fd.GetNumber(), - f->TryGetFileCreationTime()); + cf_name.c_str(), f->fd.GetNumber(), creation_time); } Compaction* c = new Compaction( diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 554a02cbc..af2e7cdc2 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -5217,31 +5217,6 @@ TEST_P(DBCompactionTestWithParam, } } -TEST_F(DBCompactionTest, FifoCompactionGetFileCreationTime) { - MockEnv mock_env(env_); - do { - Options options = CurrentOptions(); - options.table_factory.reset(new BlockBasedTableFactory()); - options.env = &mock_env; - options.ttl = static_cast(24) * 3600; - options.compaction_style = kCompactionStyleFIFO; - constexpr size_t kNumFiles = 24; - options.max_open_files = 20; - constexpr size_t kNumKeysPerFile = 10; - DestroyAndReopen(options); - for (size_t i = 0; i < kNumFiles; ++i) { - for (size_t j = 0; j < kNumKeysPerFile; ++j) { - ASSERT_OK(Put(std::to_string(j), "value_" + std::to_string(i))); - } - ASSERT_OK(Flush()); - } - mock_env.FakeSleepForMicroseconds( - static_cast(1000 * 1000 * (1 + options.ttl))); - ASSERT_OK(Put("foo", "value")); - ASSERT_OK(Flush()); - } while (ChangeOptions()); -} - #endif // !defined(ROCKSDB_LITE) } // namespace ROCKSDB_NAMESPACE diff --git a/db/flush_job.cc b/db/flush_job.cc index c45e33b30..90da8057e 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -375,6 +375,11 @@ Status FlushJob::WriteLevel0Table() { meta_.oldest_ancester_time = std::min(current_time, oldest_key_time); meta_.file_creation_time = current_time; + uint64_t creation_time = (cfd_->ioptions()->compaction_style == + CompactionStyle::kCompactionStyleFIFO) + ? current_time + : meta_.oldest_ancester_time; + IOStatus io_s; s = BuildTable( dbname_, db_options_.env, db_options_.fs.get(), *cfd_->ioptions(), @@ -388,8 +393,7 @@ Status FlushJob::WriteLevel0Table() { mutable_cf_options_.paranoid_file_checks, cfd_->internal_stats(), TableFileCreationReason::kFlush, &io_s, event_logger_, job_context_->job_id, Env::IO_HIGH, &table_properties_, 0 /* level */, - meta_.oldest_ancester_time, oldest_key_time, write_hint, - current_time); + creation_time, oldest_key_time, write_hint, current_time); if (!io_s.ok()) { io_status_ = io_s; } diff --git a/include/rocksdb/advanced_options.h b/include/rocksdb/advanced_options.h index fe6ce9250..a72edbe05 100644 --- a/include/rocksdb/advanced_options.h +++ b/include/rocksdb/advanced_options.h @@ -645,6 +645,7 @@ struct AdvancedColumnFamilyOptions { bool report_bg_io_stats = false; // Files older than TTL will go through the compaction process. + // Pre-req: This needs max_open_files to be set to -1. // In Level: Non-bottom-level files older than TTL will go through the // compation process. // In FIFO: Files older than TTL will be deleted. @@ -672,6 +673,7 @@ struct AdvancedColumnFamilyOptions { // Supported in Level and FIFO compaction. // In FIFO compaction, this option has the same meaning as TTL and whichever // stricter will be used. + // Pre-req: max_open_file == -1. // unit: seconds. Ex: 7 days = 7 * 24 * 60 * 60 // // Values: