From 937fbcbddca3d1a777271feb7dcce850d3bab86f Mon Sep 17 00:00:00 2001 From: slk Date: Wed, 10 Nov 2021 10:47:53 -0800 Subject: [PATCH] Track per-SST user-defined timestamp information in MANIFEST (#9092) Summary: Track per-SST user-defined timestamp information in MANIFEST https://github.com/facebook/rocksdb/issues/8957 Rockdb has supported user-defined timestamp feature. Application can specify a timestamp when writing each k-v pair. When data flush from memory to disk file called SST files, file creation activity will commit to MANIFEST. This commit is for tracking timestamp info in the MANIFEST for each file. The changes involved are as follows: 1) Track max/min timestamp in FileMetaData, and fix invoved codes. 2) Add NewFileCustomTag::kMinTimestamp and NewFileCustomTag::kMinTimestamp in NewFileCustomTag ( in the kNewFile4 part ), and support invoved codes such as VersionEdit Encode and Decode etc. 3) Add unit test code for VersionEdit EncodeDecodeNewFile4, and fix invoved test codes. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9092 Reviewed By: ajkr, akankshamahajan15 Differential Revision: D32252323 Pulled By: riversand963 fbshipit-source-id: d2642898d6e3ad1fef0eb866b98045408bd4e162 --- HISTORY.md | 1 + db/compaction/compaction_job_test.cc | 3 +- db/compaction/compaction_picker_test.cc | 3 +- db/db_impl/db_impl_compaction_flush.cc | 26 +++---- db/db_impl/db_impl_experimental.cc | 3 +- db/db_impl/db_impl_open.cc | 3 +- db/dbformat.cc | 1 + db/dbformat.h | 3 + db/external_sst_file_ingestion_job.cc | 3 +- db/flush_job.cc | 3 +- db/import_column_family_job.cc | 3 +- db/repair.cc | 3 +- db/version_builder_test.cc | 90 ++++++++++++++++--------- db/version_edit.cc | 29 ++++++++ db/version_edit.h | 28 +++++--- db/version_edit_test.cc | 36 +++++++--- db/version_set.cc | 3 +- db/version_set_test.cc | 15 +++-- 18 files changed, 182 insertions(+), 74 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index e7abef9fa..7aaee4179 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,7 @@ ### New Features * Added new ChecksumType kXXH3 which is faster than kCRC32c on almost all x86\_64 hardware. * Added a new online consistency check for BlobDB which validates that the number/total size of garbage blobs does not exceed the number/total size of all blobs in any given blob file. +* Provided support for tracking per-sst user-defined timestamp information in MANIFEST. ### Bug Fixes * Prevent a `CompactRange()` with `CompactRangeOptions::change_level == true` from possibly causing corruption to the LSM state (overlapping files within a level) when run in parallel with another manual compaction. Note that setting `force_consistency_checks == true` (the default) would cause the DB to enter read-only mode in this scenario and return `Status::Corruption`, rather than committing any corruption. diff --git a/db/compaction/compaction_job_test.cc b/db/compaction/compaction_job_test.cc index 4d46b0f5d..17dd35886 100644 --- a/db/compaction/compaction_job_test.cc +++ b/db/compaction/compaction_job_test.cc @@ -205,7 +205,8 @@ class CompactionJobTestBase : public testing::Test { edit.AddFile(level, file_number, 0, 10, smallest_key, largest_key, smallest_seqno, largest_seqno, false, oldest_blob_file_number, kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, + kDisableUserTimestamp, kDisableUserTimestamp); mutex_.Lock(); EXPECT_OK( diff --git a/db/compaction/compaction_picker_test.cc b/db/compaction/compaction_picker_test.cc index 8075fe188..07b2c404f 100644 --- a/db/compaction/compaction_picker_test.cc +++ b/db/compaction/compaction_picker_test.cc @@ -114,7 +114,8 @@ class CompactionPickerTest : public testing::Test { InternalKey(largest, largest_seq, kTypeValue), smallest_seq, largest_seq, marked_for_compact, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, + kDisableUserTimestamp, kDisableUserTimestamp); f->compensated_file_size = (compensated_file_size != 0) ? compensated_file_size : file_size; f->temperature = temperature; diff --git a/db/db_impl/db_impl_compaction_flush.cc b/db/db_impl/db_impl_compaction_flush.cc index d2a49fbbf..17bdb027f 100644 --- a/db/db_impl/db_impl_compaction_flush.cc +++ b/db/db_impl/db_impl_compaction_flush.cc @@ -1589,12 +1589,12 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { edit.SetColumnFamily(cfd->GetID()); for (const auto& f : vstorage->LevelFiles(level)) { edit.DeleteFile(level, f->fd.GetNumber()); - edit.AddFile(to_level, f->fd.GetNumber(), f->fd.GetPathId(), - f->fd.GetFileSize(), f->smallest, f->largest, - f->fd.smallest_seqno, f->fd.largest_seqno, - f->marked_for_compaction, f->oldest_blob_file_number, - f->oldest_ancester_time, f->file_creation_time, - f->file_checksum, f->file_checksum_func_name); + edit.AddFile( + to_level, f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(), + f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno, + f->marked_for_compaction, f->oldest_blob_file_number, + f->oldest_ancester_time, f->file_creation_time, f->file_checksum, + f->file_checksum_func_name, f->min_timestamp, f->max_timestamp); } ROCKS_LOG_DEBUG(immutable_db_options_.info_log, "[%s] Apply version edit:\n%s", cfd->GetName().c_str(), @@ -3170,13 +3170,13 @@ Status DBImpl::BackgroundCompaction(bool* made_progress, for (size_t i = 0; i < c->num_input_files(l); i++) { FileMetaData* f = c->input(l, i); c->edit()->DeleteFile(c->level(l), f->fd.GetNumber()); - c->edit()->AddFile(c->output_level(), f->fd.GetNumber(), - f->fd.GetPathId(), f->fd.GetFileSize(), f->smallest, - f->largest, f->fd.smallest_seqno, - f->fd.largest_seqno, f->marked_for_compaction, - f->oldest_blob_file_number, f->oldest_ancester_time, - f->file_creation_time, f->file_checksum, - f->file_checksum_func_name); + c->edit()->AddFile( + c->output_level(), f->fd.GetNumber(), f->fd.GetPathId(), + f->fd.GetFileSize(), f->smallest, f->largest, f->fd.smallest_seqno, + f->fd.largest_seqno, f->marked_for_compaction, + f->oldest_blob_file_number, f->oldest_ancester_time, + f->file_creation_time, f->file_checksum, f->file_checksum_func_name, + f->min_timestamp, f->max_timestamp); ROCKS_LOG_BUFFER( log_buffer, diff --git a/db/db_impl/db_impl_experimental.cc b/db/db_impl/db_impl_experimental.cc index ab5cdcb04..da38cd234 100644 --- a/db/db_impl/db_impl_experimental.cc +++ b/db/db_impl/db_impl_experimental.cc @@ -131,7 +131,8 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) { f->fd.smallest_seqno, f->fd.largest_seqno, f->marked_for_compaction, f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, - f->file_checksum, f->file_checksum_func_name); + f->file_checksum, f->file_checksum_func_name, + f->min_timestamp, f->max_timestamp); } status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 2caca7b3e..365747925 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -1463,7 +1463,8 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd, meta.fd.smallest_seqno, meta.fd.largest_seqno, meta.marked_for_compaction, meta.oldest_blob_file_number, meta.oldest_ancester_time, meta.file_creation_time, - meta.file_checksum, meta.file_checksum_func_name); + meta.file_checksum, meta.file_checksum_func_name, + meta.min_timestamp, meta.max_timestamp); for (const auto& blob : blob_file_additions) { edit->AddBlobFile(blob); diff --git a/db/dbformat.cc b/db/dbformat.cc index 6afdc3dc2..ae9f1b30f 100644 --- a/db/dbformat.cc +++ b/db/dbformat.cc @@ -28,6 +28,7 @@ namespace ROCKSDB_NAMESPACE { // ValueType, not the lowest). const ValueType kValueTypeForSeek = kTypeDeletionWithTimestamp; const ValueType kValueTypeForSeekForPrev = kTypeDeletion; +const std::string kDisableUserTimestamp(""); EntryType GetEntryType(ValueType value_type) { switch (value_type) { diff --git a/db/dbformat.h b/db/dbformat.h index 1d229adf3..b67c68697 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -93,6 +93,9 @@ static const SequenceNumber kDisableGlobalSequenceNumber = port::kMaxUint64; constexpr uint64_t kNumInternalBytes = 8; +// Defined in dbformat.cc +extern const std::string kDisableUserTimestamp; + // The data structure that represents an internal key in the way that user_key, // sequence number and type are stored in separated forms. struct ParsedInternalKey { diff --git a/db/external_sst_file_ingestion_job.cc b/db/external_sst_file_ingestion_job.cc index 2d617b2a3..0557b9168 100644 --- a/db/external_sst_file_ingestion_job.cc +++ b/db/external_sst_file_ingestion_job.cc @@ -441,7 +441,8 @@ Status ExternalSstFileIngestionJob::Run() { f.fd.GetNumber(), f.fd.GetPathId(), f.fd.GetFileSize(), f.smallest_internal_key, f.largest_internal_key, f.assigned_seqno, f.assigned_seqno, false, kInvalidBlobFileNumber, oldest_ancester_time, - current_time, f.file_checksum, f.file_checksum_func_name); + current_time, f.file_checksum, f.file_checksum_func_name, + kDisableUserTimestamp, kDisableUserTimestamp); f_metadata.temperature = f.file_temperature; edit_.AddFile(f.picked_level, f_metadata); } diff --git a/db/flush_job.cc b/db/flush_job.cc index c3a70d160..65716b689 100644 --- a/db/flush_job.cc +++ b/db/flush_job.cc @@ -962,7 +962,8 @@ Status FlushJob::WriteLevel0Table() { meta_.fd.smallest_seqno, meta_.fd.largest_seqno, meta_.marked_for_compaction, meta_.oldest_blob_file_number, meta_.oldest_ancester_time, meta_.file_creation_time, - meta_.file_checksum, meta_.file_checksum_func_name); + meta_.file_checksum, meta_.file_checksum_func_name, + meta_.min_timestamp, meta_.max_timestamp); edit_->SetBlobFileAdditions(std::move(blob_file_additions)); } diff --git a/db/import_column_family_job.cc b/db/import_column_family_job.cc index 4ddbb5c86..b9390565a 100644 --- a/db/import_column_family_job.cc +++ b/db/import_column_family_job.cc @@ -155,7 +155,8 @@ Status ImportColumnFamilyJob::Run() { f.largest_internal_key, file_metadata.smallest_seqno, file_metadata.largest_seqno, false, kInvalidBlobFileNumber, oldest_ancester_time, current_time, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); // If incoming sequence number is higher, update local sequence number. if (file_metadata.largest_seqno > versions_->LastSequence()) { diff --git a/db/repair.cc b/db/repair.cc index cef0f7369..720b0296b 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -635,7 +635,8 @@ class Repairer { table->meta.fd.largest_seqno, table->meta.marked_for_compaction, table->meta.oldest_blob_file_number, table->meta.oldest_ancester_time, table->meta.file_creation_time, - table->meta.file_checksum, table->meta.file_checksum_func_name); + table->meta.file_checksum, table->meta.file_checksum_func_name, + table->meta.min_timestamp, table->meta.max_timestamp); } assert(next_file_number_ > 0); vset_.MarkFileNumberUsed(next_file_number_ - 1); diff --git a/db/version_builder_test.cc b/db/version_builder_test.cc index e8d7b9a1f..4ef3612d8 100644 --- a/db/version_builder_test.cc +++ b/db/version_builder_test.cc @@ -69,7 +69,8 @@ class VersionBuilderTest : public testing::Test { GetInternalKey(largest, largest_seq), smallest_seqno, largest_seqno, /* marked_for_compact */ false, oldest_blob_file_number, kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, + kDisableUserTimestamp, kDisableUserTimestamp); f->compensated_file_size = file_size; f->num_entries = num_entries; f->num_deletions = num_deletions; @@ -130,7 +131,8 @@ class VersionBuilderTest : public testing::Test { smallest_seqno, largest_seqno, marked_for_compaction, blob_file_number, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); } static std::shared_ptr GetBlobFileMetaData( @@ -190,7 +192,8 @@ TEST_F(VersionBuilderTest, ApplyAndSaveTo) { GetInternalKey("350"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); version_edit.DeleteFile(3, 27U); EnvOptions env_options; @@ -230,7 +233,8 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic) { GetInternalKey("350"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); version_edit.DeleteFile(0, 1U); version_edit.DeleteFile(0, 88U); @@ -273,7 +277,8 @@ TEST_F(VersionBuilderTest, ApplyAndSaveToDynamic2) { GetInternalKey("350"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); version_edit.DeleteFile(0, 1U); version_edit.DeleteFile(0, 88U); version_edit.DeleteFile(4, 6U); @@ -307,27 +312,32 @@ TEST_F(VersionBuilderTest, ApplyMultipleAndSaveTo) { GetInternalKey("350"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); EnvOptions env_options; constexpr TableCache* table_cache = nullptr; @@ -364,27 +374,32 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) { GetInternalKey("350"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); ASSERT_OK(version_builder.Apply(&version_edit)); VersionEdit version_edit2; @@ -392,14 +407,16 @@ TEST_F(VersionBuilderTest, ApplyDeleteAndSaveTo) { GetInternalKey("950"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); version_edit2.DeleteFile(2, 616); version_edit2.DeleteFile(2, 636); version_edit.AddFile(2, 806, 0, 100U, GetInternalKey("801"), GetInternalKey("850"), 200, 200, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); ASSERT_OK(version_builder.Apply(&version_edit2)); ASSERT_OK(version_builder.SaveTo(&new_vstorage)); @@ -500,7 +517,8 @@ TEST_F(VersionBuilderTest, ApplyFileDeletionAndAddition) { GetInternalKey(largest, largest_seq), smallest_seqno, largest_seqno, marked_for_compaction, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, + kDisableUserTimestamp, kDisableUserTimestamp); ASSERT_OK(builder.Apply(&addition)); @@ -544,7 +562,8 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyInBase) { smallest_seqno, largest_seqno, marked_for_compaction, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); const Status s = builder.Apply(&edit); ASSERT_TRUE(s.IsCorruption()); @@ -577,7 +596,8 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) { GetInternalKey(largest), smallest_seqno, largest_seqno, marked_for_compaction, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, + kDisableUserTimestamp, kDisableUserTimestamp); ASSERT_OK(builder.Apply(&edit)); @@ -590,7 +610,8 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAlreadyApplied) { smallest_seqno, largest_seqno, marked_for_compaction, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); const Status s = builder.Apply(&other_edit); ASSERT_TRUE(s.IsCorruption()); @@ -624,7 +645,8 @@ TEST_F(VersionBuilderTest, ApplyFileAdditionAndDeletion) { smallest_seqno, largest_seqno, marked_for_compaction, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); ASSERT_OK(builder.Apply(&addition)); @@ -1154,7 +1176,8 @@ TEST_F(VersionBuilderTest, SaveBlobFilesToConcurrentJobs) { GetInternalKey(smallest), GetInternalKey(largest), smallest_seqno, largest_seqno, marked_for_compaction, blob_file_number, kUnknownOldestAncesterTime, - kUnknownFileCreationTime, checksum_value, checksum_method); + kUnknownFileCreationTime, checksum_value, checksum_method, + kDisableUserTimestamp, kDisableUserTimestamp); edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes, checksum_method, checksum_value); @@ -1238,7 +1261,8 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) { /* largest_seqno */ 200, /* marked_for_compaction */ false, /* oldest_blob_file_number */ 16, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); edit.AddFile(/* level */ 1, /* file_number */ 700, /* path_id */ 0, /* file_size */ 100, /* smallest */ GetInternalKey("801"), @@ -1246,7 +1270,8 @@ TEST_F(VersionBuilderTest, CheckConsistencyForBlobFiles) { /* largest_seqno */ 200, /* marked_for_compaction */ false, /* oldest_blob_file_number */ 1000, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); edit.AddBlobFile(/* blob_file_number */ 1000, /* total_blob_count */ 2000, /* total_blob_bytes */ 200000, /* checksum_method */ std::string(), @@ -1464,7 +1489,8 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { /* largest_seqno */ 2100, /* marked_for_compaction */ false, /* oldest_blob_file_number */ 1, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); // Add an SST that does not reference any blob files. edit.AddFile( @@ -1474,7 +1500,8 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { /* largest_seqno */ 2200, /* marked_for_compaction */ false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); // Delete a file that references a blob file. edit.DeleteFile(/* level */ 1, /* file_number */ 6); @@ -1496,7 +1523,8 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { /* largest_seqno */ 300, /* marked_for_compaction */ false, /* oldest_blob_file_number */ 3, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); // Trivially move a file that does not reference any blob files. edit.DeleteFile(/* level */ 1, /* file_number */ 13); @@ -1507,7 +1535,8 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { /* largest_seqno */ 1300, /* marked_for_compaction */ false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); // Add one more SST file that references a blob file, then promptly // delete it in a second version edit before the new version gets saved. @@ -1519,7 +1548,8 @@ TEST_F(VersionBuilderTest, MaintainLinkedSstsForBlobFiles) { /* largest_seqno */ 2300, /* marked_for_compaction */ false, /* oldest_blob_file_number */ 5, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); VersionEdit edit2; diff --git a/db/version_edit.cc b/db/version_edit.cc index 8cb173a2d..812e7b735 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -186,6 +186,16 @@ bool VersionEdit::EncodeTo(std::string* dst) const { PutVarint32(dst, NewFileCustomTag::kFileChecksumFuncName); PutLengthPrefixedSlice(dst, Slice(f.file_checksum_func_name)); + if (f.max_timestamp != kDisableUserTimestamp) { + if (f.min_timestamp.size() != f.max_timestamp.size()) { + assert(false); + return false; + } + PutVarint32(dst, NewFileCustomTag::kMinTimestamp); + PutLengthPrefixedSlice(dst, Slice(f.min_timestamp)); + PutVarint32(dst, NewFileCustomTag::kMaxTimestamp); + PutLengthPrefixedSlice(dst, Slice(f.max_timestamp)); + } if (f.fd.GetPathId() != 0) { PutVarint32(dst, NewFileCustomTag::kPathId); char p = static_cast(f.fd.GetPathId()); @@ -315,6 +325,10 @@ const char* VersionEdit::DecodeNewFile4From(Slice* input) { return "new-file4 custom field"; } if (custom_tag == kTerminate) { + if (f.min_timestamp.size() != f.max_timestamp.size()) { + assert(false); + return "new-file4 custom field timestamp size mismatch error"; + } break; } if (!GetLengthPrefixedSlice(input, &field)) { @@ -375,6 +389,12 @@ const char* VersionEdit::DecodeNewFile4From(Slice* input) { } } break; + case kMinTimestamp: + f.min_timestamp = field.ToString(); + break; + case kMaxTimestamp: + f.max_timestamp = field.ToString(); + break; default: if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) { // Should not proceed if cannot understand it @@ -781,6 +801,10 @@ std::string VersionEdit::DebugString(bool hex_key) const { r.append(" blob_file:"); AppendNumberTo(&r, f.oldest_blob_file_number); } + r.append(" min_timestamp:"); + r.append(Slice(f.min_timestamp).ToString(true)); + r.append(" max_timestamp:"); + r.append(Slice(f.max_timestamp).ToString(true)); r.append(" oldest_ancester_time:"); AppendNumberTo(&r, f.oldest_ancester_time); r.append(" file_creation_time:"); @@ -894,6 +918,11 @@ std::string VersionEdit::DebugJSON(int edit_num, bool hex_key) const { jw << "FileSize" << f.fd.GetFileSize(); jw << "SmallestIKey" << f.smallest.DebugString(hex_key); jw << "LargestIKey" << f.largest.DebugString(hex_key); + if (f.min_timestamp != kDisableUserTimestamp) { + assert(f.max_timestamp != kDisableUserTimestamp); + jw << "MinTimestamp" << Slice(f.min_timestamp).ToString(true); + jw << "MaxTimestamp" << Slice(f.max_timestamp).ToString(true); + } if (f.oldest_blob_file_number != kInvalidBlobFileNumber) { jw << "OldestBlobFile" << f.oldest_blob_file_number; } diff --git a/db/version_edit.h b/db/version_edit.h index 600634927..772ca24c6 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -82,6 +82,8 @@ enum NewFileCustomTag : uint32_t { kFileChecksum = 7, kFileChecksumFuncName = 8, kTemperature = 9, + kMinTimestamp = 10, + kMaxTimestamp = 11, // If this bit for the custom tag is set, opening DB should fail if // we don't know this field. @@ -209,6 +211,10 @@ struct FileMetaData { // File checksum function name std::string file_checksum_func_name = kUnknownFileChecksumFuncName; + // Min (oldest) timestamp of keys in this file + std::string min_timestamp; + // Max (newest) timestamp of keys in this file + std::string max_timestamp; FileMetaData() = default; @@ -218,7 +224,8 @@ struct FileMetaData { const SequenceNumber& largest_seq, bool marked_for_compact, uint64_t oldest_blob_file, uint64_t _oldest_ancester_time, uint64_t _file_creation_time, const std::string& _file_checksum, - const std::string& _file_checksum_func_name) + const std::string& _file_checksum_func_name, + std::string _min_timestamp, std::string _max_timestamp) : fd(file, file_path_id, file_size, smallest_seq, largest_seq), smallest(smallest_key), largest(largest_key), @@ -227,7 +234,9 @@ struct FileMetaData { oldest_ancester_time(_oldest_ancester_time), file_creation_time(_file_creation_time), file_checksum(_file_checksum), - file_checksum_func_name(_file_checksum_func_name) { + file_checksum_func_name(_file_checksum_func_name), + min_timestamp(std::move(_min_timestamp)), + max_timestamp(std::move(_max_timestamp)) { TEST_SYNC_POINT_CALLBACK("FileMetaData::FileMetaData", this); } @@ -394,14 +403,17 @@ class VersionEdit { const SequenceNumber& largest_seqno, bool marked_for_compaction, uint64_t oldest_blob_file_number, uint64_t oldest_ancester_time, uint64_t file_creation_time, const std::string& file_checksum, - const std::string& file_checksum_func_name) { + const std::string& file_checksum_func_name, + const std::string& min_timestamp, + const std::string& max_timestamp) { assert(smallest_seqno <= largest_seqno); new_files_.emplace_back( - level, FileMetaData(file, file_path_id, file_size, smallest, largest, - smallest_seqno, largest_seqno, - marked_for_compaction, oldest_blob_file_number, - oldest_ancester_time, file_creation_time, - file_checksum, file_checksum_func_name)); + level, + FileMetaData(file, file_path_id, file_size, smallest, largest, + smallest_seqno, largest_seqno, marked_for_compaction, + oldest_blob_file_number, oldest_ancester_time, + file_creation_time, file_checksum, file_checksum_func_name, + min_timestamp, max_timestamp)); } void AddFile(int level, const FileMetaData& f) { diff --git a/db/version_edit_test.cc b/db/version_edit_test.cc index 43ae6840f..7668599df 100644 --- a/db/version_edit_test.cc +++ b/db/version_edit_test.cc @@ -40,7 +40,7 @@ TEST_F(VersionEditTest, EncodeDecode) { InternalKey("foo", kBig + 500 + i, kTypeValue), InternalKey("zoo", kBig + 600 + i, kTypeDeletion), kBig + 500 + i, kBig + 600 + i, false, kInvalidBlobFileNumber, - 888, 678, "234", "crc32c"); + 888, 678, "234", "crc32c", "123", "345"); edit.DeleteFile(4, kBig + 700 + i); } @@ -59,21 +59,24 @@ TEST_F(VersionEditTest, EncodeDecodeNewFile4) { InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500, kBig + 600, true, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, "123", + "234"); edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue), InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501, kBig + 601, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, "345", + "543"); edit.AddFile(5, 302, 0, 100, InternalKey("foo", kBig + 502, kTypeValue), InternalKey("zoo", kBig + 602, kTypeDeletion), kBig + 502, kBig + 602, true, kInvalidBlobFileNumber, 666, 888, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, "456", + "567"); edit.AddFile(5, 303, 0, 100, InternalKey("foo", kBig + 503, kTypeBlobIndex), InternalKey("zoo", kBig + 603, kTypeBlobIndex), kBig + 503, kBig + 603, true, 1001, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, "678", "789"); ; edit.DeleteFile(4, 700); @@ -105,6 +108,14 @@ TEST_F(VersionEditTest, EncodeDecodeNewFile4) { ASSERT_EQ(kInvalidBlobFileNumber, new_files[2].second.oldest_blob_file_number); ASSERT_EQ(1001, new_files[3].second.oldest_blob_file_number); + ASSERT_EQ("123", new_files[0].second.min_timestamp); + ASSERT_EQ("234", new_files[0].second.max_timestamp); + ASSERT_EQ("345", new_files[1].second.min_timestamp); + ASSERT_EQ("543", new_files[1].second.max_timestamp); + ASSERT_EQ("456", new_files[2].second.min_timestamp); + ASSERT_EQ("567", new_files[2].second.max_timestamp); + ASSERT_EQ("678", new_files[3].second.min_timestamp); + ASSERT_EQ("789", new_files[3].second.max_timestamp); } TEST_F(VersionEditTest, ForwardCompatibleNewFile4) { @@ -114,11 +125,12 @@ TEST_F(VersionEditTest, ForwardCompatibleNewFile4) { InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500, kBig + 600, true, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, "123", + "234"); edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue), InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501, kBig + 601, false, kInvalidBlobFileNumber, 686, 868, "234", - "crc32c"); + "crc32c", kDisableUserTimestamp, kDisableUserTimestamp); edit.DeleteFile(4, 700); edit.SetComparatorName("foo"); @@ -157,6 +169,10 @@ TEST_F(VersionEditTest, ForwardCompatibleNewFile4) { ASSERT_EQ(3u, new_files[0].second.fd.GetPathId()); ASSERT_EQ(3u, new_files[1].second.fd.GetPathId()); ASSERT_EQ(1u, parsed.GetDeletedFiles().size()); + ASSERT_EQ("123", new_files[0].second.min_timestamp); + ASSERT_EQ("234", new_files[0].second.max_timestamp); + ASSERT_EQ(kDisableUserTimestamp, new_files[1].second.min_timestamp); + ASSERT_EQ(kDisableUserTimestamp, new_files[1].second.max_timestamp); } TEST_F(VersionEditTest, NewFile4NotSupportedField) { @@ -166,7 +182,8 @@ TEST_F(VersionEditTest, NewFile4NotSupportedField) { InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500, kBig + 600, true, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, + kDisableUserTimestamp, kDisableUserTimestamp); edit.SetComparatorName("foo"); edit.SetLogNumber(kBig + 100); @@ -196,7 +213,8 @@ TEST_F(VersionEditTest, EncodeEmptyFile) { edit.AddFile(0, 0, 0, 0, InternalKey(), InternalKey(), 0, 0, false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); std::string buffer; ASSERT_TRUE(!edit.EncodeTo(&buffer)); } diff --git a/db/version_set.cc b/db/version_set.cc index 278878703..98a65a685 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -5376,7 +5376,8 @@ Status VersionSet::WriteCurrentStateToManifest( f->fd.smallest_seqno, f->fd.largest_seqno, f->marked_for_compaction, f->oldest_blob_file_number, f->oldest_ancester_time, f->file_creation_time, - f->file_checksum, f->file_checksum_func_name); + f->file_checksum, f->file_checksum_func_name, + f->min_timestamp, f->max_timestamp); } } diff --git a/db/version_set_test.cc b/db/version_set_test.cc index 34b6d3bb2..4d2c4c8cd 100644 --- a/db/version_set_test.cc +++ b/db/version_set_test.cc @@ -46,7 +46,8 @@ class GenerateLevelFilesBriefTest : public testing::Test { InternalKey(largest, largest_seq, kTypeValue), smallest_seq, largest_seq, /* marked_for_compact */ false, kInvalidBlobFileNumber, kUnknownOldestAncesterTime, kUnknownFileCreationTime, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, + kDisableUserTimestamp, kDisableUserTimestamp); files_.push_back(f); } @@ -154,7 +155,8 @@ class VersionStorageInfoTestBase : public testing::Test { /* largest_seq */ 0, /* marked_for_compact */ false, oldest_blob_file_number, kUnknownOldestAncesterTime, kUnknownFileCreationTime, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, kDisableUserTimestamp, + kDisableUserTimestamp); f->compensated_file_size = file_size; vstorage_.AddFile(level, f); } @@ -2996,7 +2998,8 @@ class VersionSetTestMissingFiles : public VersionSetTestBase, ASSERT_NE(0, file_size); file_metas->emplace_back(file_num, /*file_path_id=*/0, file_size, ikey, ikey, 0, 0, false, 0, 0, 0, kUnknownFileChecksum, - kUnknownFileChecksumFuncName); + kUnknownFileChecksumFuncName, + kDisableUserTimestamp, kDisableUserTimestamp); } } @@ -3051,7 +3054,8 @@ TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) { FileMetaData meta = FileMetaData(file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey, largest_ikey, 0, 0, false, 0, 0, 0, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, + kDisableUserTimestamp, kDisableUserTimestamp); added_files.emplace_back(0, meta); } WriteFileAdditionAndDeletionToManifest( @@ -3106,7 +3110,8 @@ TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) { FileMetaData meta = FileMetaData(file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey, largest_ikey, 0, 0, false, 0, 0, 0, - kUnknownFileChecksum, kUnknownFileChecksumFuncName); + kUnknownFileChecksum, kUnknownFileChecksumFuncName, + kDisableUserTimestamp, kDisableUserTimestamp); added_files.emplace_back(0, meta); } WriteFileAdditionAndDeletionToManifest(