diff --git a/db/column_family.h b/db/column_family.h index 857ed818d..94e26ae49 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -350,6 +350,11 @@ class ColumnFamilyData { MemTableList* imm() { return &imm_; } MemTable* mem() { return mem_; } + + bool IsEmpty() { + return mem()->GetFirstSequenceNumber() == 0 && imm()->NumNotFlushed() == 0; + } + Version* current() { return current_; } Version* dummy_versions() { return dummy_versions_; } void SetCurrent(Version* _current); diff --git a/db/db_impl/db_impl_write.cc b/db/db_impl/db_impl_write.cc index 7a92fe611..982359aa5 100644 --- a/db/db_impl/db_impl_write.cc +++ b/db/db_impl/db_impl_write.cc @@ -1819,17 +1819,64 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) { return s; } - for (auto loop_cfd : *versions_->GetColumnFamilySet()) { - // all this is just optimization to delete logs that - // are no longer needed -- if CF is empty, that means it - // doesn't need that particular log to stay alive, so we just - // advance the log number. no need to persist this in the manifest - if (loop_cfd->mem()->GetFirstSequenceNumber() == 0 && - loop_cfd->imm()->NumNotFlushed() == 0) { - if (creating_new_log) { - loop_cfd->SetLogNumber(logfile_number_); + bool empty_cf_updated = false; + if (immutable_db_options_.track_and_verify_wals_in_manifest && + !immutable_db_options_.allow_2pc && creating_new_log) { + // In non-2pc mode, WALs become obsolete if they do not contain unflushed + // data. Updating the empty CF's log number might cause some WALs to become + // obsolete. So we should track the WAL obsoletion event before actually + // updating the empty CF's log number. + uint64_t min_wal_number_to_keep = + versions_->PreComputeMinLogNumberWithUnflushedData(logfile_number_); + if (min_wal_number_to_keep > + versions_->GetWalSet().GetMinWalNumberToKeep()) { + // Get a snapshot of the empty column families. + // LogAndApply may release and reacquire db + // mutex, during that period, column family may become empty (e.g. its + // flush succeeds), then it affects the computed min_log_number_to_keep, + // so we take a snapshot for consistency of column family data + // status. If a column family becomes non-empty afterwards, its active log + // should still be the created new log, so the min_log_number_to_keep is + // not affected. + autovector empty_cfs; + for (auto cf : *versions_->GetColumnFamilySet()) { + if (cf->IsEmpty()) { + empty_cfs.push_back(cf); + } + } + + VersionEdit wal_deletion; + wal_deletion.DeleteWalsBefore(min_wal_number_to_keep); + s = versions_->LogAndApplyToDefaultColumnFamily(&wal_deletion, &mutex_); + if (!s.ok() && versions_->io_status().IsIOError()) { + s = error_handler_.SetBGError(versions_->io_status(), + BackgroundErrorReason::kManifestWrite); + } + if (!s.ok()) { + return s; + } + + for (auto cf : empty_cfs) { + if (cf->IsEmpty()) { + cf->SetLogNumber(logfile_number_); + cf->mem()->SetCreationSeq(versions_->LastSequence()); + } // cf may become non-empty. + } + empty_cf_updated = true; + } + } + if (!empty_cf_updated) { + for (auto cf : *versions_->GetColumnFamilySet()) { + // all this is just optimization to delete logs that + // are no longer needed -- if CF is empty, that means it + // doesn't need that particular log to stay alive, so we just + // advance the log number. no need to persist this in the manifest + if (cf->IsEmpty()) { + if (creating_new_log) { + cf->SetLogNumber(logfile_number_); + } + cf->mem()->SetCreationSeq(versions_->LastSequence()); } - loop_cfd->mem()->SetCreationSeq(versions_->LastSequence()); } } diff --git a/db/version_set.h b/db/version_set.h index 78b26ba4d..7cada5f46 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -1127,10 +1127,28 @@ class VersionSet { return PreComputeMinLogNumberWithUnflushedData(nullptr); } // Returns the minimum log number which still has data not flushed to any SST + // file. + // Empty column families' log number is considered to be + // new_log_number_for_empty_cf. + uint64_t PreComputeMinLogNumberWithUnflushedData( + uint64_t new_log_number_for_empty_cf) const { + uint64_t min_log_num = port::kMaxUint64; + for (auto cfd : *column_family_set_) { + // It's safe to ignore dropped column families here: + // cfd->IsDropped() becomes true after the drop is persisted in MANIFEST. + uint64_t num = + cfd->IsEmpty() ? new_log_number_for_empty_cf : cfd->GetLogNumber(); + if (min_log_num > num && !cfd->IsDropped()) { + min_log_num = num; + } + } + return min_log_num; + } + // Returns the minimum log number which still has data not flushed to any SST // file, except data from `cfd_to_skip`. uint64_t PreComputeMinLogNumberWithUnflushedData( const ColumnFamilyData* cfd_to_skip) const { - uint64_t min_log_num = std::numeric_limits::max(); + uint64_t min_log_num = port::kMaxUint64; for (auto cfd : *column_family_set_) { if (cfd == cfd_to_skip) { continue;