From 2736752b33d451e5d57d2754eebacf0afe8d8e33 Mon Sep 17 00:00:00 2001 From: Yanqin Jin Date: Thu, 19 Jul 2018 17:26:25 -0700 Subject: [PATCH] Fix a bug in MANIFEST group commit (#4157) Summary: PR #3944 introduces group commit of `VersionEdit` in MANIFEST. The implementation has a bug. When updating the log file number of each column family, we must consider only `VersionEdit`s that operate on the same column family. Otherwise, a column family may accidentally set its log file number higher than actual value, indicating that log files with smaller file number will be ignored, thus causing some updates to be lost. Pull Request resolved: https://github.com/facebook/rocksdb/pull/4157 Differential Revision: D8916650 Pulled By: riversand963 fbshipit-source-id: 8f456cf688f17bf35ad87b38e30e899aa162f201 --- db/version_set.cc | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index d7aafd71e..49881c451 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2994,25 +2994,31 @@ Status VersionSet::ProcessManifestWrites( delete first_writer.cfd; } } else { - uint64_t max_log_number_in_batch = 0; + // Each version in versions corresponds to a column family. + // For each column family, update its log number indicating that logs + // with number smaller than this should be ignored. + for (const auto version : versions) { + uint64_t max_log_number_in_batch = 0; + uint32_t cf_id = version->cfd_->GetID(); + for (const auto& e : batch_edits) { + if (e->has_log_number_ && e->column_family_ == cf_id) { + max_log_number_in_batch = + std::max(max_log_number_in_batch, e->log_number_); + } + } + if (max_log_number_in_batch != 0) { + assert(version->cfd_->GetLogNumber() <= max_log_number_in_batch); + version->cfd_->SetLogNumber(max_log_number_in_batch); + } + } + uint64_t last_min_log_number_to_keep = 0; for (auto& e : batch_edits) { - if (e->has_log_number_) { - max_log_number_in_batch = - std::max(max_log_number_in_batch, e->log_number_); - } if (e->has_min_log_number_to_keep_) { last_min_log_number_to_keep = std::max(last_min_log_number_to_keep, e->min_log_number_to_keep_); } } - if (max_log_number_in_batch != 0) { - for (int i = 0; i < static_cast(versions.size()); ++i) { - ColumnFamilyData* cfd = versions[i]->cfd_; - assert(cfd->GetLogNumber() <= max_log_number_in_batch); - cfd->SetLogNumber(max_log_number_in_batch); - } - } if (last_min_log_number_to_keep != 0) { // Should only be set in 2PC mode.