Encode min_log_number_to_keep and delete_wals_before in one version edit (#9766)
Summary: min_log_number_to_keep denotes that the WALs whose numbers are below this value **will** be deleted by RocksDB. delete_wals_before is used by RocksDB only if track_and_verify_wals_in_manifest is set to true. During recovery, RocksDB uses the info encoded in delete_wals_before to reconstruct its knowledge of which WALs are expected to exist. If these two tags are not encoded in the same VersionEdit, then it's possible for min_log_number_to_keep=100 to exist in the MANIFEST while delete_wals_before=100 is lost due to a power failure. The subsequent recovery will then delete 99.log. If the db crashes again, the following recovery will expect to see 99.log, since there is no delete_wals_before=100 in the MANIFEST, but that WAL has already been deleted. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9766 Test Plan: First of all, make check. Second, format compatibility. SHORT_TEST=1 ./tools/check_format_compatible.sh Reviewed By: ltamasi Differential Revision: D35203623 Pulled By: riversand963 fbshipit-source-id: 45623fc4b4b50d299d5e0f9559a3a4c5e9522c8f
This commit is contained in:
parent
76383bea5d
commit
6eafdf135a
@ -6,6 +6,7 @@
|
|||||||
* Fixed a heap use-after-free race with DropColumnFamily.
|
* Fixed a heap use-after-free race with DropColumnFamily.
|
||||||
* Fixed a bug that `rocksdb.read.block.compaction.micros` cannot track compaction stats (#9722).
|
* Fixed a bug that `rocksdb.read.block.compaction.micros` cannot track compaction stats (#9722).
|
||||||
* Fixed `file_type`, `relative_filename` and `directory` fields returned by `GetLiveFilesMetaData()`, which were added in inheriting from `FileStorageInfo`.
|
* Fixed `file_type`, `relative_filename` and `directory` fields returned by `GetLiveFilesMetaData()`, which were added in inheriting from `FileStorageInfo`.
|
||||||
|
* Fixed a bug affecting `track_and_verify_wals_in_manifest`. Without the fix, an application may see "open error: Corruption: Missing WAL with log number" while trying to open the db. The corruption is a false alarm, but it prevents the DB from being opened (#9766).
|
||||||
|
|
||||||
### New Features
|
### New Features
|
||||||
* For db_bench when --seed=0 or --seed is not set then it uses the current time as the seed value. Previously it used the value 1000.
|
* For db_bench when --seed=0 or --seed is not set then it uses the current time as the seed value. Previously it used the value 1000.
|
||||||
|
@ -2271,7 +2271,7 @@ TEST_P(DBAtomicFlushTest, ManualFlushUnder2PC) {
|
|||||||
|
|
||||||
// The recovered min log number with prepared data should be non-zero.
|
// The recovered min log number with prepared data should be non-zero.
|
||||||
// In 2pc mode, MinLogNumberToKeep returns the
|
// In 2pc mode, MinLogNumberToKeep returns the
|
||||||
// VersionSet::min_log_number_to_keep_2pc recovered from MANIFEST, if it's 0,
|
// VersionSet::min_log_number_to_keep recovered from MANIFEST, if it's 0,
|
||||||
// it means atomic flush didn't write the min_log_number_to_keep to MANIFEST.
|
// it means atomic flush didn't write the min_log_number_to_keep to MANIFEST.
|
||||||
cfs.push_back(kDefaultColumnFamilyName);
|
cfs.push_back(kDefaultColumnFamilyName);
|
||||||
ASSERT_OK(TryReopenWithColumnFamilies(cfs, options));
|
ASSERT_OK(TryReopenWithColumnFamilies(cfs, options));
|
||||||
|
@ -505,21 +505,20 @@ Status MemTableList::TryInstallMemtableFlushResults(
|
|||||||
min_wal_number_to_keep =
|
min_wal_number_to_keep =
|
||||||
PrecomputeMinLogNumberToKeepNon2PC(vset, *cfd, edit_list);
|
PrecomputeMinLogNumberToKeepNon2PC(vset, *cfd, edit_list);
|
||||||
}
|
}
|
||||||
edit_list.back()->SetMinLogNumberToKeep(min_wal_number_to_keep);
|
|
||||||
|
|
||||||
std::unique_ptr<VersionEdit> wal_deletion;
|
VersionEdit wal_deletion;
|
||||||
|
wal_deletion.SetMinLogNumberToKeep(min_wal_number_to_keep);
|
||||||
if (vset->db_options()->track_and_verify_wals_in_manifest) {
|
if (vset->db_options()->track_and_verify_wals_in_manifest) {
|
||||||
if (min_wal_number_to_keep >
|
if (min_wal_number_to_keep >
|
||||||
vset->GetWalSet().GetMinWalNumberToKeep()) {
|
vset->GetWalSet().GetMinWalNumberToKeep()) {
|
||||||
wal_deletion.reset(new VersionEdit);
|
wal_deletion.DeleteWalsBefore(min_wal_number_to_keep);
|
||||||
wal_deletion->DeleteWalsBefore(min_wal_number_to_keep);
|
|
||||||
edit_list.push_back(wal_deletion.get());
|
|
||||||
}
|
}
|
||||||
TEST_SYNC_POINT_CALLBACK(
|
TEST_SYNC_POINT_CALLBACK(
|
||||||
"MemTableList::TryInstallMemtableFlushResults:"
|
"MemTableList::TryInstallMemtableFlushResults:"
|
||||||
"AfterComputeMinWalToKeep",
|
"AfterComputeMinWalToKeep",
|
||||||
nullptr);
|
nullptr);
|
||||||
}
|
}
|
||||||
|
edit_list.push_back(&wal_deletion);
|
||||||
|
|
||||||
const auto manifest_write_cb = [this, cfd, batch_count, log_buffer,
|
const auto manifest_write_cb = [this, cfd, batch_count, log_buffer,
|
||||||
to_delete, mu](const Status& status) {
|
to_delete, mu](const Status& status) {
|
||||||
@ -805,17 +804,15 @@ Status InstallMemtableAtomicFlushResults(
|
|||||||
min_wal_number_to_keep =
|
min_wal_number_to_keep =
|
||||||
PrecomputeMinLogNumberToKeepNon2PC(vset, cfds, edit_lists);
|
PrecomputeMinLogNumberToKeepNon2PC(vset, cfds, edit_lists);
|
||||||
}
|
}
|
||||||
edit_lists.back().back()->SetMinLogNumberToKeep(min_wal_number_to_keep);
|
|
||||||
|
|
||||||
std::unique_ptr<VersionEdit> wal_deletion;
|
VersionEdit wal_deletion;
|
||||||
if (vset->db_options()->track_and_verify_wals_in_manifest) {
|
wal_deletion.SetMinLogNumberToKeep(min_wal_number_to_keep);
|
||||||
if (min_wal_number_to_keep > vset->GetWalSet().GetMinWalNumberToKeep()) {
|
if (vset->db_options()->track_and_verify_wals_in_manifest &&
|
||||||
wal_deletion.reset(new VersionEdit);
|
min_wal_number_to_keep > vset->GetWalSet().GetMinWalNumberToKeep()) {
|
||||||
wal_deletion->DeleteWalsBefore(min_wal_number_to_keep);
|
wal_deletion.DeleteWalsBefore(min_wal_number_to_keep);
|
||||||
edit_lists.back().push_back(wal_deletion.get());
|
|
||||||
++num_entries;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
edit_lists.back().push_back(&wal_deletion);
|
||||||
|
++num_entries;
|
||||||
|
|
||||||
// Mark the version edits as an atomic group if the number of version edits
|
// Mark the version edits as an atomic group if the number of version edits
|
||||||
// exceeds 1.
|
// exceeds 1.
|
||||||
|
@ -120,6 +120,9 @@ bool VersionEdit::EncodeTo(std::string* dst) const {
|
|||||||
if (has_max_column_family_) {
|
if (has_max_column_family_) {
|
||||||
PutVarint32Varint32(dst, kMaxColumnFamily, max_column_family_);
|
PutVarint32Varint32(dst, kMaxColumnFamily, max_column_family_);
|
||||||
}
|
}
|
||||||
|
if (has_min_log_number_to_keep_) {
|
||||||
|
PutVarint32Varint64(dst, kMinLogNumberToKeep, min_log_number_to_keep_);
|
||||||
|
}
|
||||||
if (has_last_sequence_) {
|
if (has_last_sequence_) {
|
||||||
PutVarint32Varint64(dst, kLastSequence, last_sequence_);
|
PutVarint32Varint64(dst, kLastSequence, last_sequence_);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user