Handle missing WAL in secondary mode (#5323)
Summary: In secondary mode, the secondary may list the primary's WAL directory, find a WAL file, and try to open it. The primary may delete that WAL file after the secondary has listed the directory but before the secondary opens it. The secondary then fails to open the WAL file with a PathNotFound status. In this case, we can return OK without replaying the WAL, and optionally replay more of the MANIFEST.

Test Plan (on my dev machine): Without this PR, the following fails several times out of 100 runs.
```
~/gtest-parallel/gtest-parallel -r 100 -w 16 ./db_secondary_test --gtest_filter=DBSecondaryTest.SwitchToNewManifestDuringOpen
```
With this PR, the above should always succeed.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/5323
Differential Revision: D15763878
Pulled By: riversand963
fbshipit-source-id: c7164fa7cb8d9001abc258b6a2dc93613e4f38ff
This commit is contained in:
parent
9bbccda01e
commit
7177dc46a1
@ -60,6 +60,12 @@ Status DBImplSecondary::Recover(
|
||||
s = FindAndRecoverLogFiles(&cfds_changed, &job_context);
|
||||
}
|
||||
|
||||
if (s.IsPathNotFound()) {
|
||||
ROCKS_LOG_INFO(immutable_db_options_.info_log,
|
||||
"Secondary tries to read WAL, but WAL file(s) have already "
|
||||
"been purged by primary.");
|
||||
s = Status::OK();
|
||||
}
|
||||
// TODO: update options_file_number_ needed?
|
||||
|
||||
job_context.Clean();
|
||||
@ -475,6 +481,12 @@ Status DBImplSecondary::TryCatchUpWithPrimary() {
|
||||
if (s.ok()) {
|
||||
s = FindAndRecoverLogFiles(&cfds_changed, &job_context);
|
||||
}
|
||||
if (s.IsPathNotFound()) {
|
||||
ROCKS_LOG_INFO(immutable_db_options_.info_log,
|
||||
"Secondary tries to read WAL, but WAL file(s) have already "
|
||||
"been purged by primary.");
|
||||
s = Status::OK();
|
||||
}
|
||||
if (s.ok()) {
|
||||
for (auto cfd : cfds_changed) {
|
||||
cfd->imm()->RemoveOldMemTables(cfd->GetLogNumber(),
|
||||
|
Loading…
Reference in New Issue
Block a user