db03172d08
Summary: This change eliminates the need for a lot of the PermitUncheckedError calls on return from ErrorHandler methods. The calls are no longer needed as the status is returned as a reference rather than a copy. Additionally, this means that the originating status (recovery_error_, bg_error_) is not cleared implicitly as a result of calling one of these methods. For this class, I do not know if the proper behavior should be to call PermitUncheckedError in the destructor or if the checked state should be cleared when the status is cleared. I did tests both ways. Without the code in the destructor, the status will need to be cleared in at least some of the places where it is set to OK. When running tests, I found no instances where this class was destructed with a non-OK, non-checked Status. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7539 Reviewed By: anand1976 Differential Revision: D25340565 Pulled By: pdillinger fbshipit-source-id: 1730c035c81a475875ea745226112030ec25136c
118 lines
3.9 KiB
C++
118 lines
3.9 KiB
C++
// Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
#pragma once
|
|
|
|
#include "monitoring/instrumented_mutex.h"
|
|
#include "options/db_options.h"
|
|
#include "rocksdb/io_status.h"
|
|
#include "rocksdb/listener.h"
|
|
#include "rocksdb/status.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
class DBImpl;
|
|
|
|
// This structure is used to store the DB recovery context. The context is
|
|
// the information that related to the recover actions. For example, it contains
|
|
// FlushReason, which tells the flush job why this flush is called.
|
|
struct DBRecoverContext {
|
|
FlushReason flush_reason;
|
|
|
|
DBRecoverContext() : flush_reason(FlushReason::kErrorRecovery) {}
|
|
|
|
DBRecoverContext(FlushReason reason) : flush_reason(reason) {}
|
|
};
|
|
|
|
class ErrorHandler {
|
|
public:
|
|
ErrorHandler(DBImpl* db, const ImmutableDBOptions& db_options,
|
|
InstrumentedMutex* db_mutex)
|
|
: db_(db),
|
|
db_options_(db_options),
|
|
cv_(db_mutex),
|
|
end_recovery_(false),
|
|
recovery_thread_(nullptr),
|
|
db_mutex_(db_mutex),
|
|
auto_recovery_(false),
|
|
recovery_in_prog_(false),
|
|
soft_error_no_bg_work_(false) {
|
|
// Clear the checked flag for uninitialized errors
|
|
bg_error_.PermitUncheckedError();
|
|
recovery_error_.PermitUncheckedError();
|
|
recovery_io_error_.PermitUncheckedError();
|
|
}
|
|
|
|
void EnableAutoRecovery() { auto_recovery_ = true; }
|
|
|
|
Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
|
|
Status::Code code,
|
|
Status::SubCode subcode);
|
|
|
|
const Status& SetBGError(const Status& bg_err, BackgroundErrorReason reason);
|
|
|
|
const Status& SetBGError(const IOStatus& bg_io_err,
|
|
BackgroundErrorReason reason);
|
|
|
|
Status GetBGError() const { return bg_error_; }
|
|
|
|
Status GetRecoveryError() const { return recovery_error_; }
|
|
|
|
Status ClearBGError();
|
|
|
|
bool IsDBStopped() {
|
|
return !bg_error_.ok() &&
|
|
bg_error_.severity() >= Status::Severity::kHardError;
|
|
}
|
|
|
|
bool IsBGWorkStopped() {
|
|
return !bg_error_.ok() &&
|
|
(bg_error_.severity() >= Status::Severity::kHardError ||
|
|
!auto_recovery_ || soft_error_no_bg_work_);
|
|
}
|
|
|
|
bool IsSoftErrorNoBGWork() { return soft_error_no_bg_work_; }
|
|
|
|
bool IsRecoveryInProgress() { return recovery_in_prog_; }
|
|
|
|
Status RecoverFromBGError(bool is_manual = false);
|
|
void CancelErrorRecovery();
|
|
|
|
void EndAutoRecovery();
|
|
|
|
private:
|
|
DBImpl* db_;
|
|
const ImmutableDBOptions& db_options_;
|
|
Status bg_error_;
|
|
// A separate Status variable used to record any errors during the
|
|
// recovery process from hard errors
|
|
Status recovery_error_;
|
|
// A separate IO Status variable used to record any IO errors during
|
|
// the recovery process. At the same time, recovery_error_ is also set.
|
|
IOStatus recovery_io_error_;
|
|
// The condition variable used with db_mutex during auto resume for time
|
|
// wait.
|
|
InstrumentedCondVar cv_;
|
|
bool end_recovery_;
|
|
std::unique_ptr<port::Thread> recovery_thread_;
|
|
|
|
InstrumentedMutex* db_mutex_;
|
|
// A flag indicating whether automatic recovery from errors is enabled
|
|
bool auto_recovery_;
|
|
bool recovery_in_prog_;
|
|
// A flag to indicate that for the soft error, we should not allow any
|
|
// backrgound work execpt the work is from recovery.
|
|
bool soft_error_no_bg_work_;
|
|
|
|
// Used to store the context for recover, such as flush reason.
|
|
DBRecoverContext recover_context_;
|
|
|
|
Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
|
|
void RecoverFromNoSpace();
|
|
const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
|
|
void RecoverFromRetryableBGIOError();
|
|
};
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|