Add the statistics and info log for Error handler (#8050)
Summary: Add statistics and info log for error handler: counters for bg error, bg io error, bg retryable io error, auto resume, auto resume total retry, and auto resume success; Histogram for auto resume retry count in each recovery call. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8050 Test Plan: make check and add test to error_handler_fs_test Reviewed By: anand1976 Differential Revision: D26990565 Pulled By: zhichao-cao fbshipit-source-id: 49f71e8ea4e9db8b189943976404205b56ab883f
This commit is contained in:
parent
27d57a035e
commit
08ec5e7321
@ -21,6 +21,7 @@
|
||||
* Add new options for db_bench --benchmarks: flush, waitforcompaction, compact0, compact1.
|
||||
* Add an option to BackupEngine::GetBackupInfo to include the name and size of each backed-up file. Especially in the presence of file sharing among backups, this offers detailed insight into backup space usage.
|
||||
* Enable backward iteration on keys with user-defined timestamps.
|
||||
* Add statistics and info log for error handler: counters for bg error, bg io error, bg retryable io error, auto resume count, auto resume total retry number, and auto resume success; Histogram for auto resume retry count in each recovery call. Note that each auto resume attempt will have one or multiple retries.
|
||||
|
||||
## 6.18.0 (02/19/2021)
|
||||
### Behavior Changes
|
||||
|
@ -4,9 +4,11 @@
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
//
|
||||
#include "db/error_handler.h"
|
||||
|
||||
#include "db/db_impl/db_impl.h"
|
||||
#include "db/event_helpers.h"
|
||||
#include "file/sst_file_manager_impl.h"
|
||||
#include "logging/logging.h"
|
||||
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
|
||||
@ -274,6 +276,12 @@ const Status& ErrorHandler::SetBGError(const Status& bg_err,
|
||||
return bg_err;
|
||||
}
|
||||
|
||||
if (bg_error_stats_ != nullptr) {
|
||||
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_ERROR_COUNT);
|
||||
}
|
||||
ROCKS_LOG_INFO(db_options_.info_log,
|
||||
"ErrorHandler: Set regular background error\n");
|
||||
|
||||
bool paranoid = db_options_.paranoid_checks;
|
||||
Status::Severity sev = Status::Severity::kFatalError;
|
||||
Status new_bg_err;
|
||||
@ -399,6 +407,13 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err,
|
||||
if (recovery_in_prog_ && recovery_error_.ok()) {
|
||||
recovery_error_ = bg_err;
|
||||
}
|
||||
if (bg_error_stats_ != nullptr) {
|
||||
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_ERROR_COUNT);
|
||||
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT);
|
||||
}
|
||||
ROCKS_LOG_INFO(
|
||||
db_options_.info_log,
|
||||
"ErrorHandler: Set background IO error as unrecoverable error\n");
|
||||
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
|
||||
&bg_err, db_mutex_, &auto_recovery);
|
||||
recover_context_ = context;
|
||||
@ -416,12 +431,26 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err,
|
||||
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
|
||||
&new_bg_io_err, db_mutex_,
|
||||
&auto_recovery);
|
||||
if (bg_error_stats_ != nullptr) {
|
||||
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_ERROR_COUNT);
|
||||
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT);
|
||||
RecordTick(bg_error_stats_.get(),
|
||||
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT);
|
||||
}
|
||||
ROCKS_LOG_INFO(db_options_.info_log,
|
||||
"ErrorHandler: Set background retryable IO error\n");
|
||||
if (BackgroundErrorReason::kCompaction == reason) {
|
||||
// We map the retryable IO error during compaction to soft error. Since
|
||||
// compaction can reschedule by itself. We will not set the BG error in
|
||||
// this case
|
||||
// TODO: a better way to set or clean the retryable IO error which
|
||||
// happens during compaction SST file write.
|
||||
if (bg_error_stats_ != nullptr) {
|
||||
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_AUTORESUME_COUNT);
|
||||
}
|
||||
ROCKS_LOG_INFO(
|
||||
db_options_.info_log,
|
||||
"ErrorHandler: Compaction will schedule by itself to resume\n");
|
||||
return bg_error_;
|
||||
} else if (BackgroundErrorReason::kFlushNoWAL == reason ||
|
||||
BackgroundErrorReason::kManifestWriteNoWAL == reason) {
|
||||
@ -455,6 +484,9 @@ const Status& ErrorHandler::SetBGError(const IOStatus& bg_io_err,
|
||||
return StartRecoverFromRetryableBGIOError(bg_io_err);
|
||||
}
|
||||
} else {
|
||||
if (bg_error_stats_ != nullptr) {
|
||||
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_BG_IO_ERROR_COUNT);
|
||||
}
|
||||
return SetBGError(new_bg_io_err, reason);
|
||||
}
|
||||
}
|
||||
@ -603,7 +635,12 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
|
||||
// Auto resume BG error is not enabled, directly return bg_error_.
|
||||
return bg_error_;
|
||||
}
|
||||
|
||||
if (bg_error_stats_ != nullptr) {
|
||||
RecordTick(bg_error_stats_.get(), ERROR_HANDLER_AUTORESUME_COUNT);
|
||||
}
|
||||
ROCKS_LOG_INFO(
|
||||
db_options_.info_log,
|
||||
"ErrorHandler: Call StartRecoverFromRetryableBGIOError to resume\n");
|
||||
if (recovery_thread_) {
|
||||
// In this case, if recovery_in_prog_ is false, current thread should
|
||||
// wait the previous recover thread to finish and create a new thread
|
||||
@ -642,6 +679,7 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
|
||||
DBRecoverContext context = recover_context_;
|
||||
int resume_count = db_options_.max_bgerror_resume_count;
|
||||
uint64_t wait_interval = db_options_.bgerror_resume_retry_interval;
|
||||
uint64_t retry_count = 0;
|
||||
// Recover from the retryable error. Create a separate thread to do it.
|
||||
while (resume_count > 0) {
|
||||
if (end_recovery_) {
|
||||
@ -651,15 +689,24 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
|
||||
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeResume1");
|
||||
recovery_io_error_ = IOStatus::OK();
|
||||
recovery_error_ = Status::OK();
|
||||
retry_count++;
|
||||
Status s = db_->ResumeImpl(context);
|
||||
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterResume0");
|
||||
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterResume1");
|
||||
if (bg_error_stats_ != nullptr) {
|
||||
RecordTick(bg_error_stats_.get(),
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT);
|
||||
}
|
||||
if (s.IsShutdownInProgress() ||
|
||||
bg_error_.severity() >= Status::Severity::kFatalError) {
|
||||
// If DB shutdown in progress or the error severity is higher than
|
||||
// Hard Error, stop auto resume and returns.
|
||||
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:RecoverFail0");
|
||||
recovery_in_prog_ = false;
|
||||
if (bg_error_stats_ != nullptr) {
|
||||
RecordInHistogram(bg_error_stats_.get(),
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count);
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (!recovery_io_error_.ok() &&
|
||||
@ -686,6 +733,12 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
|
||||
bg_error_.PermitUncheckedError();
|
||||
EventHelpers::NotifyOnErrorRecoveryCompleted(db_options_.listeners,
|
||||
old_bg_error, db_mutex_);
|
||||
if (bg_error_stats_ != nullptr) {
|
||||
RecordTick(bg_error_stats_.get(),
|
||||
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT);
|
||||
RecordInHistogram(bg_error_stats_.get(),
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count);
|
||||
}
|
||||
recovery_in_prog_ = false;
|
||||
if (soft_error_no_bg_work_) {
|
||||
soft_error_no_bg_work_ = false;
|
||||
@ -696,6 +749,10 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
|
||||
// In this case: 1) recovery_io_error is more serious or not retryable
|
||||
// 2) other Non IO recovery_error happens. The auto recovery stops.
|
||||
recovery_in_prog_ = false;
|
||||
if (bg_error_stats_ != nullptr) {
|
||||
RecordInHistogram(bg_error_stats_.get(),
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -703,6 +760,10 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
|
||||
}
|
||||
recovery_in_prog_ = false;
|
||||
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:LoopOut");
|
||||
if (bg_error_stats_ != nullptr) {
|
||||
RecordInHistogram(bg_error_stats_.get(),
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_COUNT, retry_count);
|
||||
}
|
||||
return;
|
||||
#else
|
||||
return;
|
||||
|
@ -37,7 +37,8 @@ class ErrorHandler {
|
||||
db_mutex_(db_mutex),
|
||||
auto_recovery_(false),
|
||||
recovery_in_prog_(false),
|
||||
soft_error_no_bg_work_(false) {
|
||||
soft_error_no_bg_work_(false),
|
||||
bg_error_stats_(db_options.statistics) {
|
||||
// Clear the checked flag for uninitialized errors
|
||||
bg_error_.PermitUncheckedError();
|
||||
recovery_error_.PermitUncheckedError();
|
||||
@ -108,6 +109,9 @@ class ErrorHandler {
|
||||
// Used to store the context for recover, such as flush reason.
|
||||
DBRecoverContext recover_context_;
|
||||
|
||||
// The pointer of DB statistics.
|
||||
std::shared_ptr<Statistics> bg_error_stats_;
|
||||
|
||||
Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
|
||||
void RecoverFromNoSpace();
|
||||
const Status& StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
|
||||
|
@ -158,6 +158,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteError) {
|
||||
options.env = fault_env_.get();
|
||||
options.create_if_missing = true;
|
||||
options.listeners.emplace_back(listener);
|
||||
options.statistics = CreateDBStatistics();
|
||||
Status s;
|
||||
|
||||
listener->EnableAutoRecovery(false);
|
||||
@ -174,13 +175,25 @@ TEST_F(DBErrorHandlingFSTest, FLushWriteError) {
|
||||
fault_fs_->SetFilesystemActive(true);
|
||||
s = dbfull()->Resume();
|
||||
ASSERT_OK(s);
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_ERROR_COUNT));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_IO_ERROR_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
|
||||
|
||||
Reopen(options);
|
||||
ASSERT_EQ("val", Get(Key(0)));
|
||||
Destroy(options);
|
||||
}
|
||||
|
||||
TEST_F(DBErrorHandlingFSTest, FLushWritRetryableError) {
|
||||
TEST_F(DBErrorHandlingFSTest, FLushWriteRetryableError) {
|
||||
std::shared_ptr<ErrorHandlerFSListener> listener(
|
||||
new ErrorHandlerFSListener());
|
||||
Options options = GetDefaultOptions();
|
||||
@ -188,6 +201,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableError) {
|
||||
options.create_if_missing = true;
|
||||
options.listeners.emplace_back(listener);
|
||||
options.max_bgerror_resume_count = 0;
|
||||
options.statistics = CreateDBStatistics();
|
||||
Status s;
|
||||
|
||||
listener->EnableAutoRecovery(false);
|
||||
@ -207,6 +221,18 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableError) {
|
||||
fault_fs_->SetFilesystemActive(true);
|
||||
s = dbfull()->Resume();
|
||||
ASSERT_OK(s);
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_ERROR_COUNT));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_IO_ERROR_COUNT));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
|
||||
Reopen(options);
|
||||
ASSERT_EQ("val1", Get(Key(1)));
|
||||
|
||||
@ -241,7 +267,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritRetryableError) {
|
||||
Destroy(options);
|
||||
}
|
||||
|
||||
TEST_F(DBErrorHandlingFSTest, FLushWritFileScopeError) {
|
||||
TEST_F(DBErrorHandlingFSTest, FLushWriteFileScopeError) {
|
||||
std::shared_ptr<ErrorHandlerFSListener> listener(
|
||||
new ErrorHandlerFSListener());
|
||||
Options options = GetDefaultOptions();
|
||||
@ -325,7 +351,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritFileScopeError) {
|
||||
Destroy(options);
|
||||
}
|
||||
|
||||
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError1) {
|
||||
TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError1) {
|
||||
std::shared_ptr<ErrorHandlerFSListener> listener(
|
||||
new ErrorHandlerFSListener());
|
||||
Options options = GetDefaultOptions();
|
||||
@ -333,6 +359,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError1) {
|
||||
options.create_if_missing = true;
|
||||
options.listeners.emplace_back(listener);
|
||||
options.max_bgerror_resume_count = 0;
|
||||
options.statistics = CreateDBStatistics();
|
||||
Status s;
|
||||
|
||||
listener->EnableAutoRecovery(false);
|
||||
@ -363,11 +390,23 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError1) {
|
||||
s = Flush();
|
||||
ASSERT_OK(s);
|
||||
ASSERT_EQ("val3", Get(Key(3)));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_ERROR_COUNT));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_IO_ERROR_COUNT));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
|
||||
|
||||
Destroy(options);
|
||||
}
|
||||
|
||||
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError2) {
|
||||
TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError2) {
|
||||
std::shared_ptr<ErrorHandlerFSListener> listener(
|
||||
new ErrorHandlerFSListener());
|
||||
Options options = GetDefaultOptions();
|
||||
@ -410,7 +449,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError2) {
|
||||
Destroy(options);
|
||||
}
|
||||
|
||||
TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableError3) {
|
||||
TEST_F(DBErrorHandlingFSTest, FLushWriteNoWALRetryableError3) {
|
||||
std::shared_ptr<ErrorHandlerFSListener> listener(
|
||||
new ErrorHandlerFSListener());
|
||||
Options options = GetDefaultOptions();
|
||||
@ -1010,6 +1049,7 @@ TEST_F(DBErrorHandlingFSTest, AutoRecoverFlushError) {
|
||||
options.env = fault_env_.get();
|
||||
options.create_if_missing = true;
|
||||
options.listeners.emplace_back(listener);
|
||||
options.statistics = CreateDBStatistics();
|
||||
Status s;
|
||||
|
||||
listener->EnableAutoRecovery();
|
||||
@ -1028,6 +1068,18 @@ TEST_F(DBErrorHandlingFSTest, AutoRecoverFlushError) {
|
||||
|
||||
s = Put(Key(1), "val");
|
||||
ASSERT_OK(s);
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_ERROR_COUNT));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_IO_ERROR_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
|
||||
|
||||
Reopen(options);
|
||||
ASSERT_EQ("val", Get(Key(0)));
|
||||
@ -1567,6 +1619,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover1) {
|
||||
options.listeners.emplace_back(listener);
|
||||
options.max_bgerror_resume_count = 2;
|
||||
options.bgerror_resume_retry_interval = 100000; // 0.1 second
|
||||
options.statistics = CreateDBStatistics();
|
||||
Status s;
|
||||
|
||||
listener->EnableAutoRecovery(false);
|
||||
@ -1594,6 +1647,22 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover1) {
|
||||
ASSERT_EQ("val1", Get(Key(1)));
|
||||
SyncPoint::GetInstance()->DisableProcessing();
|
||||
fault_fs_->SetFilesystemActive(true);
|
||||
ASSERT_EQ(3, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_ERROR_COUNT));
|
||||
ASSERT_EQ(3, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_IO_ERROR_COUNT));
|
||||
ASSERT_EQ(3, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_COUNT));
|
||||
ASSERT_EQ(2, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
|
||||
ASSERT_EQ(0, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
|
||||
HistogramData autoresume_retry;
|
||||
options.statistics->histogramData(ERROR_HANDLER_AUTORESUME_RETRY_COUNT,
|
||||
&autoresume_retry);
|
||||
ASSERT_EQ(autoresume_retry.max, 2);
|
||||
ASSERT_OK(Put(Key(2), "val2", wo));
|
||||
s = Flush();
|
||||
// Since auto resume fails, the bg error is not cleaned, flush will
|
||||
@ -1620,6 +1689,7 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover2) {
|
||||
options.listeners.emplace_back(listener);
|
||||
options.max_bgerror_resume_count = 2;
|
||||
options.bgerror_resume_retry_interval = 100000; // 0.1 second
|
||||
options.statistics = CreateDBStatistics();
|
||||
Status s;
|
||||
|
||||
listener->EnableAutoRecovery(false);
|
||||
@ -1643,6 +1713,22 @@ TEST_F(DBErrorHandlingFSTest, FLushWritNoWALRetryableeErrorAutoRecover2) {
|
||||
fault_fs_->SetFilesystemActive(true);
|
||||
ASSERT_EQ(listener->WaitForRecovery(5000000), true);
|
||||
ASSERT_EQ("val1", Get(Key(1)));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_ERROR_COUNT));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_IO_ERROR_COUNT));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_COUNT));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT));
|
||||
ASSERT_EQ(1, options.statistics->getAndResetTickerCount(
|
||||
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT));
|
||||
HistogramData autoresume_retry;
|
||||
options.statistics->histogramData(ERROR_HANDLER_AUTORESUME_RETRY_COUNT,
|
||||
&autoresume_retry);
|
||||
ASSERT_EQ(autoresume_retry.max, 1);
|
||||
ASSERT_OK(Put(Key(2), "val2", wo));
|
||||
s = Flush();
|
||||
// Since auto resume is successful, the bg error is cleaned, flush will
|
||||
|
@ -374,6 +374,15 @@ enum Tickers : uint32_t {
|
||||
// # of files deleted immediately by sst file manger through delete scheduler.
|
||||
FILES_DELETED_IMMEDIATELY,
|
||||
|
||||
// The counters for error handler. Note that bg_io_error is the subset of
|
||||
// bg_error and bg_retryable_io_error is the subset of bg_io_error
|
||||
ERROR_HANDLER_BG_ERROR_COUNT,
|
||||
ERROR_HANDLER_BG_IO_ERROR_COUNT,
|
||||
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT,
|
||||
ERROR_HANDLER_AUTORESUME_COUNT,
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT,
|
||||
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT,
|
||||
|
||||
TICKER_ENUM_MAX
|
||||
};
|
||||
|
||||
@ -472,6 +481,9 @@ enum Histograms : uint32_t {
|
||||
// Num of sst files read from file system per level.
|
||||
NUM_SST_READ_PER_LEVEL,
|
||||
|
||||
// Error handler statistics
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_COUNT,
|
||||
|
||||
HISTOGRAM_ENUM_MAX,
|
||||
};
|
||||
|
||||
|
@ -4982,7 +4982,20 @@ class TickerTypeJni {
|
||||
return -0x14;
|
||||
case ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_TTL:
|
||||
return -0x15;
|
||||
|
||||
case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_ERROR_COUNT:
|
||||
return -0x16;
|
||||
case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_IO_ERROR_COUNT:
|
||||
return -0x17;
|
||||
case ROCKSDB_NAMESPACE::Tickers::
|
||||
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT:
|
||||
return -0x18;
|
||||
case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_AUTORESUME_COUNT:
|
||||
return -0x19;
|
||||
case ROCKSDB_NAMESPACE::Tickers::
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT:
|
||||
return -0x1A;
|
||||
case ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT:
|
||||
return -0x1B;
|
||||
case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
|
||||
// 0x5F for backwards compatibility on current minor version.
|
||||
return 0x5F;
|
||||
@ -5294,6 +5307,21 @@ class TickerTypeJni {
|
||||
return ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_PERIODIC;
|
||||
case -0x15:
|
||||
return ROCKSDB_NAMESPACE::Tickers::COMPACT_WRITE_BYTES_TTL;
|
||||
case -0x16:
|
||||
return ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_ERROR_COUNT;
|
||||
case -0x17:
|
||||
return ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_BG_IO_ERROR_COUNT;
|
||||
case -0x18:
|
||||
return ROCKSDB_NAMESPACE::Tickers::
|
||||
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT;
|
||||
case -0x19:
|
||||
return ROCKSDB_NAMESPACE::Tickers::ERROR_HANDLER_AUTORESUME_COUNT;
|
||||
case -0x1A:
|
||||
return ROCKSDB_NAMESPACE::Tickers::
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT;
|
||||
case -0x1B:
|
||||
return ROCKSDB_NAMESPACE::Tickers::
|
||||
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT;
|
||||
case 0x5F:
|
||||
// 0x5F for backwards compatibility on current minor version.
|
||||
return ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX;
|
||||
@ -5413,6 +5441,8 @@ class HistogramTypeJni {
|
||||
return 0x30;
|
||||
case ROCKSDB_NAMESPACE::Histograms::NUM_SST_READ_PER_LEVEL:
|
||||
return 0x31;
|
||||
case ROCKSDB_NAMESPACE::Histograms::ERROR_HANDLER_AUTORESUME_RETRY_COUNT:
|
||||
return 0x31;
|
||||
case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX:
|
||||
// 0x1F for backwards compatibility on current minor version.
|
||||
return 0x1F;
|
||||
@ -5527,6 +5557,9 @@ class HistogramTypeJni {
|
||||
return ROCKSDB_NAMESPACE::Histograms::NUM_DATA_BLOCKS_READ_PER_LEVEL;
|
||||
case 0x31:
|
||||
return ROCKSDB_NAMESPACE::Histograms::NUM_SST_READ_PER_LEVEL;
|
||||
case 0x32:
|
||||
return ROCKSDB_NAMESPACE::Histograms::
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_COUNT;
|
||||
case 0x1F:
|
||||
// 0x1F for backwards compatibility on current minor version.
|
||||
return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX;
|
||||
|
@ -175,6 +175,11 @@ public enum HistogramType {
|
||||
*/
|
||||
NUM_SST_READ_PER_LEVEL((byte) 0x31),
|
||||
|
||||
/**
|
||||
* The number of retries in auto resume
|
||||
*/
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_COUNT((byte) 0x32),
|
||||
|
||||
// 0x1F for backwards compatibility on current minor version.
|
||||
HISTOGRAM_ENUM_MAX((byte) 0x1F);
|
||||
|
||||
|
@ -742,6 +742,16 @@ public enum TickerType {
|
||||
COMPACT_WRITE_BYTES_PERIODIC((byte) -0x14),
|
||||
COMPACT_WRITE_BYTES_TTL((byte) -0x15),
|
||||
|
||||
/**
|
||||
* DB error handler statistics
|
||||
*/
|
||||
ERROR_HANDLER_BG_ERROR_COUNT((byte) -0x16),
|
||||
ERROR_HANDLER_BG_IO_ERROR_COUNT((byte) -0x17),
|
||||
ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT((byte) -0x18),
|
||||
ERROR_HANDLER_AUTORESUME_COUNT((byte) -0x19),
|
||||
ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT((byte) -0x1A),
|
||||
ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT((byte) -0x1B),
|
||||
|
||||
TICKER_ENUM_MAX((byte) 0x5F);
|
||||
|
||||
private final byte value;
|
||||
|
@ -191,6 +191,16 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
|
||||
"rocksdb.block.cache.compression.dict.add.redundant"},
|
||||
{FILES_MARKED_TRASH, "rocksdb.files.marked.trash"},
|
||||
{FILES_DELETED_IMMEDIATELY, "rocksdb.files.deleted.immediately"},
|
||||
{ERROR_HANDLER_BG_ERROR_COUNT, "rocksdb.error.handler.bg.errro.count"},
|
||||
{ERROR_HANDLER_BG_IO_ERROR_COUNT,
|
||||
"rocksdb.error.handler.bg.io.errro.count"},
|
||||
{ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT,
|
||||
"rocksdb.error.handler.bg.retryable.io.errro.count"},
|
||||
{ERROR_HANDLER_AUTORESUME_COUNT, "rocksdb.error.handler.autoresume.count"},
|
||||
{ERROR_HANDLER_AUTORESUME_RETRY_TOTAL_COUNT,
|
||||
"rocksdb.error.handler.autoresume.retry.total.count"},
|
||||
{ERROR_HANDLER_AUTORESUME_SUCCESS_COUNT,
|
||||
"rocksdb.error.handler.autoresume.success.count"},
|
||||
};
|
||||
|
||||
const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
|
||||
@ -246,6 +256,8 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
|
||||
"rocksdb.num.index.and.filter.blocks.read.per.level"},
|
||||
{NUM_DATA_BLOCKS_READ_PER_LEVEL, "rocksdb.num.data.blocks.read.per.level"},
|
||||
{NUM_SST_READ_PER_LEVEL, "rocksdb.num.sst.read.per.level"},
|
||||
{ERROR_HANDLER_AUTORESUME_RETRY_COUNT,
|
||||
"rocksdb.error.handler.autoresume.retry.count"},
|
||||
};
|
||||
|
||||
std::shared_ptr<Statistics> CreateDBStatistics() {
|
||||
|
Loading…
Reference in New Issue
Block a user