workaround race conditions during PeriodicWorkScheduler
registration (#7888)
Summary: This provides a workaround for two race conditions that will be fixed in a more sophisticated way later. This PR: (1) Makes the client serialize calls to `Timer::Start()` and `Timer::Shutdown()` (see https://github.com/facebook/rocksdb/issues/7711). The long-term fix will be to make those functions thread-safe. (2) Makes `PeriodicWorkScheduler` atomically add/cancel work together with starting/shutting down its `Timer`. The long-term fix will be for `Timer` API to offer more specialized APIs so the client will not need to synchronize. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7888 Test Plan: ran the repro provided in https://github.com/facebook/rocksdb/issues/7881 Reviewed By: jay-zhuang Differential Revision: D25990891 Pulled By: ajkr fbshipit-source-id: a97fdaebbda6d7db7ddb1b146738b68c16c5be38
This commit is contained in:
parent
d5f5d6579a
commit
e18a4df62a
@ -1,4 +1,7 @@
|
||||
# Rocksdb Change Log
|
||||
## Unreleased
|
||||
* Fix a race condition between DB startups and shutdowns in managing the periodic background worker threads. One effect of this race condition could be the process being terminated.
|
||||
|
||||
## 6.17.0 (01/15/2021)
|
||||
### Behavior Changes
|
||||
* When verifying full file checksum with `DB::VerifyFileChecksums()`, we now fail with `Status::InvalidArgument` if the name of the checksum generator used for verification does not match the name of the checksum generator used for protecting the file when it was created.
|
||||
|
@ -10,13 +10,14 @@
|
||||
#ifndef ROCKSDB_LITE
|
||||
namespace ROCKSDB_NAMESPACE {
|
||||
|
||||
PeriodicWorkScheduler::PeriodicWorkScheduler(Env* env) {
|
||||
PeriodicWorkScheduler::PeriodicWorkScheduler(Env* env) : timer_mu_(env) {
|
||||
timer = std::unique_ptr<Timer>(new Timer(env));
|
||||
}
|
||||
|
||||
void PeriodicWorkScheduler::Register(DBImpl* dbi,
|
||||
unsigned int stats_dump_period_sec,
|
||||
unsigned int stats_persist_period_sec) {
|
||||
MutexLock l(&timer_mu_);
|
||||
static std::atomic<uint64_t> initial_delay(0);
|
||||
timer->Start();
|
||||
if (stats_dump_period_sec > 0) {
|
||||
@ -41,6 +42,7 @@ void PeriodicWorkScheduler::Register(DBImpl* dbi,
|
||||
}
|
||||
|
||||
void PeriodicWorkScheduler::Unregister(DBImpl* dbi) {
|
||||
MutexLock l(&timer_mu_);
|
||||
timer->Cancel(GetTaskName(dbi, "dump_st"));
|
||||
timer->Cancel(GetTaskName(dbi, "pst_st"));
|
||||
timer->Cancel(GetTaskName(dbi, "flush_info_log"));
|
||||
@ -78,7 +80,10 @@ PeriodicWorkTestScheduler* PeriodicWorkTestScheduler::Default(Env* env) {
|
||||
MutexLock l(&mutex);
|
||||
if (scheduler.timer.get() != nullptr &&
|
||||
scheduler.timer->TEST_GetPendingTaskNum() == 0) {
|
||||
scheduler.timer->Shutdown();
|
||||
{
|
||||
MutexLock timer_mu_guard(&scheduler.timer_mu_);
|
||||
scheduler.timer->Shutdown();
|
||||
}
|
||||
scheduler.timer.reset(new Timer(env));
|
||||
}
|
||||
}
|
||||
|
@ -42,6 +42,12 @@ class PeriodicWorkScheduler {
|
||||
|
||||
protected:
|
||||
std::unique_ptr<Timer> timer;
|
||||
// `timer_mu_` serves two purposes currently:
|
||||
// (1) to ensure calls to `Start()` and `Shutdown()` are serialized, as
|
||||
// they are currently not implemented in a thread-safe way; and
|
||||
// (2) to ensure the `Timer::Add()`s and `Timer::Start()` run atomically, and
|
||||
// the `Timer::Cancel()`s and `Timer::Shutdown()` run atomically.
|
||||
port::Mutex timer_mu_;
|
||||
|
||||
explicit PeriodicWorkScheduler(Env* env);
|
||||
|
||||
|
@ -22,6 +22,9 @@ namespace ROCKSDB_NAMESPACE {
|
||||
|
||||
// A Timer class to handle repeated work.
|
||||
//
|
||||
// `Start()` and `Shutdown()` are currently not thread-safe. The client must
|
||||
// serialize calls to these two member functions.
|
||||
//
|
||||
// A single timer instance can handle multiple functions via a single thread.
|
||||
// It is better to leave long running work to a dedicated thread pool.
|
||||
//
|
||||
|
Loading…
Reference in New Issue
Block a user