diff --git a/HISTORY.md b/HISTORY.md index a388f413b..02c231283 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,7 @@ # Rocksdb Change Log +## Unreleased +* Fix a race condition between DB startups and shutdowns in managing the periodic background worker threads. One effect of this race condition could be the process being terminated. + ## 6.17.0 (01/15/2021) ### Behavior Changes * When verifying full file checksum with `DB::VerifyFileChecksums()`, we now fail with `Status::InvalidArgument` if the name of the checksum generator used for verification does not match the name of the checksum generator used for protecting the file when it was created. diff --git a/db/periodic_work_scheduler.cc b/db/periodic_work_scheduler.cc index cc6f714d9..da0bc1e4b 100644 --- a/db/periodic_work_scheduler.cc +++ b/db/periodic_work_scheduler.cc @@ -10,13 +10,14 @@ #ifndef ROCKSDB_LITE namespace ROCKSDB_NAMESPACE { -PeriodicWorkScheduler::PeriodicWorkScheduler(Env* env) { +PeriodicWorkScheduler::PeriodicWorkScheduler(Env* env) : timer_mu_(env) { timer = std::unique_ptr(new Timer(env)); } void PeriodicWorkScheduler::Register(DBImpl* dbi, unsigned int stats_dump_period_sec, unsigned int stats_persist_period_sec) { + MutexLock l(&timer_mu_); static std::atomic initial_delay(0); timer->Start(); if (stats_dump_period_sec > 0) { @@ -41,6 +42,7 @@ void PeriodicWorkScheduler::Register(DBImpl* dbi, } void PeriodicWorkScheduler::Unregister(DBImpl* dbi) { + MutexLock l(&timer_mu_); timer->Cancel(GetTaskName(dbi, "dump_st")); timer->Cancel(GetTaskName(dbi, "pst_st")); timer->Cancel(GetTaskName(dbi, "flush_info_log")); @@ -78,7 +80,10 @@ PeriodicWorkTestScheduler* PeriodicWorkTestScheduler::Default(Env* env) { MutexLock l(&mutex); if (scheduler.timer.get() != nullptr && scheduler.timer->TEST_GetPendingTaskNum() == 0) { - scheduler.timer->Shutdown(); + { + MutexLock timer_mu_guard(&scheduler.timer_mu_); + scheduler.timer->Shutdown(); + } scheduler.timer.reset(new Timer(env)); } } diff --git a/db/periodic_work_scheduler.h b/db/periodic_work_scheduler.h index 6c1ce314c..9382adc44 100644 --- a/db/periodic_work_scheduler.h +++ b/db/periodic_work_scheduler.h @@ -42,6 +42,12 @@ class PeriodicWorkScheduler { protected: std::unique_ptr timer; + // `timer_mu_` serves two purposes currently: + // (1) to ensure calls to `Start()` and `Shutdown()` are serialized, as + // they are currently not implemented in a thread-safe way; and + // (2) to ensure the `Timer::Add()`s and `Timer::Start()` run atomically, and + // the `Timer::Cancel()`s and `Timer::Shutdown()` run atomically. + port::Mutex timer_mu_; explicit PeriodicWorkScheduler(Env* env); diff --git a/util/timer.h b/util/timer.h index 8e12d7d7b..b6ee42ed0 100644 --- a/util/timer.h +++ b/util/timer.h @@ -22,6 +22,9 @@ namespace ROCKSDB_NAMESPACE { // A Timer class to handle repeated work. // +// `Start()` and `Shutdown()` are currently not thread-safe. The client must +// serialize calls to these two member functions. +// // A single timer instance can handle multiple functions via a single thread. // It is better to leave long running work to a dedicated thread pool. //