rocksdb/db/periodic_work_scheduler.h
Jay Zhuang 09b0e8f2c7 Fix a timer crash caused by invalid memory management (#9656)
Summary:
The timer could crash when multiple DB instances performed heavy DB open and
close operations concurrently. The crash was caused by adding a timer task
with a smaller timestamp than the currently running task. Fix it by obtaining
the new task's timestamp while holding the timer mutex.
And other fixes:
- Disallow adding duplicated function name to timer
- Fix a minor memory leak in timer when a running task is cancelled

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9656

Reviewed By: ajkr

Differential Revision: D34626296

Pulled By: jay-zhuang

fbshipit-source-id: 6b6d96a5149746bf503546244912a9e41a0c5f6b
2022-03-12 11:45:56 -08:00

79 lines
2.7 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include "db/db_impl/db_impl.h"
#include "util/timer.h"
namespace ROCKSDB_NAMESPACE {
// Forward declaration: within this header SystemClock is only named as the
// template argument of `std::shared_ptr<SystemClock>` in constructor and
// `Default()` signatures.
class SystemClock;
// A singleton that schedules and runs the periodic jobs DumpStats(),
// PersistStats(), and FlushInfoLog() on behalf of every DB instance. All DB
// instances share the one object handed out by `Default()`.
//
// The periodic work functions are executed by a single threaded timer object.
// Since flushing the info log cannot be turned off, the timer thread is always
// started.
class PeriodicWorkScheduler {
 public:
  // Period, in seconds, of the low-frequency flush that pushes the info log
  // out of the application buffer. Flushing on a schedule helps debugging a
  // hung RocksDB: the log messages written just before the hang eventually
  // show up in the log.
  static const uint64_t kDefaultFlushInfoLogPeriodSec = 10;

  // Accessor for the scheduler object shared by all DB instances.
  static PeriodicWorkScheduler* Default();

  // Not default-constructible, not copyable, not movable.
  PeriodicWorkScheduler() = delete;
  PeriodicWorkScheduler(const PeriodicWorkScheduler&) = delete;
  PeriodicWorkScheduler(PeriodicWorkScheduler&&) = delete;
  PeriodicWorkScheduler& operator=(const PeriodicWorkScheduler&) = delete;
  PeriodicWorkScheduler& operator=(PeriodicWorkScheduler&&) = delete;

  // Register / unregister a DB instance with the scheduler.
  // NOTE(review): judging by the parameter names, the two periods are the
  // stats dump and stats persist intervals in seconds; how they are applied
  // is decided in the implementation file -- confirm there.
  Status Register(DBImpl* dbi, unsigned int stats_dump_period_sec,
                  unsigned int stats_persist_period_sec);
  void Unregister(DBImpl* dbi);

 protected:
  // Only reachable from subclasses and from the class itself; takes the
  // clock to use.
  explicit PeriodicWorkScheduler(const std::shared_ptr<SystemClock>& clock);

  // The timer object that runs the periodic work functions.
  std::unique_ptr<Timer> timer;

  // `timer_mu_` currently has two jobs:
  // (1) serializing calls to `Start()` and `Shutdown()`, which are not
  //     implemented in a thread-safe way at the moment; and
  // (2) making the `Timer::Add()`s together with `Timer::Start()` atomic, and
  //     likewise the `Timer::Cancel()`s together with `Timer::Shutdown()`.
  port::Mutex timer_mu_;

 private:
  // NOTE(review): presumably derives the timer task name for `func_name` on
  // the given DB instance -- verify against the definition.
  std::string GetTaskName(DBImpl* dbi, const std::string& func_name);
};
#ifndef NDEBUG
// Unittest flavor of PeriodicWorkScheduler: the test can choose the
// SystemClock, and extra unittest-only functions are exposed.
class PeriodicWorkTestScheduler : public PeriodicWorkScheduler {
 public:
  // Same role as the base class `Default()`, but the caller provides the
  // clock.
  static PeriodicWorkTestScheduler* Default(
      const std::shared_ptr<SystemClock>& clock);

  // Unittest helpers.
  // NOTE(review): going by the names, these report the number of valid timer
  // tasks and wait for a timer run around `callback`; exact semantics are in
  // the implementation file -- confirm there.
  size_t TEST_GetValidTaskNum() const;
  void TEST_WaitForRun(std::function<void()> callback) const;

 private:
  explicit PeriodicWorkTestScheduler(const std::shared_ptr<SystemClock>& clock);
};
#endif // !NDEBUG
} // namespace ROCKSDB_NAMESPACE
#endif // ROCKSDB_LITE