Fix a timer crash caused by invalid memory management (#9656)

Summary:
Timer crash when multiple DB instances doing heavy DB open and close
operations concurrently. Which is caused by adding a timer task with
smaller timestamp than the current running task. Fix it by moving the
getting new task timestamp part within timer mutex protection.
And other fixes:
- Disallow adding duplicated function name to timer
- Fix a minor memory leak in timer when a running task is cancelled

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9656

Reviewed By: ajkr

Differential Revision: D34626296

Pulled By: jay-zhuang

fbshipit-source-id: 6b6d96a5149746bf503546244912a9e41a0c5f6b
This commit is contained in:
Jay Zhuang 2022-03-12 11:45:56 -08:00 committed by Andrew Kryczka
parent bbae679ce7
commit 22e011fe0a
11 changed files with 92 additions and 66 deletions

View File

@ -989,6 +989,7 @@ WriteStallCondition ColumnFamilyData::RecalculateWriteStallConditions(
GetL0ThresholdSpeedupCompaction(
mutable_cf_options.level0_file_num_compaction_trigger,
mutable_cf_options.level0_slowdown_writes_trigger)) {
fprintf(stdout, "JJJ2\n");
write_controller_token_ =
write_controller->GetCompactionPressureToken();
ROCKS_LOG_INFO(

View File

@ -372,6 +372,7 @@ Compaction* UniversalCompactionBuilder::PickCompaction() {
const int kLevel0 = 0;
score_ = vstorage_->CompactionScore(kLevel0);
sorted_runs_ = CalculateSortedRuns(*vstorage_);
fprintf(stdout, "JJJ1\n");
if (sorted_runs_.size() == 0 ||
(vstorage_->FilesMarkedForPeriodicCompaction().empty() &&

View File

@ -171,7 +171,9 @@ Status CompactedDBImpl::Open(const Options& options,
std::unique_ptr<CompactedDBImpl> db(new CompactedDBImpl(db_options, dbname));
Status s = db->Init(options);
if (s.ok()) {
db->StartPeriodicWorkScheduler();
s = db->StartPeriodicWorkScheduler();
}
if (s.ok()) {
ROCKS_LOG_INFO(db->immutable_db_options_.info_log,
"Opened the db as fully compacted mode");
LogFlush(db->immutable_db_options_.info_log);

View File

@ -770,7 +770,7 @@ void DBImpl::PrintStatistics() {
}
}
void DBImpl::StartPeriodicWorkScheduler() {
Status DBImpl::StartPeriodicWorkScheduler() {
#ifndef ROCKSDB_LITE
#ifndef NDEBUG
@ -780,7 +780,7 @@ void DBImpl::StartPeriodicWorkScheduler() {
"DBImpl::StartPeriodicWorkScheduler:DisableScheduler",
&disable_scheduler);
if (disable_scheduler) {
return;
return Status::OK();
}
#endif // !NDEBUG
@ -791,10 +791,11 @@ void DBImpl::StartPeriodicWorkScheduler() {
&periodic_work_scheduler_);
}
periodic_work_scheduler_->Register(
return periodic_work_scheduler_->Register(
this, mutable_db_options_.stats_dump_period_sec,
mutable_db_options_.stats_persist_period_sec);
#endif // !ROCKSDB_LITE
return Status::OK();
}
// esitmate the total size of stats_history_
@ -1228,7 +1229,7 @@ Status DBImpl::SetDBOptions(
mutable_db_options_.stats_persist_period_sec) {
mutex_.Unlock();
periodic_work_scheduler_->Unregister(this);
periodic_work_scheduler_->Register(
s = periodic_work_scheduler_->Register(
this, new_options.stats_dump_period_sec,
new_options.stats_persist_period_sec);
mutex_.Lock();

View File

@ -1866,7 +1866,7 @@ class DBImpl : public DB {
LogBuffer* log_buffer);
// Schedule background tasks
void StartPeriodicWorkScheduler();
Status StartPeriodicWorkScheduler();
void PrintStatistics();

View File

@ -285,6 +285,7 @@ Status DBImpl::FlushMemTableToOutputFile(
assert(storage_info);
VersionStorageInfo::LevelSummaryStorage tmp;
fprintf(stdout, "JJJ4\n");
ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n",
column_family_name.c_str(),
storage_info->LevelSummary(&tmp));
@ -729,6 +730,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
assert(storage_info);
VersionStorageInfo::LevelSummaryStorage tmp;
fprintf(stdout, "JJJ3\n");
ROCKS_LOG_BUFFER(log_buffer, "[%s] Level summary: %s\n",
column_family_name.c_str(),
storage_info->LevelSummary(&tmp));

View File

@ -1889,8 +1889,9 @@ Status DBImpl::Open(const DBOptions& db_options, const std::string& dbname,
persist_options_status.ToString().c_str());
}
if (s.ok()) {
impl->StartPeriodicWorkScheduler();
} else {
s = impl->StartPeriodicWorkScheduler();
}
if (!s.ok()) {
for (auto* h : *handles) {
delete h;
}

View File

@ -16,31 +16,41 @@ PeriodicWorkScheduler::PeriodicWorkScheduler(
timer = std::unique_ptr<Timer>(new Timer(clock.get()));
}
void PeriodicWorkScheduler::Register(DBImpl* dbi,
unsigned int stats_dump_period_sec,
unsigned int stats_persist_period_sec) {
Status PeriodicWorkScheduler::Register(DBImpl* dbi,
unsigned int stats_dump_period_sec,
unsigned int stats_persist_period_sec) {
MutexLock l(&timer_mu_);
static std::atomic<uint64_t> initial_delay(0);
timer->Start();
if (stats_dump_period_sec > 0) {
timer->Add([dbi]() { dbi->DumpStats(); }, GetTaskName(dbi, "dump_st"),
initial_delay.fetch_add(1) %
static_cast<uint64_t>(stats_dump_period_sec) *
kMicrosInSecond,
static_cast<uint64_t>(stats_dump_period_sec) * kMicrosInSecond);
bool succeeded = timer->Add(
[dbi]() { dbi->DumpStats(); }, GetTaskName(dbi, "dump_st"),
initial_delay.fetch_add(1) %
static_cast<uint64_t>(stats_dump_period_sec) * kMicrosInSecond,
static_cast<uint64_t>(stats_dump_period_sec) * kMicrosInSecond);
if (!succeeded) {
return Status::Aborted("Unable to add periodic task DumpStats");
}
}
if (stats_persist_period_sec > 0) {
timer->Add(
bool succeeded = timer->Add(
[dbi]() { dbi->PersistStats(); }, GetTaskName(dbi, "pst_st"),
initial_delay.fetch_add(1) %
static_cast<uint64_t>(stats_persist_period_sec) * kMicrosInSecond,
static_cast<uint64_t>(stats_persist_period_sec) * kMicrosInSecond);
if (!succeeded) {
return Status::Aborted("Unable to add periodic task PersistStats");
}
}
timer->Add([dbi]() { dbi->FlushInfoLog(); },
GetTaskName(dbi, "flush_info_log"),
initial_delay.fetch_add(1) % kDefaultFlushInfoLogPeriodSec *
kMicrosInSecond,
kDefaultFlushInfoLogPeriodSec * kMicrosInSecond);
bool succeeded = timer->Add(
[dbi]() { dbi->FlushInfoLog(); }, GetTaskName(dbi, "flush_info_log"),
initial_delay.fetch_add(1) % kDefaultFlushInfoLogPeriodSec *
kMicrosInSecond,
kDefaultFlushInfoLogPeriodSec * kMicrosInSecond);
if (!succeeded) {
return Status::Aborted("Unable to add periodic task PersistStats");
}
return Status::OK();
}
void PeriodicWorkScheduler::Unregister(DBImpl* dbi) {

View File

@ -30,8 +30,8 @@ class PeriodicWorkScheduler {
PeriodicWorkScheduler& operator=(const PeriodicWorkScheduler&) = delete;
PeriodicWorkScheduler& operator=(PeriodicWorkScheduler&&) = delete;
void Register(DBImpl* dbi, unsigned int stats_dump_period_sec,
unsigned int stats_persist_period_sec);
Status Register(DBImpl* dbi, unsigned int stats_dump_period_sec,
unsigned int stats_persist_period_sec);
void Unregister(DBImpl* dbi);

View File

@ -48,36 +48,38 @@ class Timer {
~Timer() { Shutdown(); }
// Add a new function to run.
// fn_name has to be identical, otherwise, the new one overrides the existing
// one, regardless if the function is pending removed (invalid) or not.
// fn_name has to be identical, otherwise it will fail to add and return false
// start_after_us is the initial delay.
// repeat_every_us is the interval between ending time of the last call and
// starting time of the next call. For example, repeat_every_us = 2000 and
// the function takes 1000us to run. If it starts at time [now]us, then it
// finishes at [now]+1000us, 2nd run starting time will be at [now]+3000us.
// repeat_every_us == 0 means do not repeat.
void Add(std::function<void()> fn,
const std::string& fn_name,
uint64_t start_after_us,
uint64_t repeat_every_us) {
std::unique_ptr<FunctionInfo> fn_info(new FunctionInfo(
std::move(fn), fn_name, clock_->NowMicros() + start_after_us,
repeat_every_us));
{
InstrumentedMutexLock l(&mutex_);
auto it = map_.find(fn_name);
if (it == map_.end()) {
heap_.push(fn_info.get());
map_.emplace(std::make_pair(fn_name, std::move(fn_info)));
} else {
// If it already exists, overriding it.
it->second->fn = std::move(fn_info->fn);
it->second->valid = true;
it->second->next_run_time_us = clock_->NowMicros() + start_after_us;
it->second->repeat_every_us = repeat_every_us;
}
bool Add(std::function<void()> fn, const std::string& fn_name,
uint64_t start_after_us, uint64_t repeat_every_us) {
auto fn_info = std::make_unique<FunctionInfo>(std::move(fn), fn_name, 0,
repeat_every_us);
InstrumentedMutexLock l(&mutex_);
// Assign time within mutex to make sure the next_run_time is larger than
// the current running one
fn_info->next_run_time_us = clock_->NowMicros() + start_after_us;
// the new task start time should never before the current task executing
// time, as the executing task can only be running if it's next_run_time_us
// is due (<= clock_->NowMicros()).
if (executing_task_ &&
fn_info->next_run_time_us < heap_.top()->next_run_time_us) {
return false;
}
auto it = map_.find(fn_name);
if (it == map_.end()) {
heap_.push(fn_info.get());
map_.try_emplace(fn_name, std::move(fn_info));
} else {
// timer doesn't support duplicated function name
return false;
}
cond_var_.SignalAll();
return true;
}
void Cancel(const std::string& fn_name) {
@ -116,7 +118,7 @@ class Timer {
}
running_ = true;
thread_.reset(new port::Thread(&Timer::Run, this));
thread_ = std::make_unique<port::Thread>(&Timer::Run, this);
return true;
}
@ -140,8 +142,8 @@ class Timer {
bool HasPendingTask() const {
InstrumentedMutexLock l(&mutex_);
for (auto it = map_.begin(); it != map_.end(); it++) {
if (it->second->IsValid()) {
for (const auto& fn_info : map_) {
if (fn_info.second->IsValid()) {
return true;
}
}
@ -155,7 +157,7 @@ class Timer {
// here to bump current time and trigger Timer. See timer_test for example.
//
// Note: only support one caller of this method.
void TEST_WaitForRun(std::function<void()> callback = nullptr) {
void TEST_WaitForRun(const std::function<void()>& callback = nullptr) {
InstrumentedMutexLock l(&mutex_);
// It act as a spin lock
while (executing_task_ ||
@ -177,8 +179,8 @@ class Timer {
size_t TEST_GetPendingTaskNum() const {
InstrumentedMutexLock l(&mutex_);
size_t ret = 0;
for (auto it = map_.begin(); it != map_.end(); it++) {
if (it->second->IsValid()) {
for (const auto& fn_info : map_) {
if (fn_info.second->IsValid()) {
ret++;
}
}
@ -220,10 +222,13 @@ class Timer {
executing_task_ = false;
cond_var_.SignalAll();
// Remove the work from the heap once it is done executing.
// Remove the work from the heap once it is done executing, make sure
// it's the same function after executing the work while mutex is
// released.
// Note that we are just removing the pointer from the heap. Its
// memory is still managed in the map (as it holds a unique ptr).
// So current_fn is still a valid ptr.
assert(heap_.top() == current_fn);
heap_.pop();
// current_fn may be cancelled already.
@ -234,6 +239,10 @@ class Timer {
// Schedule new work into the heap with new time.
heap_.push(current_fn);
} else {
// if current_fn is cancelled or no need to repeat, remove it from the
// map to avoid leak.
map_.erase(current_fn->name);
}
} else {
cond_var_.TimedWait(current_fn->next_run_time_us);
@ -280,10 +289,10 @@ class Timer {
// calls `Cancel()`.
bool valid;
FunctionInfo(std::function<void()>&& _fn, const std::string& _name,
FunctionInfo(std::function<void()>&& _fn, std::string _name,
const uint64_t _next_run_time_us, uint64_t _repeat_every_us)
: fn(std::move(_fn)),
name(_name),
name(std::move(_name)),
next_run_time_us(_next_run_time_us),
repeat_every_us(_repeat_every_us),
valid(true) {}

View File

@ -273,33 +273,32 @@ TEST_F(TimerTest, AddSameFuncName) {
ASSERT_TRUE(timer.Start());
int func_counter1 = 0;
timer.Add([&] { func_counter1++; }, "duplicated_func", kInitDelayUs,
kRepeat1Us);
ASSERT_TRUE(timer.Add([&] { func_counter1++; }, "duplicated_func",
kInitDelayUs, kRepeat1Us));
int func2_counter = 0;
timer.Add([&] { func2_counter++; }, "func2", kInitDelayUs, kRepeat2Us);
ASSERT_TRUE(
timer.Add([&] { func2_counter++; }, "func2", kInitDelayUs, kRepeat2Us));
// New function with the same name should override the existing one
// New function with the same name should fail to add
int func_counter2 = 0;
timer.Add([&] { func_counter2++; }, "duplicated_func", kInitDelayUs,
kRepeat1Us);
ASSERT_FALSE(timer.Add([&] { func_counter2++; }, "duplicated_func",
kInitDelayUs, kRepeat1Us));
ASSERT_EQ(0, func_counter1);
ASSERT_EQ(0, func2_counter);
ASSERT_EQ(0, func_counter2);
timer.TEST_WaitForRun(
[&] { mock_clock_->SleepForMicroseconds(kInitDelayUs); });
ASSERT_EQ(0, func_counter1);
ASSERT_EQ(1, func_counter1);
ASSERT_EQ(1, func2_counter);
ASSERT_EQ(1, func_counter2);
timer.TEST_WaitForRun([&] { mock_clock_->SleepForMicroseconds(kRepeat1Us); });
ASSERT_EQ(0, func_counter1);
ASSERT_EQ(2, func_counter1);
ASSERT_EQ(2, func2_counter);
ASSERT_EQ(2, func_counter2);
ASSERT_EQ(0, func_counter2);
ASSERT_TRUE(timer.Shutdown());
}