Fix data race against logging data structure because of LogBuffer

Summary:
@igor pointed out that there is a potential data race because of the way we use the newly introduced LogBuffer. After "bg_compaction_scheduled_--" or "bg_flush_scheduled_--", they can both become 0. As soon as the lock is released after that, DBImpl's deconstructor can go ahead and deconstruct all the states inside DB, including the info_log object hold in a shared pointer of the options object it keeps. At that point it is not safe anymore to continue using the info logger to write the delayed logs.

With the patch, lock is released temporarily for log buffer to be flushed before "bg_compaction_scheduled_--" or "bg_flush_scheduled_--". In order to make sure we don't miss any pending flush or compaction, a new flag bg_schedule_needed_ is added, which is set to be true if there is a pending flush or compaction but not scheduled because of the max thread limit. If the flag is set to be true, the scheduling function will be called before compaction or flush thread finishes.

Thanks @igor for this finding!

Test Plan: make all check

Reviewers: haobo, igor

Reviewed By: haobo

CC: dhruba, ljin, yhchiang, igor, leveldb

Differential Revision: https://reviews.facebook.net/D16767
This commit is contained in:
sdong 2014-03-11 10:36:30 -07:00
parent 02dab3be11
commit bd45633b71
3 changed files with 51 additions and 15 deletions

View File

@ -276,6 +276,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
super_version_number_(0), super_version_number_(0),
local_sv_(new ThreadLocalPtr(&SuperVersionUnrefHandle)), local_sv_(new ThreadLocalPtr(&SuperVersionUnrefHandle)),
tmp_batch_(), tmp_batch_(),
bg_schedule_needed_(false),
bg_compaction_scheduled_(0), bg_compaction_scheduled_(0),
bg_manual_only_(0), bg_manual_only_(0),
bg_flush_scheduled_(0), bg_flush_scheduled_(0),
@ -1830,17 +1831,21 @@ Status DBImpl::TEST_WaitForCompact() {
void DBImpl::MaybeScheduleFlushOrCompaction() { void DBImpl::MaybeScheduleFlushOrCompaction() {
mutex_.AssertHeld(); mutex_.AssertHeld();
bg_schedule_needed_ = false;
if (bg_work_gate_closed_) { if (bg_work_gate_closed_) {
// gate closed for backgrond work // gate closed for backgrond work
} else if (shutting_down_.Acquire_Load()) { } else if (shutting_down_.Acquire_Load()) {
// DB is being deleted; no more background compactions // DB is being deleted; no more background compactions
} else { } else {
bool is_flush_pending = imm_.IsFlushPending(); bool is_flush_pending = imm_.IsFlushPending();
if (is_flush_pending && if (is_flush_pending) {
(bg_flush_scheduled_ < options_.max_background_flushes)) { if (bg_flush_scheduled_ < options_.max_background_flushes) {
// memtable flush needed // memtable flush needed
bg_flush_scheduled_++; bg_flush_scheduled_++;
env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH); env_->Schedule(&DBImpl::BGWorkFlush, this, Env::Priority::HIGH);
} else {
bg_schedule_needed_ = true;
}
} }
// Schedule BGWorkCompaction if there's a compaction pending (or a memtable // Schedule BGWorkCompaction if there's a compaction pending (or a memtable
@ -1850,11 +1855,13 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
if ((manual_compaction_ || if ((manual_compaction_ ||
versions_->current()->NeedsCompaction() || versions_->current()->NeedsCompaction() ||
(is_flush_pending && (options_.max_background_flushes <= 0))) && (is_flush_pending && (options_.max_background_flushes <= 0))) &&
bg_compaction_scheduled_ < options_.max_background_compactions &&
(!bg_manual_only_ || manual_compaction_)) { (!bg_manual_only_ || manual_compaction_)) {
if (bg_compaction_scheduled_ < options_.max_background_compactions) {
bg_compaction_scheduled_++; bg_compaction_scheduled_++;
env_->Schedule(&DBImpl::BGWorkCompaction, this, Env::Priority::LOW); env_->Schedule(&DBImpl::BGWorkCompaction, this, Env::Priority::LOW);
} else {
bg_schedule_needed_ = true;
}
} }
} }
} }
@ -1912,15 +1919,26 @@ void DBImpl::BackgroundCallFlush() {
// to delete all obsolete files and we force FindObsoleteFiles() // to delete all obsolete files and we force FindObsoleteFiles()
FindObsoleteFiles(deletion_state, !s.ok()); FindObsoleteFiles(deletion_state, !s.ok());
// delete unnecessary files if any, this is done outside the mutex // delete unnecessary files if any, this is done outside the mutex
if (deletion_state.HaveSomethingToDelete()) { if (deletion_state.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
mutex_.Unlock(); mutex_.Unlock();
// Have to flush the info logs before bg_flush_scheduled_--
// because if bg_flush_scheduled_ becomes 0 and the lock is
// released, the deconstructor of DB can kick in and destroy all the
// states of DB so info_log might not be available after that point.
// It also applies to access other states that DB owns.
log_buffer.FlushBufferToLog(); log_buffer.FlushBufferToLog();
if (deletion_state.HaveSomethingToDelete()) {
PurgeObsoleteFiles(deletion_state); PurgeObsoleteFiles(deletion_state);
}
mutex_.Lock(); mutex_.Lock();
} }
bg_flush_scheduled_--; bg_flush_scheduled_--;
if (madeProgress) { // Any time the mutex is released After finding the work to do, another
// thread might execute MaybeScheduleFlushOrCompaction(). It is possible
// that there is a pending job but it is not scheduled because of the
// max thread limit.
if (madeProgress || bg_schedule_needed_) {
MaybeScheduleFlushOrCompaction(); MaybeScheduleFlushOrCompaction();
} }
log_buffer.FlushBufferToLog(); log_buffer.FlushBufferToLog();
@ -1979,10 +1997,17 @@ void DBImpl::BackgroundCallCompaction() {
FindObsoleteFiles(deletion_state, !s.ok()); FindObsoleteFiles(deletion_state, !s.ok());
// delete unnecessary files if any, this is done outside the mutex // delete unnecessary files if any, this is done outside the mutex
if (deletion_state.HaveSomethingToDelete()) { if (deletion_state.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
mutex_.Unlock(); mutex_.Unlock();
// Have to flush the info logs before bg_compaction_scheduled_--
// because if bg_flush_scheduled_ becomes 0 and the lock is
// released, the deconstructor of DB can kick in and destroy all the
// states of DB so info_log might not be available after that point.
// It also applies to access other states that DB owns.
log_buffer.FlushBufferToLog(); log_buffer.FlushBufferToLog();
if (deletion_state.HaveSomethingToDelete()) {
PurgeObsoleteFiles(deletion_state); PurgeObsoleteFiles(deletion_state);
}
mutex_.Lock(); mutex_.Lock();
} }
@ -1993,7 +2018,12 @@ void DBImpl::BackgroundCallCompaction() {
// Previous compaction may have produced too many files in a level, // Previous compaction may have produced too many files in a level,
// So reschedule another compaction if we made progress in the // So reschedule another compaction if we made progress in the
// last compaction. // last compaction.
if (madeProgress) { //
// Also, any time the mutex is released After finding the work to do,
// another thread might execute MaybeScheduleFlushOrCompaction(). It is
// possible that there is a pending job but it is not scheduled because of
// the max thread limit.
if (madeProgress || bg_schedule_needed_) {
MaybeScheduleFlushOrCompaction(); MaybeScheduleFlushOrCompaction();
} }
log_buffer.FlushBufferToLog(); log_buffer.FlushBufferToLog();

View File

@ -454,6 +454,10 @@ class DBImpl : public DB {
// part of ongoing compactions. // part of ongoing compactions.
std::set<uint64_t> pending_outputs_; std::set<uint64_t> pending_outputs_;
// At least one compaction or flush job is pending but not yet scheduled
// because of the max background thread limit.
bool bg_schedule_needed_;
// count how many background compactions are running or have been scheduled // count how many background compactions are running or have been scheduled
int bg_compaction_scheduled_; int bg_compaction_scheduled_;

View File

@ -23,6 +23,8 @@ class LogBuffer {
// Add a log entry to the buffer. // Add a log entry to the buffer.
void AddLogToBuffer(const char* format, va_list ap); void AddLogToBuffer(const char* format, va_list ap);
size_t IsEmpty() const { return logs_.empty(); }
// Flush all buffered log to the info log. // Flush all buffered log to the info log.
void FlushBufferToLog(); void FlushBufferToLog();