Introduce job_id for flush and compaction

Summary:
It would be good to assign background jobs their own IDs. Two benefits:
1) makes LOGs more readable
2) I might use it in my EventLogger, which will try to make our LOG easier to read/query/visualize

Test Plan: ran rocksdb, read the LOG

Reviewers: sdong, rven, yhchiang

Reviewed By: yhchiang

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D31617
Igor Canadi 2015-02-12 09:54:48 -08:00
parent 2d62e80511
commit e7ea51a8e7
12 changed files with 80 additions and 57 deletions
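For illustration, after this change a flush or compaction line in the LOG carries a [JOB %d] tag right after the column family name. A made-up example (the job id and numbers are hypothetical and the logger's timestamp prefix is omitted; the format strings are the ones added in db/compaction_job.cc below):

    [default] [JOB 5] Compacting 4@0 + 3@1 files, score 1.00
    [default] [JOB 5] Generated table #14: 26389 keys, 4194301 bytes

Job id 0 is reserved for cleanup work done on user threads rather than by background jobs.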

db/column_family.cc

@@ -75,7 +75,9 @@ ColumnFamilyHandleImpl::ColumnFamilyHandleImpl(
 ColumnFamilyHandleImpl::~ColumnFamilyHandleImpl() {
   if (cfd_ != nullptr) {
-    JobContext job_context;
+    // Job id == 0 means that this is not our background process, but rather
+    // user thread
+    JobContext job_context(0);
     mutex_->Lock();
     if (cfd_->Unref()) {
       delete cfd_;

db/compaction_job.cc

@@ -202,14 +202,15 @@ struct CompactionJob::CompactionState {
 };
 CompactionJob::CompactionJob(
-    Compaction* compaction, const DBOptions& db_options,
+    int job_id, Compaction* compaction, const DBOptions& db_options,
     const MutableCFOptions& mutable_cf_options, const EnvOptions& env_options,
     VersionSet* versions, std::atomic<bool>* shutting_down,
     LogBuffer* log_buffer, Directory* db_directory, Directory* output_directory,
     Statistics* stats, SnapshotList* snapshots, bool is_snapshot_supported,
     std::shared_ptr<Cache> table_cache,
     std::function<uint64_t()> yield_callback)
-    : compact_(new CompactionState(compaction)),
+    : job_id_(job_id),
+      compact_(new CompactionState(compaction)),
       compaction_stats_(1),
       db_options_(db_options),
       mutable_cf_options_(mutable_cf_options),
@@ -235,8 +236,8 @@ void CompactionJob::Prepare() {
   ColumnFamilyData* cfd = compact_->compaction->column_family_data();
   assert(cfd != nullptr);
   LogToBuffer(
-      log_buffer_, "[%s] Compacting %d@%d + %d@%d files, score %.2f",
-      cfd->GetName().c_str(), compact_->compaction->num_input_files(0),
+      log_buffer_, "[%s] [JOB %d] Compacting %d@%d + %d@%d files, score %.2f",
+      cfd->GetName().c_str(), job_id_, compact_->compaction->num_input_files(0),
       compact_->compaction->level(), compact_->compaction->num_input_files(1),
       compact_->compaction->output_level(), compact_->compaction->score());
   char scratch[2345];
@@ -324,8 +325,8 @@ Status CompactionJob::Run() {
     if (!ParseInternalKey(key, &ikey)) {
       // log error
       Log(InfoLogLevel::WARN_LEVEL, db_options_.info_log,
-          "[%s] Failed to parse key: %s",
-          cfd->GetName().c_str(), key.ToString().c_str());
+          "[%s] [JOB %d] Failed to parse key: %s", cfd->GetName().c_str(),
+          job_id_, key.ToString().c_str());
       continue;
     } else {
       const SliceTransform* transformer =
@@ -946,10 +947,11 @@ Status CompactionJob::FinishCompactionOutputFile(Iterator* input) {
     s = iter->status();
     delete iter;
     if (s.ok()) {
-      Log(InfoLogLevel::DEBUG_LEVEL, db_options_.info_log,
-          "[%s] Generated table #%" PRIu64 ": %" PRIu64
-          " keys, %" PRIu64 " bytes", cfd->GetName().c_str(),
-          output_number, current_entries, current_bytes);
+      Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log,
+          "[%s] [JOB %d] Generated table #%" PRIu64 ": %" PRIu64
+          " keys, %" PRIu64 " bytes",
+          cfd->GetName().c_str(), job_id_, output_number, current_entries,
+          current_bytes);
     }
   }
   return s;
@@ -964,8 +966,8 @@ Status CompactionJob::InstallCompactionResults(InstrumentedMutex* db_mutex) {
   // pick the same files to compact_.
   if (!versions_->VerifyCompactionFileConsistency(compact_->compaction)) {
     Log(InfoLogLevel::ERROR_LEVEL, db_options_.info_log,
-        "[%s] Compaction %d@%d + %d@%d files aborted",
-        compact_->compaction->column_family_data()->GetName().c_str(),
+        "[%s] [JOB %d] Compaction %d@%d + %d@%d files aborted",
+        compact_->compaction->column_family_data()->GetName().c_str(), job_id_,
        compact_->compaction->num_input_files(0), compact_->compaction->level(),
        compact_->compaction->num_input_files(1),
        compact_->compaction->output_level());
@@ -973,13 +975,11 @@ Status CompactionJob::InstallCompactionResults(InstrumentedMutex* db_mutex) {
   }
   Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log,
-      "[%s] Compacted %d@%d + %d@%d files => %" PRIu64 " bytes",
-      compact_->compaction->column_family_data()->GetName().c_str(),
-      compact_->compaction->num_input_files(0),
-      compact_->compaction->level(),
+      "[%s] [JOB %d] Compacted %d@%d + %d@%d files => %" PRIu64 " bytes",
+      compact_->compaction->column_family_data()->GetName().c_str(), job_id_,
+      compact_->compaction->num_input_files(0), compact_->compaction->level(),
       compact_->compaction->num_input_files(1),
-      compact_->compaction->output_level(),
-      compact_->total_bytes);
+      compact_->compaction->output_level(), compact_->total_bytes);
   // Add compaction outputs
   compact_->compaction->AddInputDeletions(compact_->compaction->edit());
@@ -1043,9 +1043,9 @@ Status CompactionJob::OpenCompactionOutputFile() {
   if (!s.ok()) {
     Log(InfoLogLevel::ERROR_LEVEL, db_options_.info_log,
-        "[%s] OpenCompactionOutputFiles for table #%" PRIu64
+        "[%s] [JOB %d] OpenCompactionOutputFiles for table #%" PRIu64
         " fails at NewWritableFile with status %s",
-        compact_->compaction->column_family_data()->GetName().c_str(),
+        compact_->compaction->column_family_data()->GetName().c_str(), job_id_,
         file_number, s.ToString().c_str());
     LogFlush(db_options_.info_log);
     return s;

db/compaction_job.h

@@ -53,7 +53,7 @@ class CompactionJob {
   // TODO(icanadi) make effort to reduce number of parameters here
   // IMPORTANT: mutable_cf_options needs to be alive while CompactionJob is
   // alive
-  CompactionJob(Compaction* compaction, const DBOptions& db_options,
+  CompactionJob(int job_id, Compaction* compaction, const DBOptions& db_options,
                 const MutableCFOptions& mutable_cf_options,
                 const EnvOptions& env_options, VersionSet* versions,
                 std::atomic<bool>* shutting_down, LogBuffer* log_buffer,
@@ -94,6 +94,8 @@ class CompactionJob {
   Status OpenCompactionOutputFile();
   void CleanupCompaction(const Status& status);
+  int job_id_;
+
   // CompactionJob state
   struct CompactionState;
   CompactionState* compact_;

db/compaction_job_test.cc

@@ -160,7 +160,7 @@ TEST(CompactionJobTest, Simple) {
   };
   LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, db_options_.info_log.get());
   mutex_.Lock();
-  CompactionJob compaction_job(compaction.get(), db_options_,
+  CompactionJob compaction_job(0, compaction.get(), db_options_,
                                *cfd->GetLatestMutableCFOptions(), env_options_,
                                versions_.get(), &shutting_down_, &log_buffer,
                                nullptr, nullptr, nullptr, &snapshots, true,

db/db_filesnapshot.cc

@@ -45,7 +45,9 @@ Status DBImpl::DisableFileDeletions() {
 }
 Status DBImpl::EnableFileDeletions(bool force) {
-  JobContext job_context;
+  // Job id == 0 means that this is not our background process, but rather
+  // user thread
+  JobContext job_context(0);
   bool should_purge_files = false;
   {
     InstrumentedMutexLock l(&mutex_);

db/db_impl.cc

@@ -221,6 +221,7 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname)
                                options.env->NowMicros() +
                                db_options_.delete_obsolete_files_period_micros),
       last_stats_dump_time_microsec_(0),
+      next_job_id_(1),
       flush_on_destroy_(false),
       env_options_(options),
 #ifndef ROCKSDB_LITE
@@ -309,7 +310,7 @@ DBImpl::~DBImpl() {
   // result, all "live" files can get deleted by accident. However, corrupted
   // manifest is recoverable by RepairDB().
   if (opened_successfully_) {
-    JobContext job_context;
+    JobContext job_context(next_job_id_.fetch_add(1));
     FindObsoleteFiles(&job_context, true);
     // manifest number starting from 2
     job_context.manifest_file_number = 1;
@@ -650,7 +651,7 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) {
 #ifdef ROCKSDB_LITE
       Status s = env_->DeleteFile(fname);
       Log(InfoLogLevel::DEBUG_LEVEL, db_options_.info_log,
-          "Delete %s type=%d #%" PRIu64 " -- %s\n",
+          "[JOB %d] Delete %s type=%d #%" PRIu64 " -- %s\n", state.job_id,
          fname.c_str(), type, number, s.ToString().c_str());
 #else   // not ROCKSDB_LITE
       if (type == kLogFile && (db_options_.WAL_ttl_seconds > 0 ||
@@ -659,7 +660,7 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) {
       } else {
         Status s = env_->DeleteFile(fname);
         Log(InfoLogLevel::DEBUG_LEVEL, db_options_.info_log,
-            "Delete %s type=%d #%" PRIu64 " -- %s\n",
+            "[JOB %d] Delete %s type=%d #%" PRIu64 " -- %s\n", state.job_id,
             fname.c_str(), type, number, s.ToString().c_str());
       }
 #endif  // ROCKSDB_LITE
@@ -675,12 +676,12 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) {
       std::string full_path_to_delete = (db_options_.db_log_dir.empty() ?
           dbname_ : db_options_.db_log_dir) + "/" + to_delete;
       Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log,
-          "Delete info log file %s\n",
+          "[JOB %d] Delete info log file %s\n", state.job_id,
          full_path_to_delete.c_str());
       Status s = env_->DeleteFile(full_path_to_delete);
       if (!s.ok()) {
-        Log(InfoLogLevel::ERROR_LEVEL,
-            db_options_.info_log, "Delete info log file %s FAILED -- %s\n",
+        Log(InfoLogLevel::ERROR_LEVEL, db_options_.info_log,
+            "[JOB %d] Delete info log file %s FAILED -- %s\n", state.job_id,
             to_delete.c_str(), s.ToString().c_str());
       }
     }
@@ -693,7 +694,7 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) {
 void DBImpl::DeleteObsoleteFiles() {
   mutex_.AssertHeld();
-  JobContext job_context;
+  JobContext job_context(next_job_id_.fetch_add(1));
   FindObsoleteFiles(&job_context, true);
   if (job_context.HaveSomethingToDelete()) {
     PurgeObsoleteFiles(job_context);
@@ -1366,17 +1367,18 @@ Status DBImpl::CompactFilesImpl(
   // deletion compaction currently not allowed in CompactFiles.
   assert(!c->IsDeletionCompaction());
-  JobContext job_context(true);
+  JobContext job_context(0, true);
   auto yield_callback = [&]() {
     return CallFlushDuringCompaction(c->column_family_data(),
                                      *c->mutable_cf_options(), &job_context,
                                      &log_buffer);
   };
   CompactionJob compaction_job(
-      c.get(), db_options_, *c->mutable_cf_options(), env_options_,
-      versions_.get(), &shutting_down_, &log_buffer, directories_.GetDbDir(),
-      directories_.GetDataDir(c->GetOutputPathId()), stats_, &snapshots_,
-      is_snapshot_supported_, table_cache_, std::move(yield_callback));
+      job_context.job_id, c.get(), db_options_, *c->mutable_cf_options(),
+      env_options_, versions_.get(), &shutting_down_, &log_buffer,
+      directories_.GetDbDir(), directories_.GetDataDir(c->GetOutputPathId()),
+      stats_, &snapshots_, is_snapshot_supported_, table_cache_,
+      std::move(yield_callback));
   compaction_job.Prepare();
   mutex_.Unlock();
@@ -1395,7 +1397,9 @@ Status DBImpl::CompactFilesImpl(
   } else if (status.IsShutdownInProgress()) {
     // Ignore compaction errors found during shutting down
   } else {
-    Log(InfoLogLevel::WARN_LEVEL, db_options_.info_log, "Compaction error: %s",
+    Log(InfoLogLevel::WARN_LEVEL, db_options_.info_log,
+        "[%s] [JOB %d] Compaction error: %s",
+        c->column_family_data()->GetName().c_str(), job_context.job_id,
         status.ToString().c_str());
     if (db_options_.paranoid_checks && bg_error_.ok()) {
       bg_error_ = status;
@@ -1910,7 +1914,7 @@ Status DBImpl::BackgroundFlush(bool* madeProgress, JobContext* job_context,
 void DBImpl::BackgroundCallFlush() {
   bool madeProgress = false;
-  JobContext job_context(true);
+  JobContext job_context(next_job_id_.fetch_add(1), true);
   assert(bg_flush_scheduled_);
   LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, db_options_.info_log.get());
@@ -1978,7 +1982,7 @@ void DBImpl::BackgroundCallFlush() {
 void DBImpl::BackgroundCallCompaction() {
   bool madeProgress = false;
-  JobContext job_context(true);
+  JobContext job_context(next_job_id_.fetch_add(1), true);
   MaybeDumpStats();
   LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, db_options_.info_log.get());
@@ -2250,10 +2254,11 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, JobContext* job_context,
                                        log_buffer);
     };
     CompactionJob compaction_job(
-        c.get(), db_options_, *c->mutable_cf_options(), env_options_,
-        versions_.get(), &shutting_down_, log_buffer, directories_.GetDbDir(),
-        directories_.GetDataDir(c->GetOutputPathId()), stats_, &snapshots_,
-        is_snapshot_supported_, table_cache_, std::move(yield_callback));
+        job_context->job_id, c.get(), db_options_, *c->mutable_cf_options(),
+        env_options_, versions_.get(), &shutting_down_, log_buffer,
+        directories_.GetDbDir(), directories_.GetDataDir(c->GetOutputPathId()),
+        stats_, &snapshots_, is_snapshot_supported_, table_cache_,
+        std::move(yield_callback));
     compaction_job.Prepare();
     mutex_.Unlock();
     status = compaction_job.Run();
@@ -2361,7 +2366,9 @@ static void CleanupIteratorState(void* arg1, void* arg2) {
   IterState* state = reinterpret_cast<IterState*>(arg1);
   if (state->super_version->Unref()) {
-    JobContext job_context;
+    // Job id == 0 means that this is not our background process, but rather
+    // user thread
+    JobContext job_context(0);
     state->mu->Lock();
     state->super_version->Cleanup();
@@ -3525,7 +3532,7 @@ Status DBImpl::DeleteFile(std::string name) {
   FileMetaData* metadata;
   ColumnFamilyData* cfd;
   VersionEdit edit;
-  JobContext job_context(true);
+  JobContext job_context(next_job_id_.fetch_add(1), true);
   {
     InstrumentedMutexLock l(&mutex_);
     status = versions_->GetMetadataForFile(number, &level, &metadata, &cfd);

db/db_impl.h

@@ -571,6 +571,10 @@ class DBImpl : public DB {
   // last time stats were dumped to LOG
   std::atomic<uint64_t> last_stats_dump_time_microsec_;
+  // Each flush or compaction gets its own job id. this counter makes sure
+  // they're unique
+  std::atomic<int> next_job_id_;
+
   bool flush_on_destroy_; // Used when disableWAL is true.
   static const int KEEP_LOG_FILE_NUM = 1000;
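The id allocation above relies on std::atomic's fetch_add, which returns the counter's previous value as an atomic read-modify-write, so concurrent background threads can never receive the same id. A minimal standalone sketch of the scheme, not RocksDB code (names here are illustrative):

#include <atomic>
#include <cstdio>

// Counter starts at 1; id 0 stays reserved for user-thread work.
std::atomic<int> next_job_id{1};

int AllocateJobId() {
  // fetch_add(1) returns the pre-increment value, so every caller
  // observes a distinct id even under concurrency.
  return next_job_id.fetch_add(1);
}

int main() {
  std::printf("job %d\n", AllocateJobId());  // prints "job 1"
  std::printf("job %d\n", AllocateJobId());  // prints "job 2"
  return 0;
}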

db/db_test.cc

@@ -10357,7 +10357,7 @@ TEST(DBTest, DontDeletePendingOutputs) {
   // Every time we write to a table file, call FOF/POF with full DB scan. This
   // will make sure our pending_outputs_ protection work correctly
   std::function<void()> purge_obsolete_files_function = [&]() {
-    JobContext job_context;
+    JobContext job_context(0);
     dbfull()->TEST_LockMutex();
     dbfull()->FindObsoleteFiles(&job_context, true /*force*/);
     dbfull()->TEST_UnlockMutex();

db/flush_job.cc

@@ -148,8 +148,8 @@ Status FlushJob::WriteLevel0Table(const autovector<MemTable*>& mems,
     Arena arena;
     for (MemTable* m : mems) {
       Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log,
-          "[%s] Flushing memtable with next log file: %" PRIu64 "\n",
-          cfd_->GetName().c_str(), m->GetNextLogNumber());
+          "[%s] [JOB %d] Flushing memtable with next log file: %" PRIu64 "\n",
+          cfd_->GetName().c_str(), job_context_->job_id, m->GetNextLogNumber());
       memtables.push_back(m->NewIterator(ro, &arena));
     }
     {
@@ -157,8 +157,8 @@ Status FlushJob::WriteLevel0Table(const autovector<MemTable*>& mems,
           NewMergingIterator(&cfd_->internal_comparator(), &memtables[0],
                              static_cast<int>(memtables.size()), &arena));
       Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log,
-          "[%s] Level-0 flush table #%" PRIu64 ": started",
-          cfd_->GetName().c_str(), meta.fd.GetNumber());
+          "[%s] [JOB %d] Level-0 flush table #%" PRIu64 ": started",
+          cfd_->GetName().c_str(), job_context_->job_id, meta.fd.GetNumber());
       s = BuildTable(dbname_, db_options_.env, *cfd_->ioptions(), env_options_,
                      cfd_->table_cache(), iter.get(), &meta,
@@ -168,9 +168,9 @@ Status FlushJob::WriteLevel0Table(const autovector<MemTable*>& mems,
       LogFlush(db_options_.info_log);
     }
     Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log,
-        "[%s] Level-0 flush table #%" PRIu64 ": %" PRIu64 " bytes %s",
-        cfd_->GetName().c_str(), meta.fd.GetNumber(), meta.fd.GetFileSize(),
-        s.ToString().c_str());
+        "[%s] [JOB %d] Level-0 flush table #%" PRIu64 ": %" PRIu64 " bytes %s",
+        cfd_->GetName().c_str(), job_context_->job_id, meta.fd.GetNumber(),
+        meta.fd.GetFileSize(), s.ToString().c_str());
     if (!db_options_.disableDataSync && output_file_directory_ != nullptr) {
       output_file_directory_->Fsync();
     }

db/flush_job_test.cc

@@ -81,7 +81,7 @@ class FlushJobTest {
 };
 TEST(FlushJobTest, Empty) {
-  JobContext job_context;
+  JobContext job_context(0);
   auto cfd = versions_->GetColumnFamilySet()->GetDefault();
   FlushJob flush_job(dbname_, versions_->GetColumnFamilySet()->GetDefault(),
                      db_options_, *cfd->GetLatestMutableCFOptions(),
@@ -93,7 +93,7 @@ TEST(FlushJobTest, Empty) {
 }
 TEST(FlushJobTest, NonEmpty) {
-  JobContext job_context;
+  JobContext job_context(0);
   auto cfd = versions_->GetColumnFamilySet()->GetDefault();
   auto new_mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions());
   new_mem->Ref();

db/forward_iterator.cc

@@ -158,7 +158,9 @@ void ForwardIterator::Cleanup(bool release_sv) {
   if (release_sv) {
     if (sv_ != nullptr && sv_->Unref()) {
-      JobContext job_context;
+      // Job id == 0 means that this is not our background process, but rather
+      // user thread
+      JobContext job_context(0);
       db_->mutex_.Lock();
       sv_->Cleanup();
       db_->FindObsoleteFiles(&job_context, false, true);

db/job_context.h

@@ -36,6 +36,9 @@ struct JobContext {
     }
   };
+  // Unique job id
+  int job_id;
+
   // a list of all files that we'll consider deleting
   // (every once in a while this is filled up with all files
   // in the DB directory)
@@ -67,7 +70,8 @@ struct JobContext {
   uint64_t min_pending_output = 0;
-  explicit JobContext(bool create_superversion = false) {
+  explicit JobContext(int _job_id, bool create_superversion = false) {
+    job_id = _job_id;
     manifest_file_number = 0;
     pending_manifest_file_number = 0;
     log_number = 0;
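Putting the pieces together: the constructor change above means every JobContext is now created with an explicit id, either freshly allocated for background work or 0 on user threads. A simplified stand-in, for illustration only (the real struct also carries candidate-deletion file lists, a superversion flag, and manifest/log numbers):

#include <atomic>

std::atomic<int> next_job_id{1};  // mirrors DBImpl::next_job_id_

struct JobContextSketch {
  int job_id;
  explicit JobContextSketch(int _job_id) : job_id(_job_id) {}
};

int main() {
  // Background flush/compaction paths take a fresh id ...
  JobContextSketch background(next_job_id.fetch_add(1));
  // ... while user-thread cleanup paths pass 0 by convention.
  JobContextSketch user_thread(0);
  return (background.job_id == 1 && user_thread.job_id == 0) ? 0 : 1;
}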