Speed up FindObsoleteFiles

Summary:
Here's one solution we discussed for speeding up FindObsoleteFiles: keep a set of all files in DBImpl and update the set every time we create a file. I probably missed a few other spots where we create a file.

It might speed things up a bit, but it makes the code uglier. I don't really like it.

A much better approach would be to abstract all file handling into a separate class. Think of it as a layer between DBImpl and Env. Having a separate class deal with file naming and deletion would both improve code cleanliness (especially given how huge DBImpl is) and speed things up. It will take a huge effort to do this, though.

Let's discuss offline today.

Test Plan: Ran ./db_stress, verified that files are getting deleted

Reviewers: dhruba, haobo, kailiu, emayanke

Reviewed By: dhruba

Differential Revision: https://reviews.facebook.net/D13827
This commit is contained in:
Igor Canadi 2013-11-08 15:23:46 -08:00
parent dd218bbc88
commit 1510339e52
8 changed files with 169 additions and 124 deletions

View File

@ -28,9 +28,15 @@ Status DBImpl::DisableFileDeletions() {
} }
Status DBImpl::EnableFileDeletions() { Status DBImpl::EnableFileDeletions() {
MutexLock l(&mutex_); DeletionState deletion_state;
disable_delete_obsolete_files_ = false; {
Log(options_.info_log, "File Deletions Enabled"); MutexLock l(&mutex_);
disable_delete_obsolete_files_ = false;
Log(options_.info_log, "File Deletions Enabled");
FindObsoleteFiles(deletion_state, true);
}
PurgeObsoleteFiles(deletion_state);
LogFlush(options_.info_log);
return Status::OK(); return Status::OK();
} }

View File

@ -110,22 +110,6 @@ struct DBImpl::CompactionState {
} }
}; };
// Carries the output of FindObsoleteFiles over to PurgeObsoleteFiles and
// EvictObsoleteFiles, so that the actual file removal can run without the
// DB mutex held.
// NOTE(review): the three uint64_t members have no initializer; in the
// visible code they are only assigned inside FindObsoleteFiles.
struct DBImpl::DeletionState {
// the list of all live files that cannot be deleted
// (filled from pending_outputs_ plus VersionSet::AddLiveFiles)
std::vector<uint64_t> live;
// a list of all files that exist in the db directory
std::vector<std::string> allfiles;
// the current filenumber (manifest file number), lognumber and
// prevlognumber that correspond to the set of files in 'live'.
uint64_t filenumber, lognumber, prevlognumber;
// the list of all files to be evicted from the table cache
// (appended to by PurgeObsoleteFiles, consumed by EvictObsoleteFiles)
std::vector<uint64_t> files_to_evict;
};
// Fix user-supplied options to be reasonable // Fix user-supplied options to be reasonable
template <class T, class V> template <class T, class V>
static void ClipToRange(T* ptr, V minvalue, V maxvalue) { static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
@ -451,9 +435,9 @@ void DBImpl::MaybeDumpStats() {
} }
} }
// Returns the list of live files in 'live' and the list // Returns the list of live files in 'sstlive' and the list
// of all files in the filesystem in 'allfiles'. // of all files in the filesystem in 'allfiles'.
void DBImpl::FindObsoleteFiles(DeletionState& deletion_state) { void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) {
mutex_.AssertHeld(); mutex_.AssertHeld();
// if deletion is disabled, do nothing // if deletion is disabled, do nothing
@ -461,10 +445,15 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state) {
return; return;
} }
// store the current filenum, lognum, etc
deletion_state.manifest_file_number = versions_->ManifestFileNumber();
deletion_state.log_number = versions_->LogNumber();
deletion_state.prev_log_number = versions_->PrevLogNumber();
// This method is costly when the number of files is large. // This method is costly when the number of files is large.
// Do not allow it to trigger more often than once in // Do not allow it to trigger more often than once in
// delete_obsolete_files_period_micros. // delete_obsolete_files_period_micros.
if (options_.delete_obsolete_files_period_micros != 0) { if (!force && options_.delete_obsolete_files_period_micros != 0) {
const uint64_t now_micros = env_->NowMicros(); const uint64_t now_micros = env_->NowMicros();
if (delete_obsolete_files_last_run_ + if (delete_obsolete_files_last_run_ +
options_.delete_obsolete_files_period_micros > now_micros) { options_.delete_obsolete_files_period_micros > now_micros) {
@ -475,9 +464,9 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state) {
// Make a list of all of the live files; set is slow, should not // Make a list of all of the live files; set is slow, should not
// be used. // be used.
deletion_state.live.assign(pending_outputs_.begin(), deletion_state.sstlive.assign(pending_outputs_.begin(),
pending_outputs_.end()); pending_outputs_.end());
versions_->AddLiveFiles(&deletion_state.live); versions_->AddLiveFiles(&deletion_state.sstlive);
// set of all files in the directory // set of all files in the directory
env_->GetChildren(dbname_, &deletion_state.allfiles); // Ignore errors env_->GetChildren(dbname_, &deletion_state.allfiles); // Ignore errors
@ -492,59 +481,51 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state) {
log_files.end() log_files.end()
); );
} }
// store the current filenum, lognum, etc
deletion_state.filenumber = versions_->ManifestFileNumber();
deletion_state.lognumber = versions_->LogNumber();
deletion_state.prevlognumber = versions_->PrevLogNumber();
}
Status DBImpl::DeleteLogFile(uint64_t number) {
Status s;
auto filename = LogFileName(options_.wal_dir, number);
if (options_.WAL_ttl_seconds > 0 || options_.WAL_size_limit_MB > 0) {
s = env_->RenameFile(filename,
ArchivedLogFileName(options_.wal_dir, number));
if (!s.ok()) {
Log(options_.info_log, "RenameFile logfile #%lu FAILED", number);
}
} else {
s = env_->DeleteFile(filename);
if(!s.ok()) {
Log(options_.info_log, "Delete logfile #%lu FAILED", number);
}
}
return s;
} }
// Diffs the files listed in filenames and those that do not // Diffs the files listed in filenames and those that do not
// belong to live files are possibly removed. If the removed file // belong to live files are possibly removed. Also, removes all the
// is a sst file, then it returns the file number in files_to_evict. // files in sstdeletefiles and logdeletefiles.
// It is not necessary to hold the mutex when invoking this method. // It is not necessary to hold the mutex when invoking this method.
void DBImpl::PurgeObsoleteFiles(DeletionState& state) { void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
// if deletion is disabled, do nothing
if (disable_delete_obsolete_files_) {
return;
}
uint64_t number; uint64_t number;
FileType type; FileType type;
std::vector<std::string> old_log_files; std::vector<std::string> old_log_files;
// Now, convert live list to an unordered set, WITHOUT mutex held; // Now, convert live list to an unordered set, WITHOUT mutex held;
// set is slow. // set is slow.
std::unordered_set<uint64_t> live_set(state.live.begin(), std::unordered_set<uint64_t> live_set(state.sstlive.begin(),
state.live.end()); state.sstlive.end());
state.allfiles.reserve(state.allfiles.size() + state.sstdeletefiles.size());
for (auto filenum : state.sstdeletefiles) {
state.allfiles.push_back(TableFileName("", filenum));
}
state.allfiles.reserve(state.allfiles.size() + state.logdeletefiles.size());
for (auto filenum : state.logdeletefiles) {
if (filenum > 0) {
state.allfiles.push_back(LogFileName("", filenum));
}
}
for (size_t i = 0; i < state.allfiles.size(); i++) { for (size_t i = 0; i < state.allfiles.size(); i++) {
if (ParseFileName(state.allfiles[i], &number, &type)) { if (ParseFileName(state.allfiles[i], &number, &type)) {
bool keep = true; bool keep = true;
switch (type) { switch (type) {
case kLogFile: case kLogFile:
keep = ((number >= state.lognumber) || keep = ((number >= state.log_number) ||
(number == state.prevlognumber)); (number == state.prev_log_number));
break; break;
case kDescriptorFile: case kDescriptorFile:
// Keep my manifest file, and any newer incarnations' // Keep my manifest file, and any newer incarnations'
// (in case there is a race that allows other incarnations) // (in case there is a race that allows other incarnations)
keep = (number >= state.filenumber); keep = (number >= state.manifest_file_number);
break; break;
case kTableFile: case kTableFile:
keep = (live_set.find(number) != live_set.end()); keep = (live_set.find(number) != live_set.end());
@ -570,19 +551,25 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
if (!keep) { if (!keep) {
if (type == kTableFile) { if (type == kTableFile) {
// record the files to be evicted from the cache // evict from cache
state.files_to_evict.push_back(number); table_cache_->Evict(number);
} }
Log(options_.info_log, "Delete type=%d #%lu", int(type), number); Log(options_.info_log, "Delete type=%d #%lu", int(type), number);
if (type == kLogFile) { Status st;
DeleteLogFile(number); if (type == kLogFile && (options_.WAL_ttl_seconds > 0 ||
options_.WAL_size_limit_MB > 0)) {
st = env_->RenameFile(dbname_ + "/" + state.allfiles[i],
ArchivedLogFileName(options_.wal_dir,
number));
if (!st.ok()) {
Log(options_.info_log, "RenameFile logfile #%lu FAILED", number);
}
} else { } else {
Status st = env_->DeleteFile(dbname_ + "/" + state.allfiles[i]); st = env_->DeleteFile(dbname_ + "/" + state.allfiles[i]);
if (!st.ok()) { if (!st.ok()) {
Log(options_.info_log, "Delete type=%d #%lld FAILED\n", Log(options_.info_log, "Delete type=%d #%lu FAILED\n",
int(type), int(type), number);
static_cast<unsigned long long>(number));
} }
} }
} }
@ -605,20 +592,14 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
} }
} }
PurgeObsoleteWALFiles(); PurgeObsoleteWALFiles();
} LogFlush(options_.info_log);
void DBImpl::EvictObsoleteFiles(DeletionState& state) {
for (unsigned int i = 0; i < state.files_to_evict.size(); i++) {
table_cache_->Evict(state.files_to_evict[i]);
}
} }
void DBImpl::DeleteObsoleteFiles() { void DBImpl::DeleteObsoleteFiles() {
mutex_.AssertHeld(); mutex_.AssertHeld();
DeletionState deletion_state; DeletionState deletion_state;
FindObsoleteFiles(deletion_state); FindObsoleteFiles(deletion_state, true);
PurgeObsoleteFiles(deletion_state); PurgeObsoleteFiles(deletion_state);
EvictObsoleteFiles(deletion_state);
} }
// 1. Go through all archived files and // 1. Go through all archived files and
@ -1091,7 +1072,8 @@ Status DBImpl::WriteLevel0Table(std::vector<MemTable*> &mems, VersionEdit* edit,
return s; return s;
} }
Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress) { Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
DeletionState& deletion_state) {
mutex_.AssertHeld(); mutex_.AssertHeld();
assert(imm_.size() != 0); assert(imm_.size() != 0);
@ -1149,22 +1131,13 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress) {
} }
MaybeScheduleLogDBDeployStats(); MaybeScheduleLogDBDeployStats();
// TODO: if log deletion failed for any reason, we probably
// should store the file number in the shared state, and retry if (options_.purge_log_after_memtable_flush &&
// However, for now, PurgeObsoleteFiles will take care of that !disable_delete_obsolete_files_) {
// anyways. // add to deletion state
bool should_delete_log = options_.purge_log_after_memtable_flush && deletion_state.logdeletefiles.insert(deletion_state.logdeletefiles.end(),
!disable_delete_obsolete_files_; logs_to_delete.begin(),
if (should_delete_log) { logs_to_delete.end());
for (auto log_num : logs_to_delete) {
if (log_num < 0) {
continue;
}
mutex_.Unlock();
DeleteLogFile(log_num);
LogFlush(options_.info_log);
mutex_.Lock();
}
} }
} }
return s; return s;
@ -1621,25 +1594,27 @@ void DBImpl::BGWorkCompaction(void* db) {
reinterpret_cast<DBImpl*>(db)->BackgroundCallCompaction(); reinterpret_cast<DBImpl*>(db)->BackgroundCallCompaction();
} }
Status DBImpl::BackgroundFlush(bool* madeProgress) { Status DBImpl::BackgroundFlush(bool* madeProgress,
DeletionState& deletion_state) {
Status stat; Status stat;
while (stat.ok() && while (stat.ok() &&
imm_.IsFlushPending(options_.min_write_buffer_number_to_merge)) { imm_.IsFlushPending(options_.min_write_buffer_number_to_merge)) {
Log(options_.info_log, Log(options_.info_log,
"BackgroundCallFlush doing FlushMemTableToOutputFile, flush slots available %d", "BackgroundCallFlush doing FlushMemTableToOutputFile, flush slots available %d",
options_.max_background_flushes - bg_flush_scheduled_); options_.max_background_flushes - bg_flush_scheduled_);
stat = FlushMemTableToOutputFile(madeProgress); stat = FlushMemTableToOutputFile(madeProgress, deletion_state);
} }
return stat; return stat;
} }
void DBImpl::BackgroundCallFlush() { void DBImpl::BackgroundCallFlush() {
bool madeProgress = false; bool madeProgress = false;
DeletionState deletion_state;
assert(bg_flush_scheduled_); assert(bg_flush_scheduled_);
MutexLock l(&mutex_); MutexLock l(&mutex_);
if (!shutting_down_.Acquire_Load()) { if (!shutting_down_.Acquire_Load()) {
Status s = BackgroundFlush(&madeProgress); Status s = BackgroundFlush(&madeProgress, deletion_state);
if (!s.ok()) { if (!s.ok()) {
// Wait a little bit before retrying background compaction in // Wait a little bit before retrying background compaction in
// case this is an environmental problem and we do not want to // case this is an environmental problem and we do not want to
@ -1652,9 +1627,18 @@ void DBImpl::BackgroundCallFlush() {
LogFlush(options_.info_log); LogFlush(options_.info_log);
env_->SleepForMicroseconds(1000000); env_->SleepForMicroseconds(1000000);
mutex_.Lock(); mutex_.Lock();
// clean up all the files we might have created
FindObsoleteFiles(deletion_state, true);
} }
} }
// delete unnecessary files if any, this is done outside the mutex
if (deletion_state.HaveSomethingToDelete()) {
mutex_.Unlock();
PurgeObsoleteFiles(deletion_state);
mutex_.Lock();
}
bg_flush_scheduled_--; bg_flush_scheduled_--;
if (madeProgress) { if (madeProgress) {
MaybeScheduleFlushOrCompaction(); MaybeScheduleFlushOrCompaction();
@ -1690,17 +1674,16 @@ void DBImpl::BackgroundCallCompaction() {
LogFlush(options_.info_log); LogFlush(options_.info_log);
env_->SleepForMicroseconds(1000000); env_->SleepForMicroseconds(1000000);
mutex_.Lock(); mutex_.Lock();
// clean up all the files we might have created
FindObsoleteFiles(deletion_state, true);
} }
} }
// delete unnecessary files if any, this is done outside the mutex // delete unnecessary files if any, this is done outside the mutex
if (!deletion_state.live.empty()) { if (deletion_state.HaveSomethingToDelete()) {
mutex_.Unlock(); mutex_.Unlock();
PurgeObsoleteFiles(deletion_state); PurgeObsoleteFiles(deletion_state);
EvictObsoleteFiles(deletion_state);
LogFlush(options_.info_log);
mutex_.Lock(); mutex_.Lock();
} }
bg_compaction_scheduled_--; bg_compaction_scheduled_--;
@ -1728,7 +1711,7 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
"BackgroundCompaction doing FlushMemTableToOutputFile, compaction slots " "BackgroundCompaction doing FlushMemTableToOutputFile, compaction slots "
"available %d", "available %d",
options_.max_background_compactions - bg_compaction_scheduled_); options_.max_background_compactions - bg_compaction_scheduled_);
Status stat = FlushMemTableToOutputFile(madeProgress); Status stat = FlushMemTableToOutputFile(madeProgress, deletion_state);
if (!stat.ok()) { if (!stat.ok()) {
return stat; return stat;
} }
@ -1783,11 +1766,12 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
} else { } else {
MaybeScheduleFlushOrCompaction(); // do more compaction work in parallel. MaybeScheduleFlushOrCompaction(); // do more compaction work in parallel.
CompactionState* compact = new CompactionState(c.get()); CompactionState* compact = new CompactionState(c.get());
status = DoCompactionWork(compact); status = DoCompactionWork(compact, deletion_state);
CleanupCompaction(compact, status); CleanupCompaction(compact, status);
versions_->ReleaseCompactionFiles(c.get(), status); versions_->ReleaseCompactionFiles(c.get(), status);
c->ReleaseInputs(); c->ReleaseInputs();
FindObsoleteFiles(deletion_state); versions_->GetAndFreeObsoleteFiles(&deletion_state.sstdeletefiles);
FindObsoleteFiles(deletion_state, false);
*madeProgress = true; *madeProgress = true;
} }
c.reset(); c.reset();
@ -2044,7 +2028,8 @@ inline SequenceNumber DBImpl::findEarliestVisibleSnapshot(
return 0; return 0;
} }
Status DBImpl::DoCompactionWork(CompactionState* compact) { Status DBImpl::DoCompactionWork(CompactionState* compact,
DeletionState& deletion_state) {
assert(compact); assert(compact);
int64_t imm_micros = 0; // Micros spent doing imm_ compactions int64_t imm_micros = 0; // Micros spent doing imm_ compactions
Log(options_.info_log, Log(options_.info_log,
@ -2120,7 +2105,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
LogFlush(options_.info_log); LogFlush(options_.info_log);
mutex_.Lock(); mutex_.Lock();
if (imm_.IsFlushPending(options_.min_write_buffer_number_to_merge)) { if (imm_.IsFlushPending(options_.min_write_buffer_number_to_merge)) {
FlushMemTableToOutputFile(); FlushMemTableToOutputFile(nullptr, deletion_state);
bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary
} }
mutex_.Unlock(); mutex_.Unlock();
@ -3376,7 +3361,7 @@ Status DBImpl::DeleteFile(std::string name) {
edit.DeleteFile(level, number); edit.DeleteFile(level, number);
status = versions_->LogAndApply(&edit, &mutex_); status = versions_->LogAndApply(&edit, &mutex_);
if (status.ok()) { if (status.ok()) {
FindObsoleteFiles(deletion_state); versions_->GetAndFreeObsoleteFiles(&deletion_state.sstdeletefiles);
} }
} // lock released here } // lock released here
LogFlush(options_.info_log); LogFlush(options_.info_log);
@ -3384,7 +3369,6 @@ Status DBImpl::DeleteFile(std::string name) {
if (status.ok()) { if (status.ok()) {
// remove files outside the db-lock // remove files outside the db-lock
PurgeObsoleteFiles(deletion_state); PurgeObsoleteFiles(deletion_state);
EvictObsoleteFiles(deletion_state);
} }
return status; return status;
} }

View File

@ -69,6 +69,7 @@ class DBImpl : public DB {
virtual Status Flush(const FlushOptions& options); virtual Status Flush(const FlushOptions& options);
virtual Status DisableFileDeletions(); virtual Status DisableFileDeletions();
virtual Status EnableFileDeletions(); virtual Status EnableFileDeletions();
// All the returned filenames start with "/"
virtual Status GetLiveFiles(std::vector<std::string>&, virtual Status GetLiveFiles(std::vector<std::string>&,
uint64_t* manifest_file_size, uint64_t* manifest_file_size,
bool flush_memtable = true); bool flush_memtable = true);
@ -143,7 +144,6 @@ class DBImpl : public DB {
friend class DB; friend class DB;
struct CompactionState; struct CompactionState;
struct Writer; struct Writer;
struct DeletionState;
Status NewDB(); Status NewDB();
@ -157,12 +157,37 @@ class DBImpl : public DB {
const Status CreateArchivalDirectory(); const Status CreateArchivalDirectory();
// Work list handed from the code that discovers deletable files (while
// holding the DB mutex) to PurgeObsoleteFiles, which performs the actual
// removal and does not need the mutex.
//
// A default-constructed DeletionState describes "nothing to delete": every
// list is empty and the three file-number thresholds are 0.
struct DeletionState {
  // Returns true when at least one of the candidate lists is non-empty,
  // i.e. when calling PurgeObsoleteFiles has something to look at.
  inline bool HaveSomethingToDelete() const {
    return !allfiles.empty() || !sstdeletefiles.empty() ||
           !logdeletefiles.empty();
  }

  // a list of all files that we'll consider deleting
  // (every once in a while this is filled up with all files
  // in the DB directory)
  std::vector<std::string> allfiles;

  // the list of all live sst files that cannot be deleted
  std::vector<uint64_t> sstlive;

  // a list of sst files that we need to delete
  std::vector<uint64_t> sstdeletefiles;

  // a list of log files that we need to delete
  std::vector<uint64_t> logdeletefiles;

  // the current manifest_file_number, log_number and prev_log_number
  // that correspond to the set of files in 'sstlive'.
  //
  // These default to 0 instead of being left indeterminate: a caller may
  // populate sstdeletefiles/logdeletefiles and go straight to
  // PurgeObsoleteFiles without FindObsoleteFiles having assigned them.
  // PurgeObsoleteFiles keeps a log file when number >= log_number and a
  // manifest when number >= manifest_file_number, so 0 is the conservative
  // value (keep everything) rather than a comparison against garbage.
  uint64_t manifest_file_number = 0;
  uint64_t log_number = 0;
  uint64_t prev_log_number = 0;
};
// Delete any unneeded files and stale in-memory entries. // Delete any unneeded files and stale in-memory entries.
void DeleteObsoleteFiles(); void DeleteObsoleteFiles();
// Flush the in-memory write buffer to storage. Switches to a new // Flush the in-memory write buffer to storage. Switches to a new
// log-file/memtable and writes a new descriptor iff successful. // log-file/memtable and writes a new descriptor iff successful.
Status FlushMemTableToOutputFile(bool* madeProgress = nullptr); Status FlushMemTableToOutputFile(bool* madeProgress,
DeletionState& deletion_state);
Status RecoverLogFile(uint64_t log_number, Status RecoverLogFile(uint64_t log_number,
VersionEdit* edit, VersionEdit* edit,
@ -198,9 +223,10 @@ class DBImpl : public DB {
void BackgroundCallCompaction(); void BackgroundCallCompaction();
void BackgroundCallFlush(); void BackgroundCallFlush();
Status BackgroundCompaction(bool* madeProgress,DeletionState& deletion_state); Status BackgroundCompaction(bool* madeProgress,DeletionState& deletion_state);
Status BackgroundFlush(bool* madeProgress); Status BackgroundFlush(bool* madeProgress, DeletionState& deletion_state);
void CleanupCompaction(CompactionState* compact, Status status); void CleanupCompaction(CompactionState* compact, Status status);
Status DoCompactionWork(CompactionState* compact); Status DoCompactionWork(CompactionState* compact,
DeletionState& deletion_state);
Status OpenCompactionOutputFile(CompactionState* compact); Status OpenCompactionOutputFile(CompactionState* compact);
Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input); Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);
@ -208,21 +234,19 @@ class DBImpl : public DB {
void AllocateCompactionOutputFileNumbers(CompactionState* compact); void AllocateCompactionOutputFileNumbers(CompactionState* compact);
void ReleaseCompactionUnusedFileNumbers(CompactionState* compact); void ReleaseCompactionUnusedFileNumbers(CompactionState* compact);
// Returns the list of live files in 'live' and the list // Returns the list of live files in 'live' and the list
// of all files in the filesystem in 'allfiles'. // of all files in the filesystem in 'allfiles'.
void FindObsoleteFiles(DeletionState& deletion_state); // If force == false and the last call was less than
// options_.delete_obsolete_files_period_micros microseconds ago,
// it will not fill up the deletion_state
void FindObsoleteFiles(DeletionState& deletion_state, bool force);
// Diffs the files listed in filenames and those that do not // Diffs the files listed in filenames and those that do not
// belong to live files are possibly removed. If the removed file // belong to live files are possibly removed. Also, removes all the
// is a sst file, then it returns the file number in files_to_evict. // files in sstdeletefiles and logdeletefiles.
// It is not necessary to hold the mutex when invoking this method.
void PurgeObsoleteFiles(DeletionState& deletion_state); void PurgeObsoleteFiles(DeletionState& deletion_state);
// Removes the file listed in files_to_evict from the table_cache
void EvictObsoleteFiles(DeletionState& deletion_state);
Status DeleteLogFile(uint64_t number);
void PurgeObsoleteWALFiles(); void PurgeObsoleteWALFiles();
Status AppendSortedWalsOfType(const std::string& path, Status AppendSortedWalsOfType(const std::string& path,

View File

@ -3616,8 +3616,11 @@ TEST(DBTest, SnapshotFiles) {
ASSERT_EQ(system(mkdir.c_str()), 0); ASSERT_EQ(system(mkdir.c_str()), 0);
for (unsigned int i = 0; i < files.size(); i++) { for (unsigned int i = 0; i < files.size(); i++) {
std::string src = dbname_ + "/" + files[i]; // our clients require that GetLiveFiles returns
std::string dest = snapdir + "/" + files[i]; // files with "/" as first character!
ASSERT_EQ(files[i][0], '/');
std::string src = dbname_ + files[i];
std::string dest = snapdir + files[i];
uint64_t size; uint64_t size;
ASSERT_OK(env_->GetFileSize(src, &size)); ASSERT_OK(env_->GetFileSize(src, &size));

View File

@ -50,7 +50,7 @@ Version::~Version() {
assert(f->refs > 0); assert(f->refs > 0);
f->refs--; f->refs--;
if (f->refs <= 0) { if (f->refs <= 0) {
delete f; vset_->obsolete_files_.push_back(f);
} }
} }
} }
@ -1161,6 +1161,7 @@ VersionSet::VersionSet(const std::string& dbname,
VersionSet::~VersionSet() { VersionSet::~VersionSet() {
current_->Unref(); current_->Unref();
assert(dummy_versions_.next_ == &dummy_versions_); // List must be empty assert(dummy_versions_.next_ == &dummy_versions_); // List must be empty
GetAndFreeObsoleteFiles(nullptr);
delete[] compact_pointer_; delete[] compact_pointer_;
delete[] max_file_size_; delete[] max_file_size_;
delete[] level_max_bytes_; delete[] level_max_bytes_;
@ -1239,6 +1240,8 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu,
std::string new_manifest_file; std::string new_manifest_file;
uint64_t new_manifest_file_size = 0; uint64_t new_manifest_file_size = 0;
Status s; Status s;
// we will need this if we are creating new manifest
uint64_t old_manifest_file_number = manifest_file_number_;
// No need to perform this check if a new Manifest is being created anyways. // No need to perform this check if a new Manifest is being created anyways.
if (!descriptor_log_ || if (!descriptor_log_ ||
@ -1247,7 +1250,7 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu,
manifest_file_number_ = NewFileNumber(); // Change manifest file no. manifest_file_number_ = NewFileNumber(); // Change manifest file no.
} }
if (!descriptor_log_ || new_descriptor_log) { if (new_descriptor_log) {
new_manifest_file = DescriptorFileName(dbname_, manifest_file_number_); new_manifest_file = DescriptorFileName(dbname_, manifest_file_number_);
edit->SetNextFile(next_file_number_); edit->SetNextFile(next_file_number_);
} }
@ -1313,6 +1316,15 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu,
// new CURRENT file that points to it. // new CURRENT file that points to it.
if (s.ok() && !new_manifest_file.empty()) { if (s.ok() && !new_manifest_file.empty()) {
s = SetCurrentFile(env_, dbname_, manifest_file_number_); s = SetCurrentFile(env_, dbname_, manifest_file_number_);
if (s.ok() && old_manifest_file_number < manifest_file_number_) {
// delete old manifest file
Log(options_->info_log,
"Deleting manifest %lu current manifest %lu\n",
old_manifest_file_number, manifest_file_number_);
// we don't care about an error here, PurgeObsoleteFiles will take care
// of it later
env_->DeleteFile(DescriptorFileName(dbname_, old_manifest_file_number));
}
} }
// find offset in manifest file where this version is stored. // find offset in manifest file where this version is stored.
@ -2849,6 +2861,19 @@ void VersionSet::GetLiveFilesMetaData(
} }
} }
void VersionSet::GetAndFreeObsoleteFiles(std::vector<uint64_t>* files) {
if (files != nullptr) {
files->reserve(files->size() + obsolete_files_.size());
}
for (size_t i = 0; i < obsolete_files_.size(); i++) {
if (files != nullptr) {
files->push_back(obsolete_files_[i]->number);
}
delete obsolete_files_[i];
}
obsolete_files_.clear();
}
Compaction* VersionSet::CompactRange( Compaction* VersionSet::CompactRange(
int level, int level,
const InternalKey* begin, const InternalKey* begin,

View File

@ -431,6 +431,8 @@ class VersionSet {
void GetLiveFilesMetaData( void GetLiveFilesMetaData(
std::vector<LiveFileMetaData> *metadata); std::vector<LiveFileMetaData> *metadata);
void GetAndFreeObsoleteFiles(std::vector<uint64_t>* files);
private: private:
class Builder; class Builder;
struct ManifestWriter; struct ManifestWriter;
@ -507,6 +509,8 @@ class VersionSet {
// Save us the cost of checking file size twice in LogAndApply // Save us the cost of checking file size twice in LogAndApply
uint64_t last_observed_manifest_size_; uint64_t last_observed_manifest_size_;
std::vector<FileMetaData*> obsolete_files_;
// storage options for all reads and writes except compactions // storage options for all reads and writes except compactions
const EnvOptions& storage_options_; const EnvOptions& storage_options_;

View File

@ -387,8 +387,7 @@ struct Options {
bool disable_seek_compaction; bool disable_seek_compaction;
// The periodicity when obsolete files get deleted. The default // The periodicity when obsolete files get deleted. The default
// value is 0 which means that obsolete files get removed after // value is 6 hours.
// every compaction run.
uint64_t delete_obsolete_files_period_micros; uint64_t delete_obsolete_files_period_micros;
// Maximum number of concurrent background jobs, submitted to // Maximum number of concurrent background jobs, submitted to

View File

@ -62,7 +62,7 @@ Options::Options()
db_log_dir(""), db_log_dir(""),
wal_dir(""), wal_dir(""),
disable_seek_compaction(false), disable_seek_compaction(false),
delete_obsolete_files_period_micros(0), delete_obsolete_files_period_micros(6 * 60 * 60 * 1000000UL),
max_background_compactions(1), max_background_compactions(1),
max_background_flushes(0), max_background_flushes(0),
max_log_file_size(0), max_log_file_size(0),