Small changes in Deleting obsolete files

Summary:
@haobo's suggestions from https://reviews.facebook.net/D13827

Renaming some variables, deprecating purge_log_after_flush, changing for loop into auto for loop.

I have not implemented deleting objects outside of mutex yet because it would require a big code change - we would delete object in db_impl, which currently does not know anything about object because it's defined in version_edit.h (FileMetaData). We should do it at some point, though.

Test Plan: Ran deletefile_test

Reviewers: haobo

Reviewed By: haobo

CC: leveldb, haobo

Differential Revision: https://reviews.facebook.net/D14025
This commit is contained in:
Igor Canadi 2013-11-12 11:53:26 -08:00
parent dad425562f
commit 9bc4a26f56
6 changed files with 51 additions and 56 deletions

View File

@ -435,8 +435,8 @@ void DBImpl::MaybeDumpStats() {
}
}
// Returns the list of live files in 'sstlive' and the list
// of all files in the filesystem in 'allfiles'.
// Returns the list of live files in 'sst_live' and the list
// of all files in the filesystem in 'all_files'.
void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) {
mutex_.AssertHeld();
@ -464,19 +464,19 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) {
// Make a list of all of the live files; set is slow, should not
// be used.
deletion_state.sstlive.assign(pending_outputs_.begin(),
deletion_state.sst_live.assign(pending_outputs_.begin(),
pending_outputs_.end());
versions_->AddLiveFiles(&deletion_state.sstlive);
versions_->AddLiveFiles(&deletion_state.sst_live);
// set of all files in the directory
env_->GetChildren(dbname_, &deletion_state.allfiles); // Ignore errors
env_->GetChildren(dbname_, &deletion_state.all_files); // Ignore errors
//Add log files in wal_dir
if (options_.wal_dir != dbname_) {
std::vector<std::string> log_files;
env_->GetChildren(options_.wal_dir, &log_files); // Ignore errors
deletion_state.allfiles.insert(
deletion_state.allfiles.end(),
deletion_state.all_files.insert(
deletion_state.all_files.end(),
log_files.begin(),
log_files.end()
);
@ -485,7 +485,7 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) {
// Diffs the files listed in filenames and those that do not
// belong to live files are posibly removed. Also, removes all the
// files in sstdeletefiles and logdeletefiles.
// files in sst_delete_files and log_delete_files.
// It is not necessary to hold the mutex when invoking this method.
void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
// if deletion is disabled, do nothing
@ -499,23 +499,26 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
// Now, convert live list to an unordered set, WITHOUT mutex held;
// set is slow.
std::unordered_set<uint64_t> live_set(state.sstlive.begin(),
state.sstlive.end());
std::unordered_set<uint64_t> live_set(state.sst_live.begin(),
state.sst_live.end());
state.allfiles.reserve(state.allfiles.size() + state.sstdeletefiles.size());
for (auto filenum : state.sstdeletefiles) {
state.allfiles.push_back(TableFileName("", filenum));
state.all_files.reserve(state.all_files.size() +
state.sst_delete_files.size());
for (auto file : state.sst_delete_files) {
state.all_files.push_back(TableFileName("", file->number));
delete file;
}
state.allfiles.reserve(state.allfiles.size() + state.logdeletefiles.size());
for (auto filenum : state.logdeletefiles) {
state.all_files.reserve(state.all_files.size() +
state.log_delete_files.size());
for (auto filenum : state.log_delete_files) {
if (filenum > 0) {
state.allfiles.push_back(LogFileName("", filenum));
state.all_files.push_back(LogFileName("", filenum));
}
}
for (size_t i = 0; i < state.allfiles.size(); i++) {
if (ParseFileName(state.allfiles[i], &number, &type)) {
for (size_t i = 0; i < state.all_files.size(); i++) {
if (ParseFileName(state.all_files[i], &number, &type)) {
bool keep = true;
switch (type) {
case kLogFile:
@ -538,7 +541,7 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
case kInfoLogFile:
keep = true;
if (number != 0) {
old_log_files.push_back(state.allfiles[i]);
old_log_files.push_back(state.all_files[i]);
}
break;
case kCurrentFile:
@ -559,14 +562,14 @@ void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
Status st;
if (type == kLogFile && (options_.WAL_ttl_seconds > 0 ||
options_.WAL_size_limit_MB > 0)) {
st = env_->RenameFile(dbname_ + "/" + state.allfiles[i],
st = env_->RenameFile(dbname_ + "/" + state.all_files[i],
ArchivedLogFileName(options_.wal_dir,
number));
if (!st.ok()) {
Log(options_.info_log, "RenameFile logfile #%lu FAILED", number);
}
} else {
st = env_->DeleteFile(dbname_ + "/" + state.allfiles[i]);
st = env_->DeleteFile(dbname_ + "/" + state.all_files[i]);
if (!st.ok()) {
Log(options_.info_log, "Delete type=%d #%lu FAILED\n",
int(type), number);
@ -1132,10 +1135,10 @@ Status DBImpl::FlushMemTableToOutputFile(bool* madeProgress,
MaybeScheduleLogDBDeployStats();
if (options_.purge_log_after_memtable_flush &&
!disable_delete_obsolete_files_) {
if (!disable_delete_obsolete_files_) {
// add to deletion state
deletion_state.logdeletefiles.insert(deletion_state.logdeletefiles.end(),
deletion_state.log_delete_files.insert(
deletion_state.log_delete_files.end(),
logs_to_delete.begin(),
logs_to_delete.end());
}
@ -1776,7 +1779,7 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
CleanupCompaction(compact, status);
versions_->ReleaseCompactionFiles(c.get(), status);
c->ReleaseInputs();
versions_->GetAndFreeObsoleteFiles(&deletion_state.sstdeletefiles);
versions_->GetObsoleteFiles(&deletion_state.sst_delete_files);
*madeProgress = true;
}
c.reset();
@ -3366,7 +3369,7 @@ Status DBImpl::DeleteFile(std::string name) {
edit.DeleteFile(level, number);
status = versions_->LogAndApply(&edit, &mutex_);
if (status.ok()) {
versions_->GetAndFreeObsoleteFiles(&deletion_state.sstdeletefiles);
versions_->GetObsoleteFiles(&deletion_state.sst_delete_files);
}
FindObsoleteFiles(deletion_state, false);
} // lock released here

View File

@ -14,6 +14,7 @@
#include "db/dbformat.h"
#include "db/log_writer.h"
#include "db/snapshot.h"
#include "db/version_edit.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/memtablerep.h"
@ -159,22 +160,24 @@ class DBImpl : public DB {
struct DeletionState {
inline bool HaveSomethingToDelete() const {
return allfiles.size() || sstdeletefiles.size() || logdeletefiles.size();
return all_files.size() ||
sst_delete_files.size() ||
log_delete_files.size();
}
// a list of all files that we'll consider deleting
// (every once in a while this is filled up with all files
// in the DB directory)
std::vector<std::string> allfiles;
std::vector<std::string> all_files;
// the list of all live sst files that cannot be deleted
std::vector<uint64_t> sstlive;
std::vector<uint64_t> sst_live;
// a list of sst files that we need to delete
std::vector<uint64_t> sstdeletefiles;
std::vector<FileMetaData*> sst_delete_files;
// a list of log files that we need to delete
std::vector<uint64_t> logdeletefiles;
std::vector<uint64_t> log_delete_files;
// the current manifest_file_number, log_number and prev_log_number
// that corresponds to the set of files in 'live'.
@ -241,7 +244,7 @@ class DBImpl : public DB {
void ReleaseCompactionUnusedFileNumbers(CompactionState* compact);
// Returns the list of live files in 'live' and the list
// of all files in the filesystem in 'allfiles'.
// of all files in the filesystem in 'all_files'.
// If force == false and the last call was less than
// options_.delete_obsolete_files_period_micros microseconds ago,
// it will not fill up the deletion_state
@ -249,7 +252,7 @@ class DBImpl : public DB {
// Diffs the files listed in filenames and those that do not
// belong to live files are posibly removed. Also, removes all the
// files in sstdeletefiles and logdeletefiles.
// files in sst_delete_files and log_delete_files.
// It is not necessary to hold the mutex when invoking this method.
void PurgeObsoleteFiles(DeletionState& deletion_state);

View File

@ -1161,7 +1161,10 @@ VersionSet::VersionSet(const std::string& dbname,
VersionSet::~VersionSet() {
current_->Unref();
assert(dummy_versions_.next_ == &dummy_versions_); // List must be empty
GetAndFreeObsoleteFiles(nullptr);
for (auto file : obsolete_files_) {
delete file;
}
obsolete_files_.clear();
delete[] compact_pointer_;
delete[] max_file_size_;
delete[] level_max_bytes_;
@ -2861,16 +2864,10 @@ void VersionSet::GetLiveFilesMetaData(
}
}
void VersionSet::GetAndFreeObsoleteFiles(std::vector<uint64_t>* files) {
if (files != nullptr) {
files->reserve(files->size() + obsolete_files_.size());
}
for (size_t i = 0; i < obsolete_files_.size(); i++) {
if (files != nullptr) {
files->push_back(obsolete_files_[i]->number);
}
delete obsolete_files_[i];
}
void VersionSet::GetObsoleteFiles(std::vector<FileMetaData*>* files) {
files->insert(files->end(),
obsolete_files_.begin(),
obsolete_files_.end());
obsolete_files_.clear();
}

View File

@ -431,7 +431,7 @@ class VersionSet {
void GetLiveFilesMetaData(
std::vector<LiveFileMetaData> *metadata);
void GetAndFreeObsoleteFiles(std::vector<uint64_t>* files);
void GetObsoleteFiles(std::vector<FileMetaData*>* files);
private:
class Builder;

View File

@ -609,11 +609,6 @@ struct Options {
// Default: a factory that doesn't provide any object
std::shared_ptr<CompactionFilterFactory> compaction_filter_factory;
// Remove the log file immediately after the corresponding memtable is flushed
// to data file.
// Default: true
bool purge_log_after_memtable_flush;
// This option allows user to to collect their own interested statistics of
// the tables.
// Default: emtpy vector -- no user-defined statistics collection will be

View File

@ -101,7 +101,6 @@ Options::Options()
compaction_filter_factory(
std::shared_ptr<CompactionFilterFactory>(
new DefaultCompactionFilterFactory())),
purge_log_after_memtable_flush(true),
inplace_update_support(false),
inplace_update_num_locks(10000) {
assert(memtable_factory.get() != nullptr);
@ -280,8 +279,6 @@ Options::Dump(Logger* log) const
Log(log,"Options.compaction_options_universal."
"max_size_amplification_percent: %u",
compaction_options_universal.max_size_amplification_percent);
Log(log," Options.purge_log_after_memtable_flush: %d",
purge_log_after_memtable_flush);
std::string collector_names;
for (auto collector : table_stats_collectors) {
collector_names.append(collector->Name());