PurgeObsoleteFiles() unittest
Summary: Created a unit test that verifies that the automatic deletion performed by PurgeObsoleteFiles() works correctly. Also made a few small fixes to the logic -- version_set_->GetObsoleteFiles() is now called in FindObsoleteFiles() instead of at arbitrary call sites. Test Plan: Created a unit test Reviewers: dhruba, haobo, nkg- Reviewed By: haobo CC: leveldb Differential Revision: https://reviews.facebook.net/D14079
This commit is contained in:
parent
94dde686bb
commit
a0ce3fd00a
@ -249,7 +249,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
|
||||
manual_compaction_(nullptr),
|
||||
logger_(nullptr),
|
||||
disable_delete_obsolete_files_(false),
|
||||
delete_obsolete_files_last_run_(0),
|
||||
delete_obsolete_files_last_run_(options.env->NowMicros()),
|
||||
purge_wal_files_last_run_(0),
|
||||
last_stats_dump_time_microsec_(0),
|
||||
default_interval_to_delete_obsolete_WAL_(600),
|
||||
@ -437,7 +437,13 @@ void DBImpl::MaybeDumpStats() {
|
||||
|
||||
// Returns the list of live files in 'sst_live' and the list
|
||||
// of all files in the filesystem in 'all_files'.
|
||||
void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) {
|
||||
// no_full_scan = true -- never do the full scan using GetChildren()
|
||||
// force = false -- don't force the full scan, except every
|
||||
// options_.delete_obsolete_files_period_micros
|
||||
// force = true -- force the full scan
|
||||
void DBImpl::FindObsoleteFiles(DeletionState& deletion_state,
|
||||
bool force,
|
||||
bool no_full_scan) {
|
||||
mutex_.AssertHeld();
|
||||
|
||||
// if deletion is disabled, do nothing
|
||||
@ -445,14 +451,30 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) {
|
||||
return;
|
||||
}
|
||||
|
||||
// get obsolete files
|
||||
versions_->GetObsoleteFiles(&deletion_state.sst_delete_files);
|
||||
|
||||
// store the current filenum, lognum, etc
|
||||
deletion_state.manifest_file_number = versions_->ManifestFileNumber();
|
||||
deletion_state.log_number = versions_->LogNumber();
|
||||
deletion_state.prev_log_number = versions_->PrevLogNumber();
|
||||
|
||||
// This method is costly when the number of files is large.
|
||||
// Do not allow it to trigger more often than once in
|
||||
// delete_obsolete_files_period_micros.
|
||||
// TODO we should not be catching live files here,
|
||||
// version_->GetObsoleteFiles() should tell us the truth, which
|
||||
// files are to be deleted. However, it does not, so we do
|
||||
// this to be safe, i.e. never delete files that could be
|
||||
// live
|
||||
deletion_state.sst_live.assign(pending_outputs_.begin(),
|
||||
pending_outputs_.end());
|
||||
versions_->AddLiveFiles(&deletion_state.sst_live);
|
||||
|
||||
// if no_full_scan, never do the full scan
|
||||
if (no_full_scan) {
|
||||
return;
|
||||
}
|
||||
// if force == true, always fall through and do the full scan
|
||||
// if force == false, do the full scan only every
|
||||
// options_.delete_obsolete_files_period_micros
|
||||
if (!force && options_.delete_obsolete_files_period_micros != 0) {
|
||||
const uint64_t now_micros = env_->NowMicros();
|
||||
if (delete_obsolete_files_last_run_ +
|
||||
@ -462,12 +484,6 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) {
|
||||
delete_obsolete_files_last_run_ = now_micros;
|
||||
}
|
||||
|
||||
// Make a list of all of the live files; set is slow, should not
|
||||
// be used.
|
||||
deletion_state.sst_live.assign(pending_outputs_.begin(),
|
||||
pending_outputs_.end());
|
||||
versions_->AddLiveFiles(&deletion_state.sst_live);
|
||||
|
||||
// set of all files in the directory
|
||||
env_->GetChildren(dbname_, &deletion_state.all_files); // Ignore errors
|
||||
|
||||
@ -488,8 +504,10 @@ void DBImpl::FindObsoleteFiles(DeletionState& deletion_state, bool force) {
|
||||
// files in sst_delete_files and log_delete_files.
|
||||
// It is not necessary to hold the mutex when invoking this method.
|
||||
void DBImpl::PurgeObsoleteFiles(DeletionState& state) {
|
||||
// if deletion is disabled, do nothing
|
||||
if (disable_delete_obsolete_files_) {
|
||||
// this checks if FindObsoleteFiles() was run before. If not, don't do
|
||||
// PurgeObsoleteFiles(). If FindObsoleteFiles() was run, we need to also
|
||||
// run PurgeObsoleteFiles(), even if disable_delete_obsolete_files_ is true
|
||||
if (state.manifest_file_number == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1791,7 +1809,6 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
|
||||
CleanupCompaction(compact, status);
|
||||
versions_->ReleaseCompactionFiles(c.get(), status);
|
||||
c->ReleaseInputs();
|
||||
versions_->GetObsoleteFiles(&deletion_state.sst_delete_files);
|
||||
*madeProgress = true;
|
||||
}
|
||||
c.reset();
|
||||
@ -2454,6 +2471,7 @@ struct IterState {
|
||||
port::Mutex* mu;
|
||||
Version* version;
|
||||
std::vector<MemTable*> mem; // includes both mem_ and imm_
|
||||
DBImpl *db;
|
||||
};
|
||||
|
||||
static void CleanupIteratorState(void* arg1, void* arg2) {
|
||||
@ -2463,7 +2481,12 @@ static void CleanupIteratorState(void* arg1, void* arg2) {
|
||||
state->mem[i]->Unref();
|
||||
}
|
||||
state->version->Unref();
|
||||
// delete only the sst obsolete files
|
||||
DBImpl::DeletionState deletion_state;
|
||||
// fast path FindObsoleteFiles
|
||||
state->db->FindObsoleteFiles(deletion_state, false, true);
|
||||
state->mu->Unlock();
|
||||
state->db->PurgeObsoleteFiles(deletion_state);
|
||||
delete state;
|
||||
}
|
||||
} // namespace
|
||||
@ -2498,6 +2521,7 @@ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
|
||||
versions_->current()->Ref();
|
||||
|
||||
cleanup->mu = &mutex_;
|
||||
cleanup->db = this;
|
||||
cleanup->version = versions_->current();
|
||||
internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, nullptr);
|
||||
|
||||
@ -3375,9 +3399,6 @@ Status DBImpl::DeleteFile(std::string name) {
|
||||
}
|
||||
edit.DeleteFile(level, number);
|
||||
status = versions_->LogAndApply(&edit, &mutex_);
|
||||
if (status.ok()) {
|
||||
versions_->GetObsoleteFiles(&deletion_state.sst_delete_files);
|
||||
}
|
||||
FindObsoleteFiles(deletion_state, false);
|
||||
} // lock released here
|
||||
LogFlush(options_.info_log);
|
||||
|
93
db/db_impl.h
93
db/db_impl.h
@ -123,6 +123,54 @@ class DBImpl : public DB {
|
||||
default_interval_to_delete_obsolete_WAL_ = default_interval_to_delete_obsolete_WAL;
|
||||
}
|
||||
|
||||
// needed for CleanupIteratorState
|
||||
|
||||
struct DeletionState {
|
||||
inline bool HaveSomethingToDelete() const {
|
||||
return all_files.size() ||
|
||||
sst_delete_files.size() ||
|
||||
log_delete_files.size();
|
||||
}
|
||||
// a list of all files that we'll consider deleting
|
||||
// (every once in a while this is filled up with all files
|
||||
// in the DB directory)
|
||||
std::vector<std::string> all_files;
|
||||
|
||||
// the list of all live sst files that cannot be deleted
|
||||
std::vector<uint64_t> sst_live;
|
||||
|
||||
// a list of sst files that we need to delete
|
||||
std::vector<FileMetaData*> sst_delete_files;
|
||||
|
||||
// a list of log files that we need to delete
|
||||
std::vector<uint64_t> log_delete_files;
|
||||
|
||||
// the current manifest_file_number, log_number and prev_log_number
|
||||
// that corresponds to the set of files in 'live'.
|
||||
uint64_t manifest_file_number, log_number, prev_log_number;
|
||||
|
||||
DeletionState() {
|
||||
manifest_file_number = 0;
|
||||
log_number = 0;
|
||||
prev_log_number = 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Returns the list of live files in 'live' and the list
|
||||
// of all files in the filesystem in 'all_files'.
|
||||
// If force == false and the last call was less than
|
||||
// options_.delete_obsolete_files_period_micros microseconds ago,
|
||||
// it will not fill up the deletion_state
|
||||
void FindObsoleteFiles(DeletionState& deletion_state,
|
||||
bool force,
|
||||
bool no_full_scan = false);
|
||||
|
||||
// Diffs the files listed in filenames and those that do not
|
||||
// belong to live files are possibly removed. Also, removes all the
|
||||
// files in sst_delete_files and log_delete_files.
|
||||
// It is not necessary to hold the mutex when invoking this method.
|
||||
void PurgeObsoleteFiles(DeletionState& deletion_state);
|
||||
|
||||
protected:
|
||||
Env* const env_;
|
||||
const std::string dbname_;
|
||||
@ -158,38 +206,6 @@ class DBImpl : public DB {
|
||||
|
||||
const Status CreateArchivalDirectory();
|
||||
|
||||
struct DeletionState {
|
||||
inline bool HaveSomethingToDelete() const {
|
||||
return all_files.size() ||
|
||||
sst_delete_files.size() ||
|
||||
log_delete_files.size();
|
||||
}
|
||||
|
||||
// a list of all files that we'll consider deleting
|
||||
// (every once in a while this is filled up with all files
|
||||
// in the DB directory)
|
||||
std::vector<std::string> all_files;
|
||||
|
||||
// the list of all live sst files that cannot be deleted
|
||||
std::vector<uint64_t> sst_live;
|
||||
|
||||
// a list of sst files that we need to delete
|
||||
std::vector<FileMetaData*> sst_delete_files;
|
||||
|
||||
// a list of log files that we need to delete
|
||||
std::vector<uint64_t> log_delete_files;
|
||||
|
||||
// the current manifest_file_number, log_number and prev_log_number
|
||||
// that corresponds to the set of files in 'live'.
|
||||
uint64_t manifest_file_number, log_number, prev_log_number;
|
||||
|
||||
DeletionState() {
|
||||
manifest_file_number = 0;
|
||||
log_number = 0;
|
||||
prev_log_number = 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Delete any unneeded files and stale in-memory entries.
|
||||
void DeleteObsoleteFiles();
|
||||
|
||||
@ -243,19 +259,6 @@ class DBImpl : public DB {
|
||||
void AllocateCompactionOutputFileNumbers(CompactionState* compact);
|
||||
void ReleaseCompactionUnusedFileNumbers(CompactionState* compact);
|
||||
|
||||
// Returns the list of live files in 'live' and the list
|
||||
// of all files in the filesystem in 'all_files'.
|
||||
// If force == false and the last call was less than
|
||||
// options_.delete_obsolete_files_period_micros microseconds ago,
|
||||
// it will not fill up the deletion_state
|
||||
void FindObsoleteFiles(DeletionState& deletion_state, bool force);
|
||||
|
||||
// Diffs the files listed in filenames and those that do not
|
||||
// belong to live files are posibly removed. Also, removes all the
|
||||
// files in sst_delete_files and log_delete_files.
|
||||
// It is not necessary to hold the mutex when invoking this method.
|
||||
void PurgeObsoleteFiles(DeletionState& deletion_state);
|
||||
|
||||
void PurgeObsoleteWALFiles();
|
||||
|
||||
Status AppendSortedWalsOfType(const std::string& path,
|
||||
|
@ -40,6 +40,7 @@ class DeleteFileTest {
|
||||
options_.WAL_ttl_seconds = 300; // Used to test log files
|
||||
options_.WAL_size_limit_MB = 1024; // Used to test log files
|
||||
dbname_ = test::TmpDir() + "/deletefile_test";
|
||||
options_.wal_dir = dbname_ + "/wal_files";
|
||||
DestroyDB(dbname_, options_);
|
||||
numlevels_ = 7;
|
||||
ASSERT_OK(ReopenDB(true));
|
||||
@ -107,6 +108,28 @@ class DeleteFileTest {
|
||||
ASSERT_OK(dbi->TEST_WaitForFlushMemTable());
|
||||
}
|
||||
|
||||
void CheckFileTypeCounts(std::string& dir,
|
||||
int required_log,
|
||||
int required_sst,
|
||||
int required_manifest) {
|
||||
std::vector<std::string> filenames;
|
||||
env_->GetChildren(dir, &filenames);
|
||||
|
||||
int log_cnt = 0, sst_cnt = 0, manifest_cnt = 0;
|
||||
for (auto file : filenames) {
|
||||
uint64_t number;
|
||||
FileType type;
|
||||
if (ParseFileName(file, &number, &type)) {
|
||||
log_cnt += (type == kLogFile);
|
||||
sst_cnt += (type == kTableFile);
|
||||
manifest_cnt += (type == kDescriptorFile);
|
||||
}
|
||||
}
|
||||
ASSERT_EQ(required_log, log_cnt);
|
||||
ASSERT_EQ(required_sst, sst_cnt);
|
||||
ASSERT_EQ(required_manifest, manifest_cnt);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
TEST(DeleteFileTest, AddKeysAndQueryLevels) {
|
||||
@ -156,6 +179,34 @@ TEST(DeleteFileTest, AddKeysAndQueryLevels) {
|
||||
CloseDB();
|
||||
}
|
||||
|
||||
TEST(DeleteFileTest, PurgeObsoleteFilesTest) {
|
||||
CreateTwoLevels();
|
||||
// there should be only one (empty) log file because CreateTwoLevels()
|
||||
// flushes the memtables to disk
|
||||
CheckFileTypeCounts(options_.wal_dir, 1, 0, 0);
|
||||
// 2 ssts, 1 manifest
|
||||
CheckFileTypeCounts(dbname_, 0, 2, 1);
|
||||
std::string first("0"), last("999999");
|
||||
Slice first_slice(first), last_slice(last);
|
||||
db_->CompactRange(&first_slice, &last_slice, true, 2);
|
||||
// 1 sst after compaction
|
||||
CheckFileTypeCounts(dbname_, 0, 1, 1);
|
||||
|
||||
// this time, we keep an iterator alive
|
||||
ReopenDB(true);
|
||||
Iterator *itr = 0;
|
||||
CreateTwoLevels();
|
||||
itr = db_->NewIterator(ReadOptions());
|
||||
db_->CompactRange(&first_slice, &last_slice, true, 2);
|
||||
// 3 sst after compaction with live iterator
|
||||
CheckFileTypeCounts(dbname_, 0, 3, 1);
|
||||
delete itr;
|
||||
// 1 sst after iterator deletion
|
||||
CheckFileTypeCounts(dbname_, 0, 1, 1);
|
||||
|
||||
CloseDB();
|
||||
}
|
||||
|
||||
TEST(DeleteFileTest, DeleteFileWithIterator) {
|
||||
CreateTwoLevels();
|
||||
ReadOptions options;
|
||||
@ -196,11 +247,11 @@ TEST(DeleteFileTest, DeleteLogFiles) {
|
||||
// Should not succeed because live logs are not allowed to be deleted
|
||||
std::unique_ptr<LogFile> alive_log = std::move(logfiles.back());
|
||||
ASSERT_EQ(alive_log->Type(), kAliveLogFile);
|
||||
ASSERT_TRUE(env_->FileExists(dbname_ + "/" + alive_log->PathName()));
|
||||
ASSERT_TRUE(env_->FileExists(options_.wal_dir + "/" + alive_log->PathName()));
|
||||
fprintf(stdout, "Deleting alive log file %s\n",
|
||||
alive_log->PathName().c_str());
|
||||
ASSERT_TRUE(!db_->DeleteFile(alive_log->PathName()).ok());
|
||||
ASSERT_TRUE(env_->FileExists(dbname_ + "/" + alive_log->PathName()));
|
||||
ASSERT_TRUE(env_->FileExists(options_.wal_dir + "/" + alive_log->PathName()));
|
||||
logfiles.clear();
|
||||
|
||||
// Call Flush to bring about a new working log file and add more keys
|
||||
@ -214,11 +265,13 @@ TEST(DeleteFileTest, DeleteLogFiles) {
|
||||
ASSERT_GT(logfiles.size(), 0UL);
|
||||
std::unique_ptr<LogFile> archived_log = std::move(logfiles.front());
|
||||
ASSERT_EQ(archived_log->Type(), kArchivedLogFile);
|
||||
ASSERT_TRUE(env_->FileExists(dbname_ + "/" + archived_log->PathName()));
|
||||
ASSERT_TRUE(env_->FileExists(options_.wal_dir + "/" +
|
||||
archived_log->PathName()));
|
||||
fprintf(stdout, "Deleting archived log file %s\n",
|
||||
archived_log->PathName().c_str());
|
||||
ASSERT_OK(db_->DeleteFile(archived_log->PathName()));
|
||||
ASSERT_TRUE(!env_->FileExists(dbname_ + "/" + archived_log->PathName()));
|
||||
ASSERT_TRUE(!env_->FileExists(options_.wal_dir + "/" +
|
||||
archived_log->PathName()));
|
||||
CloseDB();
|
||||
}
|
||||
|
||||
|
@ -387,7 +387,9 @@ struct Options {
|
||||
bool disable_seek_compaction;
|
||||
|
||||
// The periodicity when obsolete files get deleted. The default
|
||||
// value is 6 hours.
|
||||
// value is 6 hours. The files that get out of scope by compaction
|
||||
// process will still get automatically deleted on every compaction,
|
||||
// regardless of this setting
|
||||
uint64_t delete_obsolete_files_period_micros;
|
||||
|
||||
// Maximum number of concurrent background jobs, submitted to
|
||||
|
Loading…
Reference in New Issue
Block a user