The deletion of obsolete files should not occur very frequently.

Summary:
The method DeleteObsoleteFiles is very costly, especially
when the number of files in a system is large. It makes a list of
all live files and then scans the directory to compute the diff.
By default, this method is executed after every compaction run.

This patch ensures that DeleteObsoleteFiles is never invoked
more than once within a configured period.

Test Plan: run all unit tests

Reviewers: heyongqiang, MarkCallaghan

Reviewed By: MarkCallaghan

Differential Revision: https://reviews.facebook.net/D6045
This commit is contained in:
Dhruba Borthakur 2012-10-16 08:53:46 -07:00
parent 0230866791
commit aa73538f2a
5 changed files with 34 additions and 2 deletions

View File

@ -167,6 +167,11 @@ static int FLAGS_readwritepercent = 90;
// Option to disable compaction triggered by read.
static int FLAGS_disable_seek_compaction = false;
// Option to delete obsolete files periodically
// Default: 0 which means that obsolete files are
// deleted after every compaction run.
static uint64_t FLAGS_delete_obsolete_files_period_micros = 0;
// Algorithm to use to compress the database
static enum leveldb::CompressionType FLAGS_compression_type =
leveldb::kSnappyCompression;
@ -886,6 +891,8 @@ class Benchmark {
FLAGS_level0_slowdown_writes_trigger;
options.compression = FLAGS_compression_type;
options.disable_seek_compaction = FLAGS_disable_seek_compaction;
options.delete_obsolete_files_period_micros =
FLAGS_delete_obsolete_files_period_micros;
Status s = DB::Open(options, FLAGS_db, &db_);
if (!s.ok()) {
fprintf(stderr, "open error: %s\n", s.ToString().c_str());
@ -1290,6 +1297,9 @@ int main(int argc, char** argv) {
} else if (sscanf(argv[i], "--disable_seek_compaction=%d%c", &n, &junk) == 1
&& (n == 0 || n == 1)) {
FLAGS_disable_seek_compaction = n;
} else if (sscanf(argv[i], "--delete_obsolete_files_period_micros=%ld%c",
&l, &junk) == 1) {
// The %ld conversion above stores the parsed period in `l`. Assign that
// value; `n` is only written by the %d-based flag parsers and would hold
// whatever an unrelated parse attempt left behind.
FLAGS_delete_obsolete_files_period_micros = l;
} else if (sscanf(argv[i], "--stats_interval=%d%c", &n, &junk) == 1 &&
n >= 0 && n < 2000000000) {
FLAGS_stats_interval = n;

View File

@ -143,7 +143,8 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
bg_logstats_scheduled_(false),
manual_compaction_(NULL),
logger_(NULL),
disable_delete_obsolete_files_(false) {
disable_delete_obsolete_files_(false),
delete_obsolete_files_last_run_(0) {
mem_->Ref();
has_imm_.Release_Store(NULL);
@ -253,6 +254,18 @@ void DBImpl::DeleteObsoleteFiles() {
return;
}
// This method is costly when the number of files is large.
// Do not allow it to trigger more often than once in
// delete_obsolete_files_period_micros.
if (options_.delete_obsolete_files_period_micros != 0) {
const uint64_t now_micros = env_->NowMicros();
if (delete_obsolete_files_last_run_ +
options_.delete_obsolete_files_period_micros > now_micros) {
return;
}
delete_obsolete_files_last_run_ = now_micros;
}
// Make a set of all of the live files
std::set<uint64_t> live = pending_outputs_;
versions_->AddLiveFiles(&live);

View File

@ -197,6 +197,9 @@ class DBImpl : public DB {
// shall we disable deletion of obsolete files
bool disable_delete_obsolete_files_;
// last time when DeleteObsoleteFiles was invoked
uint64_t delete_obsolete_files_last_run_;
// Per level compaction stats. stats_[level] stores the stats for
// compactions that produced data for the specified "level".
struct CompactionStats {

View File

@ -240,6 +240,11 @@ struct Options {
// (which is true if max_open_files is large).
bool disable_seek_compaction;
// The minimum interval, in microseconds, between deletions of
// obsolete files. The default value is 0, which means that obsolete
// files get removed after every compaction run.
uint64_t delete_obsolete_files_period_micros;
// Create an Options object with default values for all fields.
Options();

View File

@ -41,7 +41,8 @@ Options::Options()
use_fsync(false),
db_stats_log_interval(1800),
db_log_dir(""),
disable_seek_compaction(false) {
disable_seek_compaction(false),
delete_obsolete_files_period_micros(0) {
}
void