Support taking a configurable number of files from the same level to compact in a single compaction run.

Summary:
The compaction process takes some files from LevelK and
merges them into LevelK+1. The number of files it picked from
LevelK was capped in such a way that the total amount of
data picked did not exceed the maxfilesize of that level.
This essentially meant that only one file from LevelK
was picked for a single compaction.

For bulk loads, we would like to take many files from
LevelK and compact them using a single compaction run.

This patch introduces an option called 'source_compaction_factor'
(similar to expanded_compaction_factor). It is a multiplier
that is multiplied by the maxfilesize of that level to arrive
at the limit that is used to throttle the number of source
files from LevelK.  For bulk loads, set source_compaction_factor
to a very high number so that multiple files from the same
level are picked for compaction in a single compaction.

The default value of source_compaction_factor is 1, so that
we can keep backward compatibility with existing compaction semantics.

Test Plan: make clean check

Reviewers: emayanke, sheki

Reviewed By: emayanke

CC: leveldb

Differential Revision: https://reviews.facebook.net/D6867
This commit is contained in:
Dhruba Borthakur 2012-11-20 23:07:41 -08:00
parent fbb73a4ac3
commit 7632fdb5cb
4 changed files with 23 additions and 2 deletions

View File

@ -221,6 +221,10 @@ static bool FLAGS_read_only = false;
// Do not auto trigger compactions
static bool FLAGS_disable_auto_compactions = false;
// Multiplier applied to the max file size of LevelK to cap the amount of
// source data picked for a compaction run that compacts LevelK with LevelK+1
static int FLAGS_source_compaction_factor = 1;
extern bool useOsBuffer;
extern bool useFsReadAhead;
extern bool useMmapRead;
@ -978,6 +982,7 @@ class Benchmark {
options.max_grandparent_overlap_factor =
FLAGS_max_grandparent_overlap_factor;
options.disable_auto_compactions = FLAGS_disable_auto_compactions;
options.source_compaction_factor = FLAGS_source_compaction_factor;
Status s;
if(FLAGS_read_only) {
s = DB::OpenForReadOnly(options, FLAGS_db, &db_);
@ -1431,6 +1436,9 @@ int main(int argc, char** argv) {
} else if (sscanf(argv[i], "--disable_auto_compactions=%d%c",
&n, &junk) == 1 && (n == 0 || n ==1)) {
FLAGS_disable_auto_compactions = n;
} else if (sscanf(argv[i], "--source_compaction_factor=%d%c",
&n, &junk) == 1 && n > 0) {
FLAGS_source_compaction_factor = n;
} else {
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
exit(1);

View File

@ -2028,7 +2028,8 @@ Compaction* VersionSet::CompactRange(
}
// Avoid compacting too much in one shot in case the range is large.
const uint64_t limit = MaxFileSizeForLevel(level);
const uint64_t limit = MaxFileSizeForLevel(level) *
options_->source_compaction_factor;
uint64_t total = 0;
for (size_t i = 0; i < inputs.size(); i++) {
uint64_t s = inputs[i]->file_size;
@ -2039,7 +2040,7 @@ Compaction* VersionSet::CompactRange(
}
}
Compaction* c = new Compaction(level, limit,
Compaction* c = new Compaction(level, MaxFileSizeForLevel(level),
MaxGrandParentOverlapBytes(level), NumberLevels());
c->input_version_ = current_;
c->input_version_->Ref();

View File

@ -238,6 +238,15 @@ struct Options {
// (expanded_compaction_factor * targetFileSizeLevel()) many bytes.
int expanded_compaction_factor;
// Maximum number of bytes in all source files to be compacted in a
// single compaction run. We avoid picking too many files in the
// source level so that the total source bytes for a compaction do
// not exceed
// (source_compaction_factor * targetFileSizeLevel()) many bytes.
// Default: 1, i.e. pick maxfilesize amount of data as the source of
// a compaction.
int source_compaction_factor;
// Control maximum bytes of overlaps in grandparent (i.e., level+2) before we
// stop building a single file in a level->level+1 compaction.
int max_grandparent_overlap_factor;

View File

@ -37,6 +37,7 @@ Options::Options()
max_bytes_for_level_base(10 * 1048576),
max_bytes_for_level_multiplier(10),
expanded_compaction_factor(25),
source_compaction_factor(1),
max_grandparent_overlap_factor(10),
statistics(NULL),
disableDataSync(false),
@ -115,6 +116,8 @@ Options::Dump(
max_bytes_for_level_multiplier);
Log(log," Options.expanded_compaction_factor: %d",
expanded_compaction_factor);
Log(log," Options.source_compaction_factor: %d",
source_compaction_factor);
Log(log," Options.max_grandparent_overlap_factor: %d",
max_grandparent_overlap_factor);
Log(log," Options.db_log_dir: %s",