Enabling trivial move in universal compaction

Summary: This change enables trivial move if all the input files are non onverlapping while doing Universal Compaction.

Test Plan: ./compaction_picker_test and db_test ran successfully with the new testcases.

Reviewers: sdong

Reviewed By: sdong

Subscribers: leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D40875
This commit is contained in:
Poornima Chozhiyath Raman 2015-07-07 14:18:55 -07:00
parent d8e3e766f9
commit c0b23dd5b0
9 changed files with 267 additions and 14 deletions

View File

@ -8,6 +8,7 @@
* Several new features on EventListener (see include/rocksdb/listener.h):
- OnCompationCompleted() now returns per-compaciton job statistics, defined in include/rocksdb/compaction_job_stats.h.
- Added OnTableFileCreated() and OnTableFileDeleted().
* Add compaction_options_universal.enable_trivial_move to true, to allow trivial move while performing universal compaction. Trivial move will happen only when all the input files are non overlapping.
### Public API changes
* EventListener::OnFlushCompleted() now passes FlushJobInfo instead of a list of parameters.

View File

@ -167,6 +167,12 @@ bool Compaction::IsTrivialMove() const {
return false;
}
// Used in universal compaction, where trivial move can be done if the
// input files are non overlapping
if (cfd_->ioptions()->compaction_options_universal.allow_trivial_move) {
return is_trivial_move_;
}
return (start_level_ != output_level_ && num_input_levels() == 1 &&
input(0, 0)->fd.GetPathId() == GetOutputPathId() &&
InputCompressionMatchesOutput() &&

View File

@ -158,6 +158,19 @@ class Compaction {
// Was this compaction triggered manually by the client?
bool IsManualCompaction() { return is_manual_compaction_; }
// Used when allow_trivial_move option is set in
// Universal compaction. If all the input files are
// non overlapping, then is_trivial_move_ variable
// will be set true, else false
void set_is_trivial_move(bool trivial_move) {
is_trivial_move_ = trivial_move;
}
// Used when allow_trivial_move option is set in
// Universal compaction. Returns true, if the input files
// are non-overlapping and can be trivially moved.
bool is_trivial_move() { return is_trivial_move_; }
// Return the MutableCFOptions that should be used throughout the compaction
// procedure
const MutableCFOptions* mutable_cf_options() { return &mutable_cf_options_; }
@ -238,6 +251,11 @@ class Compaction {
// Is this compaction requested by the client?
const bool is_manual_compaction_;
// True if we can do trivial move in Universal multi level
// compaction
bool is_trivial_move_;
// "level_ptrs_" holds indices into "input_version_->levels_", where each
// index remembers which file of an associated level we are currently used
// to check KeyNotExistsBeyondOutputLevel() for deletion operation.

View File

@ -15,6 +15,7 @@
#include <inttypes.h>
#include <limits>
#include <queue>
#include <string>
#include <utility>
@ -37,6 +38,64 @@ uint64_t TotalCompensatedFileSize(const std::vector<FileMetaData*>& files) {
return sum;
}
// Used in universal compaction when trivial move is enabled.
// This structure is used for the construction of min heap
// that contains the file meta data, the level of the file
// and the index of the file in that level
struct InputFileInfo {
FileMetaData* f;
unsigned int level;
unsigned int index;
};
// Used in universal compaction when trivial move is enabled.
// This comparator is used for the construction of min heap
// based on the smallest key of the file.
struct UserKeyComparator {
explicit UserKeyComparator(const Comparator* ucmp) { ucmp_ = ucmp; }
bool operator()(InputFileInfo i1, InputFileInfo i2) const {
return (ucmp_->Compare(i1.f->smallest.user_key(),
i2.f->smallest.user_key()) > 0);
}
private:
const Comparator* ucmp_;
};
typedef std::priority_queue<InputFileInfo, std::vector<InputFileInfo>,
UserKeyComparator> SmallestKeyHeap;
// This function creates the heap that is used to find if the files are
// overlapping during universal compaction when the allow_trivial_move
// is set.
SmallestKeyHeap create_level_heap(Compaction* c, const Comparator* ucmp) {
SmallestKeyHeap smallest_key_priority_q =
SmallestKeyHeap(UserKeyComparator(ucmp));
InputFileInfo input_file;
for (unsigned int l = 0; l < c->num_input_levels(); l++) {
if (c->num_input_files(l) != 0) {
if (l == 0 && c->start_level() == 0) {
for (size_t i = 0; i < c->num_input_files(0); i++) {
input_file.f = c->input(0, i);
input_file.level = 0;
input_file.index = i;
smallest_key_priority_q.push(std::move(input_file));
}
} else {
input_file.f = c->input(l, 0);
input_file.level = l;
input_file.index = 0;
smallest_key_priority_q.push(std::move(input_file));
}
}
}
return smallest_key_priority_q;
}
} // anonymous namespace
// Determine compression type, based on user options, level of the output
@ -1106,6 +1165,50 @@ void GetSmallestLargestSeqno(const std::vector<FileMetaData*>& files,
} // namespace
#endif
// Algorithm that checks to see if there are any overlapping
// files in the input
bool CompactionPicker::IsInputNonOverlapping(Compaction* c) {
auto comparator = icmp_->user_comparator();
int first_iter = 1;
InputFileInfo prev, curr, next;
SmallestKeyHeap smallest_key_priority_q =
create_level_heap(c, icmp_->user_comparator());
while (!smallest_key_priority_q.empty()) {
curr = smallest_key_priority_q.top();
smallest_key_priority_q.pop();
if (first_iter) {
prev = curr;
first_iter = 0;
} else {
if (comparator->Compare(prev.f->largest.user_key(),
curr.f->smallest.user_key()) >= 0) {
// found overlapping files, return false
return false;
}
assert(comparator->Compare(curr.f->largest.user_key(),
prev.f->largest.user_key()) > 0);
prev = curr;
}
next.f = nullptr;
if (curr.level != 0 && curr.index < c->num_input_files(curr.level) - 1) {
next.f = c->input(curr.level, curr.index + 1);
next.level = curr.level;
next.index = curr.index + 1;
}
if (next.f) {
smallest_key_priority_q.push(std::move(next));
}
}
return true;
}
// Universal style of compaction. Pick files that are contiguous in
// time-range to compact.
//
@ -1168,6 +1271,10 @@ Compaction* UniversalCompactionPicker::PickCompaction(
return nullptr;
}
if (ioptions_.compaction_options_universal.allow_trivial_move == true) {
c->set_is_trivial_move(IsInputNonOverlapping(c));
}
// validate that all the chosen files of L0 are non overlapping in time
#ifndef NDEBUG
SequenceNumber prev_smallest_seqno = 0U;

View File

@ -105,6 +105,12 @@ class CompactionPicker {
const VersionStorageInfo* vstorage,
const CompactionOptions& compact_options) const;
// Used in universal compaction when the enabled_trivial_move
// option is set. Checks whether there are any overlapping files
// in the input. Returns true if the input files are non
// overlapping.
bool IsInputNonOverlapping(Compaction* c);
protected:
int NumberLevels() const { return ioptions_.num_levels; }

View File

@ -77,6 +77,8 @@ class CompactionPickerTest : public testing::Test {
f->fd = FileDescriptor(file_number, path_id, file_size);
f->smallest = InternalKey(smallest, smallest_seq, kTypeValue);
f->largest = InternalKey(largest, largest_seq, kTypeValue);
f->smallest_seqno = smallest_seq;
f->largest_seqno = largest_seq;
f->compensated_file_size = file_size;
f->refs = 0;
vstorage_->AddFile(level, f);
@ -365,6 +367,64 @@ TEST_F(CompactionPickerTest, NeedsCompactionUniversal) {
vstorage_->CompactionScore(0) >= 1);
}
}
// Tests if the files can be trivially moved in multi level
// universal compaction when allow_trivial_move option is set
// In this test as the input files overlaps, they cannot
// be trivially moved.
TEST_F(CompactionPickerTest, CannotTrivialMoveUniversal) {
const uint64_t kFileSize = 100000;
ioptions_.compaction_options_universal.allow_trivial_move = true;
NewVersionStorage(1, kCompactionStyleUniversal);
UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_);
// must return false when there's no files.
ASSERT_EQ(universal_compaction_picker.NeedsCompaction(vstorage_.get()),
false);
NewVersionStorage(3, kCompactionStyleUniversal);
Add(0, 1U, "150", "200", kFileSize, 0, 500, 550);
Add(0, 2U, "201", "250", kFileSize, 0, 401, 450);
Add(0, 4U, "260", "300", kFileSize, 0, 260, 300);
Add(1, 5U, "100", "151", kFileSize, 0, 200, 251);
Add(1, 3U, "301", "350", kFileSize, 0, 101, 150);
Add(2, 6U, "120", "200", kFileSize, 0, 20, 100);
UpdateVersionStorageInfo();
std::unique_ptr<Compaction> compaction(
universal_compaction_picker.PickCompaction(
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
ASSERT_TRUE(!compaction->is_trivial_move());
}
// Tests if the files can be trivially moved in multi level
// universal compaction when allow_trivial_move option is set
// In this test as the input files doesn't overlaps, they should
// be trivially moved.
TEST_F(CompactionPickerTest, AllowsTrivialMoveUniversal) {
const uint64_t kFileSize = 100000;
ioptions_.compaction_options_universal.allow_trivial_move = true;
UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_);
NewVersionStorage(3, kCompactionStyleUniversal);
Add(0, 1U, "150", "200", kFileSize, 0, 500, 550);
Add(0, 2U, "201", "250", kFileSize, 0, 401, 450);
Add(0, 4U, "260", "300", kFileSize, 0, 260, 300);
Add(1, 5U, "010", "080", kFileSize, 0, 200, 251);
Add(2, 3U, "301", "350", kFileSize, 0, 101, 150);
UpdateVersionStorageInfo();
std::unique_ptr<Compaction> compaction(
universal_compaction_picker.PickCompaction(
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
ASSERT_TRUE(compaction->is_trivial_move());
}
TEST_F(CompactionPickerTest, NeedsCompactionFIFO) {
NewVersionStorage(1, kCompactionStyleFIFO);

View File

@ -2538,8 +2538,12 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, JobContext* job_context,
// Move files to next level
int32_t moved_files = 0;
int64_t moved_bytes = 0;
for (size_t i = 0; i < c->num_input_files(0); i++) {
FileMetaData* f = c->input(0, i);
for (unsigned int l = 0; l < c->num_input_levels(); l++) {
if (l == static_cast<unsigned int>(c->output_level())) {
continue;
}
for (size_t i = 0; i < c->num_input_files(l); i++) {
FileMetaData* f = c->input(l, i);
c->edit()->DeleteFile(c->level(), f->fd.GetNumber());
c->edit()->AddFile(c->output_level(), f->fd.GetNumber(),
f->fd.GetPathId(), f->fd.GetFileSize(), f->smallest,
@ -2548,11 +2552,13 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, JobContext* job_context,
LogToBuffer(log_buffer,
"[%s] Moving #%" PRIu64 " to level-%d %" PRIu64 " bytes\n",
c->column_family_data()->GetName().c_str(), f->fd.GetNumber(),
c->output_level(), f->fd.GetFileSize());
c->column_family_data()->GetName().c_str(),
f->fd.GetNumber(), c->output_level(), f->fd.GetFileSize());
++moved_files;
moved_bytes += f->fd.GetFileSize();
}
}
status = versions_->LogAndApply(c->column_family_data(),
*c->mutable_cf_options(), c->edit(),
&mutex_, directories_.GetDbDir());

View File

@ -4528,7 +4528,50 @@ TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionMultiLevels) {
ASSERT_EQ(Get(1, Key(i % num_keys)), Key(i));
}
}
// Tests universal compaction with trivial move enabled
TEST_P(DBTestUniversalCompactionMultiLevels, UniversalCompactionTrivialMove) {
int32_t trivial_move = 0;
int32_t non_trivial_move = 0;
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::BackgroundCompaction:TrivialMove",
[&](void* arg) { trivial_move++; });
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DBImpl::BackgroundCompaction:NonTrivial",
[&](void* arg) { non_trivial_move++; });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
Options options;
options.compaction_style = kCompactionStyleUniversal;
options.compaction_options_universal.allow_trivial_move = true;
options.num_levels = 3;
options.write_buffer_size = 100 << 10; // 100KB
options.level0_file_num_compaction_trigger = 3;
options.max_background_compactions = 1;
options.target_file_size_base = 32 * 1024;
options = CurrentOptions(options);
DestroyAndReopen(options);
CreateAndReopenWithCF({"pikachu"}, options);
// Trigger compaction if size amplification exceeds 110%
options.compaction_options_universal.max_size_amplification_percent = 110;
options = CurrentOptions(options);
ReopenWithColumnFamilies({"default", "pikachu"}, options);
Random rnd(301);
int num_keys = 15000;
for (int i = 0; i < num_keys; i++) {
ASSERT_OK(Put(1, Key(i), Key(i)));
}
std::vector<std::string> values;
ASSERT_OK(Flush(1));
dbfull()->TEST_WaitForCompact();
ASSERT_GT(trivial_move, 0);
ASSERT_EQ(non_trivial_move, 0);
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
INSTANTIATE_TEST_CASE_P(DBTestUniversalCompactionMultiLevels,
DBTestUniversalCompactionMultiLevels,
::testing::Values(3, 20));

View File

@ -69,6 +69,11 @@ class CompactionOptionsUniversal {
// Default: kCompactionStopStyleTotalSize
CompactionStopStyle stop_style;
// Option to optimize the universal multi level compaction by enabling
// trivial move for non overlapping files.
// Default: false
bool allow_trivial_move;
// Default set of parameters
CompactionOptionsUniversal()
: size_ratio(1),
@ -76,7 +81,8 @@ class CompactionOptionsUniversal {
max_merge_width(UINT_MAX),
max_size_amplification_percent(200),
compression_size_percent(-1),
stop_style(kCompactionStopStyleTotalSize) {}
stop_style(kCompactionStopStyleTotalSize),
allow_trivial_move(false) {}
};
} // namespace rocksdb