No need for files_by_size_ in universal compaction

Summary: In universal compaction, files_by_size_ is sorted by time. However, Version::files_ is already sorted by time as well, so files_by_size_ is not needed in that case.

Test Plan:
1) make check with the change
2) make check with `assert(last_index == c->input_version_->files_[level].size() - 1);` in compaction picker

Reviewers: dhruba, haobo, yhchiang, sdong, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D19125
This commit is contained in:
Igor Canadi 2014-07-01 08:55:04 +02:00
parent 5656367416
commit a2e0d890ed
3 changed files with 41 additions and 81 deletions

View File

@ -585,15 +585,9 @@ Compaction* UniversalCompactionPicker::PickCompaction(Version* version,
newerfile = f; newerfile = f;
} }
// The files are sorted from newest first to oldest last.
std::vector<int>& file_by_time = c->input_version_->files_by_size_[level];
// Is the earliest file part of this compaction? // Is the earliest file part of this compaction?
int last_index = file_by_time[file_by_time.size()-1]; FileMetaData* last_file = c->input_version_->files_[level].back();
FileMetaData* last_file = c->input_version_->files_[level][last_index]; c->bottommost_level_ = c->inputs_[0].back() == last_file;
if (c->inputs_[0][c->inputs_[0].size()-1] == last_file) {
c->bottommost_level_ = true;
}
// update statistics // update statistics
MeasureTime(options_->statistics.get(), NUM_FILES_IN_SINGLE_COMPACTION, MeasureTime(options_->statistics.get(), NUM_FILES_IN_SINGLE_COMPACTION,
@ -628,12 +622,12 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
options_->compaction_options_universal.max_merge_width; options_->compaction_options_universal.max_merge_width;
// The files are sorted from newest first to oldest last. // The files are sorted from newest first to oldest last.
std::vector<int>& file_by_time = version->files_by_size_[level]; const auto& files = version->files_[level];
FileMetaData* f = nullptr; FileMetaData* f = nullptr;
bool done = false; bool done = false;
int start_index = 0; int start_index = 0;
unsigned int candidate_count = 0; unsigned int candidate_count = 0;
assert(file_by_time.size() == version->files_[level].size());
unsigned int max_files_to_compact = std::min(max_merge_width, unsigned int max_files_to_compact = std::min(max_merge_width,
max_number_of_files_to_compact); max_number_of_files_to_compact);
@ -641,14 +635,13 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
// Considers a candidate file only if it is smaller than the // Considers a candidate file only if it is smaller than the
// total size accumulated so far. // total size accumulated so far.
for (unsigned int loop = 0; loop < file_by_time.size(); loop++) { for (unsigned int loop = 0; loop < files.size(); loop++) {
candidate_count = 0; candidate_count = 0;
// Skip files that are already being compacted // Skip files that are already being compacted
for (f = nullptr; loop < file_by_time.size(); loop++) { for (f = nullptr; loop < files.size(); loop++) {
int index = file_by_time[loop]; f = files[loop];
f = version->files_[level][index];
if (!f->being_compacted) { if (!f->being_compacted) {
candidate_count = 1; candidate_count = 1;
@ -670,11 +663,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
} }
// Check if the suceeding files need compaction. // Check if the suceeding files need compaction.
for (unsigned int i = loop+1; for (unsigned int i = loop + 1;
candidate_count < max_files_to_compact && i < file_by_time.size(); candidate_count < max_files_to_compact && i < files.size(); i++) {
i++) { FileMetaData* f = files[i];
int index = file_by_time[i];
FileMetaData* f = version->files_[level][index];
if (f->being_compacted) { if (f->being_compacted) {
break; break;
} }
@ -713,14 +704,14 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
break; break;
} else { } else {
for (unsigned int i = loop; for (unsigned int i = loop;
i < loop + candidate_count && i < file_by_time.size(); i++) { i < loop + candidate_count && i < files.size(); i++) {
int index = file_by_time[i]; FileMetaData* f = files[i];
FileMetaData* f = version->files_[level][index]; LogToBuffer(log_buffer, "[%s] Universal: Skipping file %" PRIu64
LogToBuffer(log_buffer, "[%d] with size %" PRIu64
"[%s] Universal: Skipping file %" PRIu64 "[%d] " " (compensated size %" PRIu64 ") %d\n",
"with size %" PRIu64 " (compensated size %" PRIu64 ") %d\n", version->cfd_->GetName().c_str(), f->fd.GetNumber(), i,
version->cfd_->GetName().c_str(), f->fd.GetNumber(), f->fd.GetFileSize(), f->compensated_file_size,
i, f->fd.GetFileSize(), f->compensated_file_size, f->being_compacted); f->being_compacted);
} }
} }
} }
@ -736,10 +727,9 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
if (ratio_to_compress >= 0) { if (ratio_to_compress >= 0) {
uint64_t total_size = version->NumLevelBytes(level); uint64_t total_size = version->NumLevelBytes(level);
uint64_t older_file_size = 0; uint64_t older_file_size = 0;
for (unsigned int i = file_by_time.size() - 1; i >= first_index_after; for (unsigned int i = files.size() - 1;
i--) { i >= first_index_after; i--) {
older_file_size += older_file_size += files[i]->fd.GetFileSize();
version->files_[level][file_by_time[i]]->fd.GetFileSize();
if (older_file_size * 100L >= total_size * (long) ratio_to_compress) { if (older_file_size * 100L >= total_size * (long) ratio_to_compress) {
enable_compression = false; enable_compression = false;
break; break;
@ -752,8 +742,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalReadAmp(
c->score_ = score; c->score_ = score;
for (unsigned int i = start_index; i < first_index_after; i++) { for (unsigned int i = start_index; i < first_index_after; i++) {
int index = file_by_time[i]; FileMetaData* f = c->input_version_->files_[level][i];
FileMetaData* f = c->input_version_->files_[level][index];
c->inputs_[0].push_back(f); c->inputs_[0].push_back(f);
LogToBuffer(log_buffer, LogToBuffer(log_buffer,
"[%s] Universal: Picking file %" PRIu64 "[%d] " "[%s] Universal: Picking file %" PRIu64 "[%d] "
@ -780,8 +769,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
max_size_amplification_percent; max_size_amplification_percent;
// The files are sorted from newest first to oldest last. // The files are sorted from newest first to oldest last.
std::vector<int>& file_by_time = version->files_by_size_[level]; const auto& files = version->files_[level];
assert(file_by_time.size() == version->files_[level].size());
unsigned int candidate_count = 0; unsigned int candidate_count = 0;
uint64_t candidate_size = 0; uint64_t candidate_size = 0;
@ -789,9 +777,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
FileMetaData* f = nullptr; FileMetaData* f = nullptr;
// Skip files that are already being compacted // Skip files that are already being compacted
for (unsigned int loop = 0; loop < file_by_time.size() - 1; loop++) { for (unsigned int loop = 0; loop < files.size() - 1; loop++) {
int index = file_by_time[loop]; f = files[loop];
f = version->files_[level][index];
if (!f->being_compacted) { if (!f->being_compacted) {
start_index = loop; // Consider this as the first candidate. start_index = loop; // Consider this as the first candidate.
break; break;
@ -812,10 +799,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
" to reduce size amp.\n"); " to reduce size amp.\n");
// keep adding up all the remaining files // keep adding up all the remaining files
for (unsigned int loop = start_index; loop < file_by_time.size() - 1; for (unsigned int loop = start_index; loop < files.size() - 1; loop++) {
loop++) { f = files[loop];
int index = file_by_time[loop];
f = version->files_[level][index];
if (f->being_compacted) { if (f->being_compacted) {
LogToBuffer( LogToBuffer(
log_buffer, log_buffer,
@ -832,8 +817,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
} }
// size of earliest file // size of earliest file
int index = file_by_time[file_by_time.size() - 1]; uint64_t earliest_file_size = files.back()->fd.GetFileSize();
uint64_t earliest_file_size = version->files_[level][index]->fd.GetFileSize();
// size amplification = percentage of additional size // size amplification = percentage of additional size
if (candidate_size * 100 < ratio * earliest_file_size) { if (candidate_size * 100 < ratio * earliest_file_size) {
@ -850,7 +834,7 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
"earliest-file-size %" PRIu64, "earliest-file-size %" PRIu64,
version->cfd_->GetName().c_str(), candidate_size, earliest_file_size); version->cfd_->GetName().c_str(), candidate_size, earliest_file_size);
} }
assert(start_index >= 0 && start_index < file_by_time.size() - 1); assert(start_index >= 0 && start_index < files.size() - 1);
// create a compaction request // create a compaction request
// We always compact all the files, so always compress. // We always compact all the files, so always compress.
@ -858,9 +842,8 @@ Compaction* UniversalCompactionPicker::PickCompactionUniversalSizeAmp(
new Compaction(version, level, level, MaxFileSizeForLevel(level), new Compaction(version, level, level, MaxFileSizeForLevel(level),
LLONG_MAX, false, true); LLONG_MAX, false, true);
c->score_ = score; c->score_ = score;
for (unsigned int loop = start_index; loop < file_by_time.size(); loop++) { for (unsigned int loop = start_index; loop < files.size(); loop++) {
int index = file_by_time[loop]; f = c->input_version_->files_[level][loop];
f = c->input_version_->files_[level][index];
c->inputs_[0].push_back(f); c->inputs_[0].push_back(f);
LogToBuffer(log_buffer, LogToBuffer(log_buffer,
"[%s] Universal: size amp picking file %" PRIu64 "[%d] " "[%s] Universal: size amp picking file %" PRIu64 "[%d] "

View File

@ -861,7 +861,6 @@ void Version::ComputeCompactionScore(
} }
namespace { namespace {
// Compator that is used to sort files based on their size // Compator that is used to sort files based on their size
// In normal mode: descending size // In normal mode: descending size
bool CompareCompensatedSizeDescending(const Version::Fsize& first, bool CompareCompensatedSizeDescending(const Version::Fsize& first,
@ -869,18 +868,6 @@ bool CompareCompensatedSizeDescending(const Version::Fsize& first,
return (first.file->compensated_file_size > return (first.file->compensated_file_size >
second.file->compensated_file_size); second.file->compensated_file_size);
} }
// A static compator used to sort files based on their seqno
// In universal style : descending seqno
bool CompareSeqnoDescending(const Version::Fsize& first,
const Version::Fsize& second) {
if (first.file->smallest_seqno > second.file->smallest_seqno) {
assert(first.file->largest_seqno > second.file->largest_seqno);
return true;
}
assert(first.file->largest_seqno <= second.file->largest_seqno);
return false;
}
} // anonymous namespace } // anonymous namespace
void Version::UpdateNumNonEmptyLevels() { void Version::UpdateNumNonEmptyLevels() {
@ -895,19 +882,15 @@ void Version::UpdateNumNonEmptyLevels() {
} }
void Version::UpdateFilesBySize() { void Version::UpdateFilesBySize() {
if (cfd_->options()->compaction_style == kCompactionStyleFIFO) { if (cfd_->options()->compaction_style == kCompactionStyleFIFO ||
cfd_->options()->compaction_style == kCompactionStyleUniversal) {
// don't need this // don't need this
return; return;
} }
// No need to sort the highest level because it is never compacted. // No need to sort the highest level because it is never compacted.
int max_level = for (int level = 0; level < NumberLevels() - 1; level++) {
(cfd_->options()->compaction_style == kCompactionStyleUniversal)
? NumberLevels()
: NumberLevels() - 1;
for (int level = 0; level < max_level; level++) {
const std::vector<FileMetaData*>& files = files_[level]; const std::vector<FileMetaData*>& files = files_[level];
std::vector<int>& files_by_size = files_by_size_[level]; auto& files_by_size = files_by_size_[level];
assert(files_by_size.size() == 0); assert(files_by_size.size() == 0);
// populate a temp vector for sorting based on size // populate a temp vector for sorting based on size
@ -918,18 +901,12 @@ void Version::UpdateFilesBySize() {
} }
// sort the top number_of_files_to_sort_ based on file size // sort the top number_of_files_to_sort_ based on file size
if (cfd_->options()->compaction_style == kCompactionStyleUniversal) { size_t num = Version::number_of_files_to_sort_;
int num = temp.size(); if (num > temp.size()) {
std::partial_sort(temp.begin(), temp.begin() + num, temp.end(),
CompareSeqnoDescending);
} else {
int num = Version::number_of_files_to_sort_;
if (num > (int)temp.size()) {
num = temp.size(); num = temp.size();
} }
std::partial_sort(temp.begin(), temp.begin() + num, temp.end(), std::partial_sort(temp.begin(), temp.begin() + num, temp.end(),
CompareCompensatedSizeDescending); CompareCompensatedSizeDescending);
}
assert(temp.size() == files.size()); assert(temp.size() == files.size());
// initialize files_by_size_ // initialize files_by_size_

View File

@ -294,7 +294,7 @@ class Version {
// that on a running system, we need to look at only the first // that on a running system, we need to look at only the first
// few largest files because a new version is created every few // few largest files because a new version is created every few
// seconds/minutes (because of concurrent compactions). // seconds/minutes (because of concurrent compactions).
static const int number_of_files_to_sort_ = 50; static const size_t number_of_files_to_sort_ = 50;
// Level that should be compacted next and its compaction score. // Level that should be compacted next and its compaction score.
// Score < 1 means compaction is not strictly needed. These fields // Score < 1 means compaction is not strictly needed. These fields