3c37b3cccd
Summary: Up to this point, the subcompactions that make up a compaction job have been divided based on the key range of the L1 files, and each subcompaction has handled the key range of only one file. However DBOption.max_subcompactions allows the user to designate how many subcompactions at most to perform. This patch updates the CompactionJob::GetSubcompactionBoundaries() to determine these divisions accordingly based on that option and other input/system factors. The current approach orders the starting and/or ending keys of certain compaction input files and then generates a histogram to approximate the size covered by the key range between each consecutive pair of keys. Then it groups these ranges into groups so that the sizes are approximately equal to one another. The approach has also been adapted to work for universal compaction as well instead of just for level-based compaction as it was before. These subcompactions are then executed in parallel by locally spawning threads, one for each. The results are then aggregated and the compaction completed. Test Plan: make all && make check Reviewers: yhchiang, anthony, igor, noetzli, sdong Reviewed By: sdong Subscribers: MarkCallaghan, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D43269
398 lines
13 KiB
C++
398 lines
13 KiB
C++
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under the BSD-style license found in the
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#include "db/compaction.h"
|
|
|
|
#ifndef __STDC_FORMAT_MACROS
|
|
#define __STDC_FORMAT_MACROS
|
|
#endif
|
|
|
|
#include <inttypes.h>
|
|
#include <vector>
|
|
|
|
#include "rocksdb/compaction_filter.h"
|
|
#include "db/column_family.h"
|
|
#include "util/logging.h"
|
|
#include "util/sync_point.h"
|
|
|
|
namespace rocksdb {
|
|
|
|
uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
|
|
uint64_t sum = 0;
|
|
for (size_t i = 0; i < files.size() && files[i]; i++) {
|
|
sum += files[i]->fd.GetFileSize();
|
|
}
|
|
return sum;
|
|
}
|
|
|
|
void Compaction::SetInputVersion(Version* _input_version) {
|
|
input_version_ = _input_version;
|
|
cfd_ = input_version_->cfd();
|
|
|
|
cfd_->Ref();
|
|
input_version_->Ref();
|
|
edit_.SetColumnFamily(cfd_->GetID());
|
|
}
|
|
|
|
// helper function to determine if compaction is creating files at the
|
|
// bottommost level
|
|
bool Compaction::IsBottommostLevel(
|
|
int output_level, VersionStorageInfo* vstorage,
|
|
const std::vector<CompactionInputFiles>& inputs) {
|
|
if (inputs[0].level == 0 &&
|
|
inputs[0].files.back() != vstorage->LevelFiles(0).back()) {
|
|
return false;
|
|
}
|
|
|
|
// checks whether there are files living beyond the output_level.
|
|
for (int i = output_level + 1; i < vstorage->num_levels(); i++) {
|
|
if (vstorage->NumLevelFiles(i) > 0) {
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool Compaction::IsFullCompaction(
|
|
VersionStorageInfo* vstorage,
|
|
const std::vector<CompactionInputFiles>& inputs) {
|
|
int num_files_in_compaction = 0;
|
|
int total_num_files = 0;
|
|
for (int l = 0; l < vstorage->num_levels(); l++) {
|
|
total_num_files += vstorage->NumLevelFiles(l);
|
|
}
|
|
for (size_t i = 0; i < inputs.size(); i++) {
|
|
num_files_in_compaction += inputs[i].size();
|
|
}
|
|
return num_files_in_compaction == total_num_files;
|
|
}
|
|
|
|
Compaction::Compaction(VersionStorageInfo* vstorage,
|
|
const MutableCFOptions& _mutable_cf_options,
|
|
std::vector<CompactionInputFiles> _inputs,
|
|
int _output_level, uint64_t _target_file_size,
|
|
uint64_t _max_grandparent_overlap_bytes,
|
|
uint32_t _output_path_id, CompressionType _compression,
|
|
std::vector<FileMetaData*> _grandparents,
|
|
bool _manual_compaction, double _score,
|
|
bool _deletion_compaction)
|
|
: start_level_(_inputs[0].level),
|
|
output_level_(_output_level),
|
|
max_output_file_size_(_target_file_size),
|
|
max_grandparent_overlap_bytes_(_max_grandparent_overlap_bytes),
|
|
mutable_cf_options_(_mutable_cf_options),
|
|
input_version_(nullptr),
|
|
number_levels_(vstorage->num_levels()),
|
|
cfd_(nullptr),
|
|
output_path_id_(_output_path_id),
|
|
output_compression_(_compression),
|
|
deletion_compaction_(_deletion_compaction),
|
|
inputs_(std::move(_inputs)),
|
|
grandparents_(std::move(_grandparents)),
|
|
grandparent_index_(0),
|
|
seen_key_(false),
|
|
overlapped_bytes_(0),
|
|
score_(_score),
|
|
bottommost_level_(IsBottommostLevel(output_level_, vstorage, inputs_)),
|
|
is_full_compaction_(IsFullCompaction(vstorage, inputs_)),
|
|
is_manual_compaction_(_manual_compaction) {
|
|
MarkFilesBeingCompacted(true);
|
|
|
|
#ifndef NDEBUG
|
|
for (size_t i = 1; i < inputs_.size(); ++i) {
|
|
assert(inputs_[i].level > inputs_[i - 1].level);
|
|
}
|
|
#endif
|
|
|
|
// setup input_levels_
|
|
{
|
|
input_levels_.resize(num_input_levels());
|
|
for (size_t which = 0; which < num_input_levels(); which++) {
|
|
DoGenerateLevelFilesBrief(&input_levels_[which], inputs_[which].files,
|
|
&arena_);
|
|
}
|
|
}
|
|
}
|
|
|
|
Compaction::~Compaction() {
|
|
if (input_version_ != nullptr) {
|
|
input_version_->Unref();
|
|
}
|
|
if (cfd_ != nullptr) {
|
|
if (cfd_->Unref()) {
|
|
delete cfd_;
|
|
}
|
|
}
|
|
}
|
|
|
|
bool Compaction::InputCompressionMatchesOutput() const {
|
|
int base_level = input_version_->storage_info()->base_level();
|
|
bool matches = (GetCompressionType(*cfd_->ioptions(), start_level_,
|
|
base_level) == output_compression_);
|
|
if (matches) {
|
|
TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:Matches");
|
|
return true;
|
|
}
|
|
TEST_SYNC_POINT("Compaction::InputCompressionMatchesOutput:DidntMatch");
|
|
return matches;
|
|
}
|
|
|
|
bool Compaction::IsTrivialMove() const {
|
|
// Avoid a move if there is lots of overlapping grandparent data.
|
|
// Otherwise, the move could create a parent file that will require
|
|
// a very expensive merge later on.
|
|
// If start_level_== output_level_, the purpose is to force compaction
|
|
// filter to be applied to that level, and thus cannot be a trivial move.
|
|
|
|
// Check if start level have files with overlapping ranges
|
|
if (start_level_ == 0 &&
|
|
input_version_->storage_info()->level0_non_overlapping() == false) {
|
|
// We cannot move files from L0 to L1 if the files are overlapping
|
|
return false;
|
|
}
|
|
|
|
if (is_manual_compaction_ &&
|
|
(cfd_->ioptions()->compaction_filter != nullptr ||
|
|
cfd_->ioptions()->compaction_filter_factory != nullptr)) {
|
|
// This is a manual compaction and we have a compaction filter that should
|
|
// be executed, we cannot do a trivial move
|
|
return false;
|
|
}
|
|
|
|
// Used in universal compaction, where trivial move can be done if the
|
|
// input files are non overlapping
|
|
if ((cfd_->ioptions()->compaction_options_universal.allow_trivial_move) &&
|
|
(output_level_ != 0)) {
|
|
return is_trivial_move_;
|
|
}
|
|
|
|
return (start_level_ != output_level_ && num_input_levels() == 1 &&
|
|
input(0, 0)->fd.GetPathId() == output_path_id() &&
|
|
InputCompressionMatchesOutput() &&
|
|
TotalFileSize(grandparents_) <= max_grandparent_overlap_bytes_);
|
|
}
|
|
|
|
void Compaction::AddInputDeletions(VersionEdit* out_edit) {
|
|
for (size_t which = 0; which < num_input_levels(); which++) {
|
|
for (size_t i = 0; i < inputs_[which].size(); i++) {
|
|
out_edit->DeleteFile(level(which), inputs_[which][i]->fd.GetNumber());
|
|
}
|
|
}
|
|
}
|
|
|
|
bool Compaction::KeyNotExistsBeyondOutputLevel(
|
|
const Slice& user_key, std::vector<size_t>* level_ptrs) const {
|
|
assert(input_version_ != nullptr);
|
|
assert(level_ptrs != nullptr);
|
|
assert(level_ptrs->size() == static_cast<size_t>(number_levels_));
|
|
assert(cfd_->ioptions()->compaction_style != kCompactionStyleFIFO);
|
|
if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) {
|
|
return bottommost_level_;
|
|
}
|
|
// Maybe use binary search to find right entry instead of linear search?
|
|
const Comparator* user_cmp = cfd_->user_comparator();
|
|
for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) {
|
|
const std::vector<FileMetaData*>& files =
|
|
input_version_->storage_info()->LevelFiles(lvl);
|
|
for (; level_ptrs->at(lvl) < files.size(); level_ptrs->at(lvl)++) {
|
|
auto* f = files[level_ptrs->at(lvl)];
|
|
if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) {
|
|
// We've advanced far enough
|
|
if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) {
|
|
// Key falls in this file's range, so definitely
|
|
// exists beyond output level
|
|
return false;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool Compaction::ShouldStopBefore(const Slice& internal_key) {
|
|
// Scan to find earliest grandparent file that contains key.
|
|
const InternalKeyComparator* icmp = &cfd_->internal_comparator();
|
|
while (grandparent_index_ < grandparents_.size() &&
|
|
icmp->Compare(internal_key,
|
|
grandparents_[grandparent_index_]->largest.Encode()) > 0) {
|
|
if (seen_key_) {
|
|
overlapped_bytes_ += grandparents_[grandparent_index_]->fd.GetFileSize();
|
|
}
|
|
assert(grandparent_index_ + 1 >= grandparents_.size() ||
|
|
icmp->Compare(grandparents_[grandparent_index_]->largest.Encode(),
|
|
grandparents_[grandparent_index_+1]->smallest.Encode())
|
|
< 0);
|
|
grandparent_index_++;
|
|
}
|
|
seen_key_ = true;
|
|
|
|
if (overlapped_bytes_ > max_grandparent_overlap_bytes_) {
|
|
// Too much overlap for current output; start new output
|
|
overlapped_bytes_ = 0;
|
|
return true;
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Mark (or clear) each file that is being compacted
|
|
void Compaction::MarkFilesBeingCompacted(bool mark_as_compacted) {
|
|
for (size_t i = 0; i < num_input_levels(); i++) {
|
|
for (unsigned int j = 0; j < inputs_[i].size(); j++) {
|
|
assert(mark_as_compacted ? !inputs_[i][j]->being_compacted :
|
|
inputs_[i][j]->being_compacted);
|
|
inputs_[i][j]->being_compacted = mark_as_compacted;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sample output:
|
|
// If compacting 3 L0 files, 2 L3 files and 1 L4 file, and outputting to L5,
|
|
// print: "3@0 + 2@3 + 1@4 files to L5"
|
|
const char* Compaction::InputLevelSummary(
|
|
InputLevelSummaryBuffer* scratch) const {
|
|
int len = 0;
|
|
bool is_first = true;
|
|
for (auto& input_level : inputs_) {
|
|
if (input_level.empty()) {
|
|
continue;
|
|
}
|
|
if (!is_first) {
|
|
len +=
|
|
snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, " + ");
|
|
} else {
|
|
is_first = false;
|
|
}
|
|
len += snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len,
|
|
"%" ROCKSDB_PRIszt "@%d", input_level.size(),
|
|
input_level.level);
|
|
}
|
|
snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len,
|
|
" files to L%d", output_level());
|
|
|
|
return scratch->buffer;
|
|
}
|
|
|
|
uint64_t Compaction::CalculateTotalInputSize() const {
|
|
uint64_t size = 0;
|
|
for (auto& input_level : inputs_) {
|
|
for (auto f : input_level.files) {
|
|
size += f->fd.GetFileSize();
|
|
}
|
|
}
|
|
return size;
|
|
}
|
|
|
|
void Compaction::ReleaseCompactionFiles(Status status) {
|
|
MarkFilesBeingCompacted(false);
|
|
cfd_->compaction_picker()->ReleaseCompactionFiles(this, status);
|
|
}
|
|
|
|
void Compaction::ResetNextCompactionIndex() {
|
|
assert(input_version_ != nullptr);
|
|
input_version_->storage_info()->ResetNextCompactionIndex(start_level_);
|
|
}
|
|
|
|
namespace {
|
|
int InputSummary(const std::vector<FileMetaData*>& files, char* output,
|
|
int len) {
|
|
*output = '\0';
|
|
int write = 0;
|
|
for (unsigned int i = 0; i < files.size(); i++) {
|
|
int sz = len - write;
|
|
int ret;
|
|
char sztxt[16];
|
|
AppendHumanBytes(files.at(i)->fd.GetFileSize(), sztxt, 16);
|
|
ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ",
|
|
files.at(i)->fd.GetNumber(), sztxt);
|
|
if (ret < 0 || ret >= sz) break;
|
|
write += ret;
|
|
}
|
|
// if files.size() is non-zero, overwrite the last space
|
|
return write - !!files.size();
|
|
}
|
|
} // namespace
|
|
|
|
void Compaction::Summary(char* output, int len) {
|
|
int write =
|
|
snprintf(output, len, "Base version %" PRIu64
|
|
" Base level %d, inputs: [",
|
|
input_version_->GetVersionNumber(),
|
|
start_level_);
|
|
if (write < 0 || write >= len) {
|
|
return;
|
|
}
|
|
|
|
for (size_t level_iter = 0; level_iter < num_input_levels(); ++level_iter) {
|
|
if (level_iter > 0) {
|
|
write += snprintf(output + write, len - write, "], [");
|
|
if (write < 0 || write >= len) {
|
|
return;
|
|
}
|
|
}
|
|
write +=
|
|
InputSummary(inputs_[level_iter].files, output + write, len - write);
|
|
if (write < 0 || write >= len) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
snprintf(output + write, len - write, "]");
|
|
}
|
|
|
|
uint64_t Compaction::OutputFilePreallocationSize() {
|
|
uint64_t preallocation_size = 0;
|
|
|
|
if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel ||
|
|
output_level() > 0) {
|
|
preallocation_size = max_output_file_size_;
|
|
} else {
|
|
// output_level() == 0
|
|
assert(num_input_levels() > 0);
|
|
for (const auto& f : inputs_[0].files) {
|
|
preallocation_size += f->fd.GetFileSize();
|
|
}
|
|
}
|
|
// Over-estimate slightly so we don't end up just barely crossing
|
|
// the threshold
|
|
return preallocation_size * 1.1;
|
|
}
|
|
|
|
std::unique_ptr<CompactionFilter> Compaction::CreateCompactionFilter() const {
|
|
if (!cfd_->ioptions()->compaction_filter_factory) {
|
|
return nullptr;
|
|
}
|
|
|
|
CompactionFilter::Context context;
|
|
context.is_full_compaction = is_full_compaction_;
|
|
context.is_manual_compaction = is_manual_compaction_;
|
|
return cfd_->ioptions()->compaction_filter_factory->CreateCompactionFilter(
|
|
context);
|
|
}
|
|
|
|
bool Compaction::IsOutputLevelEmpty() const {
|
|
return inputs_.back().level != output_level_ || inputs_.back().empty();
|
|
}
|
|
|
|
bool Compaction::ShouldFormSubcompactions() const {
|
|
if (mutable_cf_options_.max_subcompactions <= 1 || cfd_ == nullptr) {
|
|
return false;
|
|
}
|
|
if (cfd_->ioptions()->compaction_style == kCompactionStyleLevel) {
|
|
return start_level_ == 0 && !IsOutputLevelEmpty();
|
|
} else if (cfd_->ioptions()->compaction_style == kCompactionStyleUniversal) {
|
|
return number_levels_ > 1 && output_level_ > 0;
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
} // namespace rocksdb
|