2014-01-16 01:22:34 +01:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#include "db/compaction.h"
|
2014-05-14 21:13:50 +02:00
|
|
|
|
|
|
|
#define __STDC_FORMAT_MACROS
|
|
|
|
#include <inttypes.h>
|
|
|
|
#include <vector>
|
|
|
|
|
2014-02-01 01:45:20 +01:00
|
|
|
#include "db/column_family.h"
|
2014-05-14 21:13:50 +02:00
|
|
|
#include "util/logging.h"
|
2014-01-16 01:22:34 +01:00
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
// Adds up the on-disk sizes of the files in `files`.
// Summation stops at the first null entry, or at the end of the vector,
// whichever comes first.
static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
  uint64_t total_bytes = 0;
  for (FileMetaData* meta : files) {
    if (!meta) {
      break;
    }
    total_bytes += meta->fd.GetFileSize();
  }
  return total_bytes;
}
|
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
// Builds a compaction over the levels [start_level, out_level] of
// `input_version`.
//
// The constructor takes a reference on both the input version and its column
// family, allocates the VersionEdit that will describe the result, and
// prepares one (initially empty) input slot per level in the range.
Compaction::Compaction(Version* input_version, int start_level, int out_level,
                       uint64_t target_file_size,
                       uint64_t max_grandparent_overlap_bytes,
                       uint32_t output_path_id,
                       CompressionType output_compression, bool seek_compaction,
                       bool deletion_compaction)
    : start_level_(start_level),
      output_level_(out_level),
      max_output_file_size_(target_file_size),
      max_grandparent_overlap_bytes_(max_grandparent_overlap_bytes),
      input_version_(input_version),
      number_levels_(input_version_->NumberLevels()),
      cfd_(input_version_->cfd_),
      output_path_id_(output_path_id),
      output_compression_(output_compression),
      seek_compaction_(seek_compaction),
      deletion_compaction_(deletion_compaction),
      grandparent_index_(0),
      seen_key_(false),
      overlapped_bytes_(0),
      base_index_(-1),
      parent_index_(-1),
      score_(0),
      bottommost_level_(false),
      is_full_compaction_(false),
      is_manual_compaction_(false),
      level_ptrs_(std::vector<size_t>(number_levels_)) {
  // Pin the column family and the version for the lifetime of the compaction.
  cfd_->Ref();
  input_version_->Ref();

  // The edit records the outcome of this compaction for cfd_.
  edit_ = new VersionEdit();
  edit_->SetColumnFamily(cfd_->GetID());

  // Every per-level scan position starts at the first file.
  for (int lvl = 0; lvl < number_levels_; ++lvl) {
    level_ptrs_[lvl] = 0;
  }

  // One input slot for each level from start_level_ through output_level_.
  const int level_span = output_level_ - start_level_ + 1;
  input_levels_.resize(level_span);
  inputs_.resize(level_span);
  for (int offset = 0; offset < level_span; ++offset) {
    inputs_[offset].level = start_level_ + offset;
  }
}
|
|
|
|
|
|
|
|
// Frees the edit and drops whatever references are still held on the input
// version and the column family. The column family is deleted when Unref()
// reports that it should be.
Compaction::~Compaction() {
  delete edit_;

  if (input_version_) {
    input_version_->Unref();
  }

  if (cfd_ && cfd_->Unref()) {
    delete cfd_;
  }
}
|
|
|
|
|
2014-07-11 21:52:41 +02:00
|
|
|
void Compaction::GenerateFileLevels() {
|
2014-07-25 02:06:00 +02:00
|
|
|
input_levels_.resize(num_input_levels());
|
|
|
|
for (int which = 0; which < num_input_levels(); which++) {
|
2014-07-17 03:12:17 +02:00
|
|
|
DoGenerateFileLevel(&input_levels_[which], inputs_[which].files, &arena_);
|
2014-07-11 21:52:41 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
bool Compaction::IsTrivialMove() const {
|
|
|
|
// Avoid a move if there is lots of overlapping grandparent data.
|
|
|
|
// Otherwise, the move could create a parent file that will require
|
|
|
|
// a very expensive merge later on.
|
2014-07-17 23:36:41 +02:00
|
|
|
// If start_level_== output_level_, the purpose is to force compaction
|
|
|
|
// filter to be applied to that level, and thus cannot be a trivia move.
|
|
|
|
return (start_level_ != output_level_ &&
|
|
|
|
num_input_levels() == 2 &&
|
2014-01-16 01:22:34 +01:00
|
|
|
num_input_files(0) == 1 &&
|
|
|
|
num_input_files(1) == 0 &&
|
2014-01-22 19:55:16 +01:00
|
|
|
TotalFileSize(grandparents_) <= max_grandparent_overlap_bytes_);
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Records in `edit` the deletion of every input file of this compaction,
// each at the level of the input slot it belongs to.
void Compaction::AddInputDeletions(VersionEdit* edit) {
  for (int slot = 0; slot < num_input_levels(); ++slot) {
    auto& slot_files = inputs_[slot];
    for (size_t pos = 0; pos < slot_files.size(); ++pos) {
      edit->DeleteFile(level(slot), slot_files[pos]->fd.GetNumber());
    }
  }
}
|
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
bool Compaction::KeyNotExistsBeyondOutputLevel(const Slice& user_key) {
|
2014-05-21 20:43:35 +02:00
|
|
|
assert(cfd_->options()->compaction_style != kCompactionStyleFIFO);
|
2014-02-03 21:08:33 +01:00
|
|
|
if (cfd_->options()->compaction_style == kCompactionStyleUniversal) {
|
2014-01-16 01:22:34 +01:00
|
|
|
return bottommost_level_;
|
|
|
|
}
|
|
|
|
// Maybe use binary search to find right entry instead of linear search?
|
2014-02-03 21:08:33 +01:00
|
|
|
const Comparator* user_cmp = cfd_->user_comparator();
|
2014-07-17 23:36:41 +02:00
|
|
|
for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) {
|
2014-01-16 01:22:34 +01:00
|
|
|
const std::vector<FileMetaData*>& files = input_version_->files_[lvl];
|
|
|
|
for (; level_ptrs_[lvl] < files.size(); ) {
|
|
|
|
FileMetaData* f = files[level_ptrs_[lvl]];
|
|
|
|
if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) {
|
|
|
|
// We've advanced far enough
|
|
|
|
if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) {
|
2014-07-17 23:36:41 +02:00
|
|
|
// Key falls in this file's range, so definitely
|
|
|
|
// exists beyond output level
|
2014-01-16 01:22:34 +01:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
level_ptrs_[lvl]++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Compaction::ShouldStopBefore(const Slice& internal_key) {
|
|
|
|
// Scan to find earliest grandparent file that contains key.
|
2014-02-03 21:08:33 +01:00
|
|
|
const InternalKeyComparator* icmp = &cfd_->internal_comparator();
|
2014-01-16 01:22:34 +01:00
|
|
|
while (grandparent_index_ < grandparents_.size() &&
|
|
|
|
icmp->Compare(internal_key,
|
|
|
|
grandparents_[grandparent_index_]->largest.Encode()) > 0) {
|
|
|
|
if (seen_key_) {
|
2014-06-14 00:54:19 +02:00
|
|
|
overlapped_bytes_ += grandparents_[grandparent_index_]->fd.GetFileSize();
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
assert(grandparent_index_ + 1 >= grandparents_.size() ||
|
|
|
|
icmp->Compare(grandparents_[grandparent_index_]->largest.Encode(),
|
|
|
|
grandparents_[grandparent_index_+1]->smallest.Encode())
|
|
|
|
< 0);
|
|
|
|
grandparent_index_++;
|
|
|
|
}
|
|
|
|
seen_key_ = true;
|
|
|
|
|
2014-01-22 19:55:16 +01:00
|
|
|
if (overlapped_bytes_ > max_grandparent_overlap_bytes_) {
|
2014-01-16 01:22:34 +01:00
|
|
|
// Too much overlap for current output; start new output
|
|
|
|
overlapped_bytes_ = 0;
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Mark (or clear) each file that is being compacted
|
2014-07-17 23:36:41 +02:00
|
|
|
void Compaction::MarkFilesBeingCompacted(bool mark_as_compacted) {
|
|
|
|
for (int i = 0; i < num_input_levels(); i++) {
|
2014-01-16 01:22:34 +01:00
|
|
|
for (unsigned int j = 0; j < inputs_[i].size(); j++) {
|
2014-07-17 23:36:41 +02:00
|
|
|
assert(mark_as_compacted ? !inputs_[i][j]->being_compacted :
|
|
|
|
inputs_[i][j]->being_compacted);
|
|
|
|
inputs_[i][j]->being_compacted = mark_as_compacted;
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Is this compaction producing files at the bottommost level?
|
2014-07-17 23:36:41 +02:00
|
|
|
void Compaction::SetupBottomMostLevel(bool is_manual) {
|
2014-05-21 20:43:35 +02:00
|
|
|
assert(cfd_->options()->compaction_style != kCompactionStyleFIFO);
|
2014-02-03 21:08:33 +01:00
|
|
|
if (cfd_->options()->compaction_style == kCompactionStyleUniversal) {
|
2014-01-16 01:22:34 +01:00
|
|
|
// If universal compaction style is used and manual
|
|
|
|
// compaction is occuring, then we are guaranteed that
|
|
|
|
// all files will be picked in a single compaction
|
|
|
|
// run. We can safely set bottommost_level_ = true.
|
|
|
|
// If it is not manual compaction, then bottommost_level_
|
|
|
|
// is already set when the Compaction was created.
|
2014-07-17 23:36:41 +02:00
|
|
|
if (is_manual) {
|
2014-01-16 01:22:34 +01:00
|
|
|
bottommost_level_ = true;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
bottommost_level_ = true;
|
2014-07-17 23:36:41 +02:00
|
|
|
// checks whether there are files living beyond the output_level.
|
|
|
|
for (int i = output_level_ + 1; i < number_levels_; i++) {
|
2014-01-16 01:22:34 +01:00
|
|
|
if (input_version_->NumLevelFiles(i) > 0) {
|
|
|
|
bottommost_level_ = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void Compaction::ReleaseInputs() {
|
|
|
|
if (input_version_ != nullptr) {
|
|
|
|
input_version_->Unref();
|
|
|
|
input_version_ = nullptr;
|
|
|
|
}
|
2014-02-11 02:04:44 +01:00
|
|
|
if (cfd_ != nullptr) {
|
|
|
|
if (cfd_->Unref()) {
|
|
|
|
delete cfd_;
|
|
|
|
}
|
|
|
|
cfd_ = nullptr;
|
|
|
|
}
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
|
2014-02-01 01:45:20 +01:00
|
|
|
// Forwards to the column family's compaction picker, handing it this
// compaction together with `status`.
void Compaction::ReleaseCompactionFiles(Status status) {
  auto* picker = cfd_->compaction_picker();
  picker->ReleaseCompactionFiles(this, status);
}
|
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
void Compaction::ResetNextCompactionIndex() {
|
2014-07-17 23:36:41 +02:00
|
|
|
input_version_->ResetNextCompactionIndex(start_level_);
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
|
2014-05-14 21:13:50 +02:00
|
|
|
namespace {
|
|
|
|
int InputSummary(const std::vector<FileMetaData*>& files, char* output,
|
|
|
|
int len) {
|
2014-03-20 00:01:25 +01:00
|
|
|
*output = '\0';
|
2014-01-16 01:22:34 +01:00
|
|
|
int write = 0;
|
|
|
|
for (unsigned int i = 0; i < files.size(); i++) {
|
|
|
|
int sz = len - write;
|
2013-12-23 17:54:50 +01:00
|
|
|
int ret;
|
|
|
|
char sztxt[16];
|
2014-06-14 00:54:19 +02:00
|
|
|
AppendHumanBytes(files.at(i)->fd.GetFileSize(), sztxt, 16);
|
|
|
|
ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ",
|
|
|
|
files.at(i)->fd.GetNumber(), sztxt);
|
2014-05-14 21:13:50 +02:00
|
|
|
if (ret < 0 || ret >= sz) break;
|
2014-01-16 01:22:34 +01:00
|
|
|
write += ret;
|
|
|
|
}
|
2014-05-14 21:13:50 +02:00
|
|
|
// if files.size() is non-zero, overwrite the last space
|
|
|
|
return write - !!files.size();
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
2014-05-14 21:13:50 +02:00
|
|
|
} // namespace
|
2014-01-16 01:22:34 +01:00
|
|
|
|
|
|
|
void Compaction::Summary(char* output, int len) {
|
2014-05-14 21:13:50 +02:00
|
|
|
int write =
|
|
|
|
snprintf(output, len, "Base version %" PRIu64
|
|
|
|
" Base level %d, seek compaction:%d, inputs: [",
|
2014-07-17 23:36:41 +02:00
|
|
|
input_version_->GetVersionNumber(),
|
|
|
|
start_level_, seek_compaction_);
|
2014-01-25 23:12:24 +01:00
|
|
|
if (write < 0 || write >= len) {
|
2014-01-16 01:22:34 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
for (int level = 0; level < num_input_levels(); ++level) {
|
|
|
|
if (level > 0) {
|
|
|
|
write += snprintf(output + write, len - write, "], [");
|
|
|
|
if (write < 0 || write >= len) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
write += InputSummary(inputs_[level].files, output + write, len - write);
|
|
|
|
if (write < 0 || write >= len) {
|
|
|
|
return;
|
|
|
|
}
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
|
2014-05-14 21:13:50 +02:00
|
|
|
snprintf(output + write, len - write, "]");
|
2014-01-16 01:22:34 +01:00
|
|
|
}
|
|
|
|
|
2014-06-05 22:19:35 +02:00
|
|
|
uint64_t Compaction::OutputFilePreallocationSize() {
|
|
|
|
uint64_t preallocation_size = 0;
|
|
|
|
|
|
|
|
if (cfd_->options()->compaction_style == kCompactionStyleLevel) {
|
|
|
|
preallocation_size =
|
|
|
|
cfd_->compaction_picker()->MaxFileSizeForLevel(output_level());
|
|
|
|
} else {
|
2014-07-17 23:36:41 +02:00
|
|
|
for (int level = 0; level < num_input_levels(); ++level) {
|
|
|
|
for (const auto& f : inputs_[level].files) {
|
|
|
|
preallocation_size += f->fd.GetFileSize();
|
|
|
|
}
|
2014-06-05 22:19:35 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// Over-estimate slightly so we don't end up just barely crossing
|
|
|
|
// the threshold
|
|
|
|
return preallocation_size * 1.1;
|
|
|
|
}
|
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
} // namespace rocksdb
|