Allow class Compaction to handle input files from multiple levels.

Summary:
Allow class Compaction to handle input files from multiple levels.
This diff is a subset of https://reviews.facebook.net/D19263 where
only db/compaction.cc and db/compaction.h are changed.

Test Plan:
make db_test
export ROCKSDB_TESTS=Compaction
./db_test

Reviewers: igor, sdong, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D19923
This commit is contained in:
Yueh-Hsuan Chiang 2014-07-17 14:36:41 -07:00
parent 296e340753
commit 3178510153
3 changed files with 128 additions and 84 deletions

View File

@ -26,14 +26,14 @@ static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
return sum;
}
Compaction::Compaction(Version* input_version, int level, int out_level,
Compaction::Compaction(Version* input_version, int start_level, int out_level,
uint64_t target_file_size,
uint64_t max_grandparent_overlap_bytes,
uint32_t output_path_id,
CompressionType output_compression, bool seek_compaction,
bool deletion_compaction)
: level_(level),
out_level_(out_level),
: start_level_(start_level),
output_level_(out_level),
max_output_file_size_(target_file_size),
max_grandparent_overlap_bytes_(max_grandparent_overlap_bytes),
input_version_(input_version),
@ -61,8 +61,10 @@ Compaction::Compaction(Version* input_version, int level, int out_level,
for (int i = 0; i < number_levels_; i++) {
level_ptrs_[i] = 0;
}
for (int i = 0; i < 2; ++i) {
inputs_[i].level = level_ + i;
int num_levels = output_level_ - start_level_ + 1;
inputs_.resize(num_levels);
for (int i = 0; i < num_levels; ++i) {
inputs_[i].level = start_level_ + i;
}
}
@ -89,39 +91,39 @@ bool Compaction::IsTrivialMove() const {
// Avoid a move if there is lots of overlapping grandparent data.
// Otherwise, the move could create a parent file that will require
// a very expensive merge later on.
// If level_== out_level_, the purpose is to force compaction filter to be
// applied to that level, and thus cannot be a trivia move.
return (level_ != out_level_ &&
// If start_level_== output_level_, the purpose is to force compaction
// filter to be applied to that level, and thus cannot be a trivia move.
return (start_level_ != output_level_ &&
num_input_levels() == 2 &&
num_input_files(0) == 1 &&
num_input_files(1) == 0 &&
TotalFileSize(grandparents_) <= max_grandparent_overlap_bytes_);
}
bool Compaction::IsDeletionCompaction() const { return deletion_compaction_; }
void Compaction::AddInputDeletions(VersionEdit* edit) {
for (int which = 0; which < 2; which++) {
for (int which = 0; which < num_input_levels(); which++) {
for (size_t i = 0; i < inputs_[which].size(); i++) {
edit->DeleteFile(level_ + which, inputs_[which][i]->fd.GetNumber());
edit->DeleteFile(level(which), inputs_[which][i]->fd.GetNumber());
}
}
}
bool Compaction::IsBaseLevelForKey(const Slice& user_key) {
bool Compaction::KeyNotExistsBeyondOutputLevel(const Slice& user_key) {
assert(cfd_->options()->compaction_style != kCompactionStyleFIFO);
if (cfd_->options()->compaction_style == kCompactionStyleUniversal) {
return bottommost_level_;
}
// Maybe use binary search to find right entry instead of linear search?
const Comparator* user_cmp = cfd_->user_comparator();
for (int lvl = level_ + 2; lvl < number_levels_; lvl++) {
for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) {
const std::vector<FileMetaData*>& files = input_version_->files_[lvl];
for (; level_ptrs_[lvl] < files.size(); ) {
FileMetaData* f = files[level_ptrs_[lvl]];
if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) {
// We've advanced far enough
if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) {
// Key falls in this file's range, so definitely not base level
// Key falls in this file's range, so definitely
// exists beyond output level
return false;
}
break;
@ -159,18 +161,18 @@ bool Compaction::ShouldStopBefore(const Slice& internal_key) {
}
// Mark (or clear) each file that is being compacted
void Compaction::MarkFilesBeingCompacted(bool value) {
for (int i = 0; i < 2; i++) {
void Compaction::MarkFilesBeingCompacted(bool mark_as_compacted) {
for (int i = 0; i < num_input_levels(); i++) {
for (unsigned int j = 0; j < inputs_[i].size(); j++) {
assert(value ? !inputs_[i][j]->being_compacted :
inputs_[i][j]->being_compacted);
inputs_[i][j]->being_compacted = value;
assert(mark_as_compacted ? !inputs_[i][j]->being_compacted :
inputs_[i][j]->being_compacted);
inputs_[i][j]->being_compacted = mark_as_compacted;
}
}
}
// Is this compaction producing files at the bottommost level?
void Compaction::SetupBottomMostLevel(bool isManual) {
void Compaction::SetupBottomMostLevel(bool is_manual) {
assert(cfd_->options()->compaction_style != kCompactionStyleFIFO);
if (cfd_->options()->compaction_style == kCompactionStyleUniversal) {
// If universal compaction style is used and manual
@ -179,13 +181,14 @@ void Compaction::SetupBottomMostLevel(bool isManual) {
// run. We can safely set bottommost_level_ = true.
// If it is not manual compaction, then bottommost_level_
// is already set when the Compaction was created.
if (isManual) {
if (is_manual) {
bottommost_level_ = true;
}
return;
}
bottommost_level_ = true;
for (int i = output_level() + 1; i < number_levels_; i++) {
// checks whether there are files living beyond the output_level.
for (int i = output_level_ + 1; i < number_levels_; i++) {
if (input_version_->NumLevelFiles(i) > 0) {
bottommost_level_ = false;
break;
@ -211,7 +214,7 @@ void Compaction::ReleaseCompactionFiles(Status status) {
}
void Compaction::ResetNextCompactionIndex() {
input_version_->ResetNextCompactionIndex(level_);
input_version_->ResetNextCompactionIndex(start_level_);
}
namespace {
@ -238,24 +241,23 @@ void Compaction::Summary(char* output, int len) {
int write =
snprintf(output, len, "Base version %" PRIu64
" Base level %d, seek compaction:%d, inputs: [",
input_version_->GetVersionNumber(), level_, seek_compaction_);
input_version_->GetVersionNumber(),
start_level_, seek_compaction_);
if (write < 0 || write >= len) {
return;
}
write += InputSummary(inputs_[0].files, output + write, len - write);
if (write < 0 || write >= len) {
return;
}
write += snprintf(output + write, len - write, "], [");
if (write < 0 || write >= len) {
return;
}
write += InputSummary(inputs_[1].files, output + write, len - write);
if (write < 0 || write >= len) {
return;
for (int level = 0; level < num_input_levels(); ++level) {
if (level > 0) {
write += snprintf(output + write, len - write, "], [");
if (write < 0 || write >= len) {
return;
}
}
write += InputSummary(inputs_[level].files, output + write, len - write);
if (write < 0 || write >= len) {
return;
}
}
snprintf(output + write, len - write, "]");
@ -268,8 +270,10 @@ uint64_t Compaction::OutputFilePreallocationSize() {
preallocation_size =
cfd_->compaction_picker()->MaxFileSizeForLevel(output_level());
} else {
for (const auto& f : inputs_[0].files) {
preallocation_size += f->fd.GetFileSize();
for (int level = 0; level < num_input_levels(); ++level) {
for (const auto& f : inputs_[level].files) {
preallocation_size += f->fd.GetFileSize();
}
}
}
// Over-estimate slightly so we don't end up just barely crossing

View File

@ -14,6 +14,8 @@
namespace rocksdb {
// The structure that manages compaction input files associated
// with the same physical level.
struct CompactionInputFiles {
int level;
std::vector<FileMetaData*> files;
@ -36,35 +38,63 @@ class Compaction {
~Compaction();
// Returns the level associated to the specified compaction input level.
// If input_level is not specified, then input_level is set to 0.
int level(int input_level = 0) const { return inputs_[input_level].level; }
// If compaction_input_level is not specified, then input_level is set to 0.
int level(int compaction_input_level = 0) const {
return inputs_[compaction_input_level].level;
}
// Outputs will go to this level
int output_level() const { return out_level_; }
int output_level() const { return output_level_; }
// Returns the number of input levels in this compaction.
int num_input_levels() const { return inputs_.size(); }
// Return the object that holds the edits to the descriptor done
// by this compaction.
VersionEdit* edit() { return edit_; }
VersionEdit* edit() const { return edit_; }
// "which" must be either 0 or 1
int num_input_files(int which) const { return inputs_[which].size(); }
// Returns the number of input files associated to the specified
// compaction input level.
// The function will return 0 if when "compaction_input_level" < 0
// or "compaction_input_level" >= "num_input_levels()".
int num_input_files(int compaction_input_level) const {
if (compaction_input_level >= 0 &&
compaction_input_level < inputs_.size()) {
return inputs_[compaction_input_level].size();
}
return 0;
}
// Returns input version of the compaction
Version* input_version() const { return input_version_; }
// Returns the ColumnFamilyData associated with the compaction.
ColumnFamilyData* column_family_data() const { return cfd_; }
// Return the ith input file at "level()+which" ("which" must be 0 or 1).
FileMetaData* input(int which, int i) const { return inputs_[which][i]; }
// Returns the list of FileMataData associated with the specified
// compaction input level.
std::vector<FileMetaData*>* inputs(int which) {
return &inputs_[which].files;
// Returns the file meta data of the 'i'th input file at the
// specified compaction input level.
// REQUIREMENT: "compaction_input_level" must be >= 0 and
// < "input_levels()"
FileMetaData* input(int compaction_input_level, int i) const {
assert(compaction_input_level < inputs_.size() &&
compaction_input_level >= 0);
return inputs_[compaction_input_level][i];
}
// Return the input_level file
FileLevel* input_levels(int which) { return &input_levels_[which]; }
// Returns the list of file meta data of the specified compaction
// input level.
// REQUIREMENT: "compaction_input_level" must be >= 0 and
// < "input_levels()"
std::vector<FileMetaData*>* const inputs(int compaction_input_level) {
assert(compaction_input_level < inputs_.size() &&
compaction_input_level >= 0);
return &inputs_[compaction_input_level].files;
}
// Returns the FileLevel of the specified compaction input level.
FileLevel* input_levels(int compaction_input_level) {
return &input_levels_[compaction_input_level];
}
// Maximum size of files to build during this compaction.
uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
@ -83,16 +113,17 @@ class Compaction {
// moving a single input file to the next level (no merging or splitting)
bool IsTrivialMove() const;
// If true, just delete all files in inputs_[0]
bool IsDeletionCompaction() const;
// If true, then the comaction can be done by simply deleting input files.
bool IsDeletionCompaction() const {
return deletion_compaction_;
}
// Add all inputs to this compaction as delete operations to *edit.
void AddInputDeletions(VersionEdit* edit);
// Returns true if the information we have available guarantees that
// the compaction is producing data in "level+1" for which no data exists
// in levels greater than "level+1".
bool IsBaseLevelForKey(const Slice& user_key);
// Returns true if the available information we have guarantees that
// the input "user_key" does not exist in any level beyond "output_level()".
bool KeyNotExistsBeyondOutputLevel(const Slice& user_key);
// Returns true iff we should stop building the current output
// before processing "internal_key".
@ -106,6 +137,9 @@ class Compaction {
// Delete this compaction from the list of running compactions.
void ReleaseCompactionFiles(Status status);
// Returns the summary of the compaction in "output" with maximum "len"
// in bytes. The caller is responsible for the memory management of
// "output".
void Summary(char* output, int len);
// Return the score that was used to pick this compaction run.
@ -120,9 +154,9 @@ class Compaction {
// Was this compaction triggered manually by the client?
bool IsManualCompaction() { return is_manual_compaction_; }
// Returns a number of byte that the output file should be preallocated to
// Returns the size in bytes that the output file should be preallocated to.
// In level compaction, that is max_file_size_. In universal compaction, that
// is the sum of all input file sizes
// is the sum of all input file sizes.
uint64_t OutputFilePreallocationSize();
private:
@ -131,13 +165,13 @@ class Compaction {
friend class FIFOCompactionPicker;
friend class LevelCompactionPicker;
Compaction(Version* input_version, int level, int out_level,
Compaction(Version* input_version, int start_level, int out_level,
uint64_t target_file_size, uint64_t max_grandparent_overlap_bytes,
uint32_t output_path_id, CompressionType output_compression,
bool seek_compaction = false, bool deletion_compaction = false);
int level_;
int out_level_; // levels to which output files are stored
const int start_level_; // the lowest level to be compacted
const int output_level_; // levels to which output files are stored
uint64_t max_output_file_size_;
uint64_t max_grandparent_overlap_bytes_;
Version* input_version_;
@ -149,25 +183,27 @@ class Compaction {
uint32_t output_path_id_;
CompressionType output_compression_;
bool seek_compaction_;
// if true, just delete files in inputs_[0]
// If true, then the comaction can be done by simply deleting input files.
bool deletion_compaction_;
// Each compaction reads inputs from "level_" and "level_+1"
CompactionInputFiles inputs_[2]; // The two sets of inputs
// Compaction input files organized by level.
autovector<CompactionInputFiles> inputs_;
// A copy of inputs_, organized more closely in memory
autovector<FileLevel, 2> input_levels_;
// State used to check for number of of overlapping grandparent files
// (parent == level_ + 1, grandparent == level_ + 2)
// (grandparent == "output_level_ + 1")
// This vector is updated by Version::GetOverlappingInputs().
std::vector<FileMetaData*> grandparents_;
size_t grandparent_index_; // Index in grandparent_starts_
bool seen_key_; // Some output key has been seen
size_t grandparent_index_; // Index in grandparent_starts_
bool seen_key_; // Some output key has been seen
uint64_t overlapped_bytes_; // Bytes of overlap between current output
// and grandparent files
int base_index_; // index of the file in files_[level_]
int parent_index_; // index of some file with same range in files_[level_+1]
double score_; // score that was used to pick this compaction.
// and grandparent files
int base_index_; // index of the file in files_[start_level_]
int parent_index_; // index of some file with same range in
// files_[start_level_+1]
double score_; // score that was used to pick this compaction.
// Is this compaction creating a file in the bottom most level?
bool bottommost_level_;
@ -177,17 +213,21 @@ class Compaction {
// Is this compaction requested by the client?
bool is_manual_compaction_;
// level_ptrs_ holds indices into input_version_->levels_: our state
// is that we are positioned at one of the file ranges for each
// higher level than the ones involved in this compaction (i.e. for
// all L >= level_ + 2).
// "level_ptrs_" holds indices into "input_version_->levels_", where each
// index remembers which file of an associated level we are currently used
// to check KeyNotExistsBeyondOutputLevel() for deletion operation.
// As it is for checking KeyNotExistsBeyondOutputLevel(), it only
// records indices for all levels beyond "output_level_".
std::vector<size_t> level_ptrs_;
// mark (or clear) all files that are being compacted
void MarkFilesBeingCompacted(bool);
void MarkFilesBeingCompacted(bool mark_as_compacted);
// Initialize whether compaction producing files at the bottommost level
void SetupBottomMostLevel(bool isManual);
// Initialize whether the compaction is producing files at the
// bottommost level.
//
// @see BottomMostLevel()
void SetupBottomMostLevel(bool is_manual);
// In case of compaction error, reset the nextIndex that is used
// to pick up the next file to be compacted from files_by_size_

View File

@ -2670,7 +2670,7 @@ Status DBImpl::ProcessKeyValueCompaction(
RecordTick(options_.statistics.get(), COMPACTION_KEY_DROP_NEWER_ENTRY);
} else if (ikey.type == kTypeDeletion &&
ikey.sequence <= earliest_snapshot &&
compact->compaction->IsBaseLevelForKey(ikey.user_key)) {
compact->compaction->KeyNotExistsBeyondOutputLevel(ikey.user_key)) {
// For this user key:
// (1) there is no data in higher levels
// (2) data in lower levels will have larger sequence numbers