Allow class Compaction to handle input files from multiple levels.
Summary:
Allow class Compaction to handle input files from multiple levels. This diff is a subset of https://reviews.facebook.net/D19263 where only db/compaction.cc and db/compaction.h are changed.

Test Plan:
make db_test
export ROCKSDB_TESTS=Compaction
./db_test

Reviewers: igor, sdong, ljin
Reviewed By: ljin
Subscribers: leveldb
Differential Revision: https://reviews.facebook.net/D19923
This commit is contained in:
parent
296e340753
commit
3178510153
@ -26,14 +26,14 @@ static uint64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
|
||||
return sum;
|
||||
}
|
||||
|
||||
Compaction::Compaction(Version* input_version, int level, int out_level,
|
||||
Compaction::Compaction(Version* input_version, int start_level, int out_level,
|
||||
uint64_t target_file_size,
|
||||
uint64_t max_grandparent_overlap_bytes,
|
||||
uint32_t output_path_id,
|
||||
CompressionType output_compression, bool seek_compaction,
|
||||
bool deletion_compaction)
|
||||
: level_(level),
|
||||
out_level_(out_level),
|
||||
: start_level_(start_level),
|
||||
output_level_(out_level),
|
||||
max_output_file_size_(target_file_size),
|
||||
max_grandparent_overlap_bytes_(max_grandparent_overlap_bytes),
|
||||
input_version_(input_version),
|
||||
@ -61,8 +61,10 @@ Compaction::Compaction(Version* input_version, int level, int out_level,
|
||||
for (int i = 0; i < number_levels_; i++) {
|
||||
level_ptrs_[i] = 0;
|
||||
}
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
inputs_[i].level = level_ + i;
|
||||
int num_levels = output_level_ - start_level_ + 1;
|
||||
inputs_.resize(num_levels);
|
||||
for (int i = 0; i < num_levels; ++i) {
|
||||
inputs_[i].level = start_level_ + i;
|
||||
}
|
||||
}
|
||||
|
||||
@ -89,39 +91,39 @@ bool Compaction::IsTrivialMove() const {
|
||||
// Avoid a move if there is lots of overlapping grandparent data.
|
||||
// Otherwise, the move could create a parent file that will require
|
||||
// a very expensive merge later on.
|
||||
// If level_== out_level_, the purpose is to force compaction filter to be
|
||||
// applied to that level, and thus cannot be a trivial move.
|
||||
return (level_ != out_level_ &&
|
||||
// If start_level_== output_level_, the purpose is to force compaction
|
||||
// filter to be applied to that level, and thus cannot be a trivial move.
|
||||
return (start_level_ != output_level_ &&
|
||||
num_input_levels() == 2 &&
|
||||
num_input_files(0) == 1 &&
|
||||
num_input_files(1) == 0 &&
|
||||
TotalFileSize(grandparents_) <= max_grandparent_overlap_bytes_);
|
||||
}
|
||||
|
||||
bool Compaction::IsDeletionCompaction() const { return deletion_compaction_; }
|
||||
|
||||
void Compaction::AddInputDeletions(VersionEdit* edit) {
|
||||
for (int which = 0; which < 2; which++) {
|
||||
for (int which = 0; which < num_input_levels(); which++) {
|
||||
for (size_t i = 0; i < inputs_[which].size(); i++) {
|
||||
edit->DeleteFile(level_ + which, inputs_[which][i]->fd.GetNumber());
|
||||
edit->DeleteFile(level(which), inputs_[which][i]->fd.GetNumber());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool Compaction::IsBaseLevelForKey(const Slice& user_key) {
|
||||
bool Compaction::KeyNotExistsBeyondOutputLevel(const Slice& user_key) {
|
||||
assert(cfd_->options()->compaction_style != kCompactionStyleFIFO);
|
||||
if (cfd_->options()->compaction_style == kCompactionStyleUniversal) {
|
||||
return bottommost_level_;
|
||||
}
|
||||
// Maybe use binary search to find right entry instead of linear search?
|
||||
const Comparator* user_cmp = cfd_->user_comparator();
|
||||
for (int lvl = level_ + 2; lvl < number_levels_; lvl++) {
|
||||
for (int lvl = output_level_ + 1; lvl < number_levels_; lvl++) {
|
||||
const std::vector<FileMetaData*>& files = input_version_->files_[lvl];
|
||||
for (; level_ptrs_[lvl] < files.size(); ) {
|
||||
FileMetaData* f = files[level_ptrs_[lvl]];
|
||||
if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) {
|
||||
// We've advanced far enough
|
||||
if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) {
|
||||
// Key falls in this file's range, so definitely not base level
|
||||
// Key falls in this file's range, so definitely
|
||||
// exists beyond output level
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
@ -159,18 +161,18 @@ bool Compaction::ShouldStopBefore(const Slice& internal_key) {
|
||||
}
|
||||
|
||||
// Mark (or clear) each file that is being compacted
|
||||
void Compaction::MarkFilesBeingCompacted(bool value) {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
void Compaction::MarkFilesBeingCompacted(bool mark_as_compacted) {
|
||||
for (int i = 0; i < num_input_levels(); i++) {
|
||||
for (unsigned int j = 0; j < inputs_[i].size(); j++) {
|
||||
assert(value ? !inputs_[i][j]->being_compacted :
|
||||
inputs_[i][j]->being_compacted);
|
||||
inputs_[i][j]->being_compacted = value;
|
||||
assert(mark_as_compacted ? !inputs_[i][j]->being_compacted :
|
||||
inputs_[i][j]->being_compacted);
|
||||
inputs_[i][j]->being_compacted = mark_as_compacted;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Is this compaction producing files at the bottommost level?
|
||||
void Compaction::SetupBottomMostLevel(bool isManual) {
|
||||
void Compaction::SetupBottomMostLevel(bool is_manual) {
|
||||
assert(cfd_->options()->compaction_style != kCompactionStyleFIFO);
|
||||
if (cfd_->options()->compaction_style == kCompactionStyleUniversal) {
|
||||
// If universal compaction style is used and manual
|
||||
@ -179,13 +181,14 @@ void Compaction::SetupBottomMostLevel(bool isManual) {
|
||||
// run. We can safely set bottommost_level_ = true.
|
||||
// If it is not manual compaction, then bottommost_level_
|
||||
// is already set when the Compaction was created.
|
||||
if (isManual) {
|
||||
if (is_manual) {
|
||||
bottommost_level_ = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
bottommost_level_ = true;
|
||||
for (int i = output_level() + 1; i < number_levels_; i++) {
|
||||
// checks whether there are files living beyond the output_level.
|
||||
for (int i = output_level_ + 1; i < number_levels_; i++) {
|
||||
if (input_version_->NumLevelFiles(i) > 0) {
|
||||
bottommost_level_ = false;
|
||||
break;
|
||||
@ -211,7 +214,7 @@ void Compaction::ReleaseCompactionFiles(Status status) {
|
||||
}
|
||||
|
||||
void Compaction::ResetNextCompactionIndex() {
|
||||
input_version_->ResetNextCompactionIndex(level_);
|
||||
input_version_->ResetNextCompactionIndex(start_level_);
|
||||
}
|
||||
|
||||
namespace {
|
||||
@ -238,24 +241,23 @@ void Compaction::Summary(char* output, int len) {
|
||||
int write =
|
||||
snprintf(output, len, "Base version %" PRIu64
|
||||
" Base level %d, seek compaction:%d, inputs: [",
|
||||
input_version_->GetVersionNumber(), level_, seek_compaction_);
|
||||
input_version_->GetVersionNumber(),
|
||||
start_level_, seek_compaction_);
|
||||
if (write < 0 || write >= len) {
|
||||
return;
|
||||
}
|
||||
|
||||
write += InputSummary(inputs_[0].files, output + write, len - write);
|
||||
if (write < 0 || write >= len) {
|
||||
return;
|
||||
}
|
||||
|
||||
write += snprintf(output + write, len - write, "], [");
|
||||
if (write < 0 || write >= len) {
|
||||
return;
|
||||
}
|
||||
|
||||
write += InputSummary(inputs_[1].files, output + write, len - write);
|
||||
if (write < 0 || write >= len) {
|
||||
return;
|
||||
for (int level = 0; level < num_input_levels(); ++level) {
|
||||
if (level > 0) {
|
||||
write += snprintf(output + write, len - write, "], [");
|
||||
if (write < 0 || write >= len) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
write += InputSummary(inputs_[level].files, output + write, len - write);
|
||||
if (write < 0 || write >= len) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
snprintf(output + write, len - write, "]");
|
||||
@ -268,8 +270,10 @@ uint64_t Compaction::OutputFilePreallocationSize() {
|
||||
preallocation_size =
|
||||
cfd_->compaction_picker()->MaxFileSizeForLevel(output_level());
|
||||
} else {
|
||||
for (const auto& f : inputs_[0].files) {
|
||||
preallocation_size += f->fd.GetFileSize();
|
||||
for (int level = 0; level < num_input_levels(); ++level) {
|
||||
for (const auto& f : inputs_[level].files) {
|
||||
preallocation_size += f->fd.GetFileSize();
|
||||
}
|
||||
}
|
||||
}
|
||||
// Over-estimate slightly so we don't end up just barely crossing
|
||||
|
126
db/compaction.h
126
db/compaction.h
@ -14,6 +14,8 @@
|
||||
|
||||
namespace rocksdb {
|
||||
|
||||
// The structure that manages compaction input files associated
|
||||
// with the same physical level.
|
||||
struct CompactionInputFiles {
|
||||
int level;
|
||||
std::vector<FileMetaData*> files;
|
||||
@ -36,35 +38,63 @@ class Compaction {
|
||||
~Compaction();
|
||||
|
||||
// Returns the level associated to the specified compaction input level.
|
||||
// If input_level is not specified, then input_level is set to 0.
|
||||
int level(int input_level = 0) const { return inputs_[input_level].level; }
|
||||
// If compaction_input_level is not specified, it defaults to 0.
|
||||
int level(int compaction_input_level = 0) const {
|
||||
return inputs_[compaction_input_level].level;
|
||||
}
|
||||
|
||||
// Outputs will go to this level
|
||||
int output_level() const { return out_level_; }
|
||||
int output_level() const { return output_level_; }
|
||||
|
||||
// Returns the number of input levels in this compaction.
|
||||
int num_input_levels() const { return inputs_.size(); }
|
||||
|
||||
// Return the object that holds the edits to the descriptor done
|
||||
// by this compaction.
|
||||
VersionEdit* edit() { return edit_; }
|
||||
VersionEdit* edit() const { return edit_; }
|
||||
|
||||
// "which" must be either 0 or 1
|
||||
int num_input_files(int which) const { return inputs_[which].size(); }
|
||||
// Returns the number of input files associated to the specified
|
||||
// compaction input level.
|
||||
// The function will return 0 when "compaction_input_level" < 0
|
||||
// or "compaction_input_level" >= "num_input_levels()".
|
||||
int num_input_files(int compaction_input_level) const {
|
||||
if (compaction_input_level >= 0 &&
|
||||
compaction_input_level < inputs_.size()) {
|
||||
return inputs_[compaction_input_level].size();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Returns input version of the compaction
|
||||
Version* input_version() const { return input_version_; }
|
||||
|
||||
// Returns the ColumnFamilyData associated with the compaction.
|
||||
ColumnFamilyData* column_family_data() const { return cfd_; }
|
||||
|
||||
// Return the ith input file at "level()+which" ("which" must be 0 or 1).
|
||||
FileMetaData* input(int which, int i) const { return inputs_[which][i]; }
|
||||
|
||||
// Returns the list of FileMetaData associated with the specified
|
||||
// compaction input level.
|
||||
std::vector<FileMetaData*>* inputs(int which) {
|
||||
return &inputs_[which].files;
|
||||
// Returns the file meta data of the 'i'th input file at the
|
||||
// specified compaction input level.
|
||||
// REQUIREMENT: "compaction_input_level" must be >= 0 and
|
||||
// < "num_input_levels()"
|
||||
FileMetaData* input(int compaction_input_level, int i) const {
|
||||
assert(compaction_input_level < inputs_.size() &&
|
||||
compaction_input_level >= 0);
|
||||
return inputs_[compaction_input_level][i];
|
||||
}
|
||||
|
||||
// Return the input_level file
|
||||
FileLevel* input_levels(int which) { return &input_levels_[which]; }
|
||||
// Returns the list of file meta data of the specified compaction
|
||||
// input level.
|
||||
// REQUIREMENT: "compaction_input_level" must be >= 0 and
|
||||
// < "num_input_levels()"
|
||||
std::vector<FileMetaData*>* const inputs(int compaction_input_level) {
|
||||
assert(compaction_input_level < inputs_.size() &&
|
||||
compaction_input_level >= 0);
|
||||
return &inputs_[compaction_input_level].files;
|
||||
}
|
||||
|
||||
// Returns the FileLevel of the specified compaction input level.
|
||||
FileLevel* input_levels(int compaction_input_level) {
|
||||
return &input_levels_[compaction_input_level];
|
||||
}
|
||||
|
||||
// Maximum size of files to build during this compaction.
|
||||
uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
|
||||
@ -83,16 +113,17 @@ class Compaction {
|
||||
// moving a single input file to the next level (no merging or splitting)
|
||||
bool IsTrivialMove() const;
|
||||
|
||||
// If true, just delete all files in inputs_[0]
|
||||
bool IsDeletionCompaction() const;
|
||||
// If true, then the compaction can be done by simply deleting input files.
|
||||
bool IsDeletionCompaction() const {
|
||||
return deletion_compaction_;
|
||||
}
|
||||
|
||||
// Add all inputs to this compaction as delete operations to *edit.
|
||||
void AddInputDeletions(VersionEdit* edit);
|
||||
|
||||
// Returns true if the information we have available guarantees that
|
||||
// the compaction is producing data in "level+1" for which no data exists
|
||||
// in levels greater than "level+1".
|
||||
bool IsBaseLevelForKey(const Slice& user_key);
|
||||
// Returns true if the available information we have guarantees that
|
||||
// the input "user_key" does not exist in any level beyond "output_level()".
|
||||
bool KeyNotExistsBeyondOutputLevel(const Slice& user_key);
|
||||
|
||||
// Returns true iff we should stop building the current output
|
||||
// before processing "internal_key".
|
||||
@ -106,6 +137,9 @@ class Compaction {
|
||||
// Delete this compaction from the list of running compactions.
|
||||
void ReleaseCompactionFiles(Status status);
|
||||
|
||||
// Returns the summary of the compaction in "output" with maximum "len"
|
||||
// in bytes. The caller is responsible for the memory management of
|
||||
// "output".
|
||||
void Summary(char* output, int len);
|
||||
|
||||
// Return the score that was used to pick this compaction run.
|
||||
@ -120,9 +154,9 @@ class Compaction {
|
||||
// Was this compaction triggered manually by the client?
|
||||
bool IsManualCompaction() { return is_manual_compaction_; }
|
||||
|
||||
// Returns a number of byte that the output file should be preallocated to
|
||||
// Returns the size in bytes that the output file should be preallocated to.
|
||||
// In level compaction, that is max_file_size_. In universal compaction, that
|
||||
// is the sum of all input file sizes
|
||||
// is the sum of all input file sizes.
|
||||
uint64_t OutputFilePreallocationSize();
|
||||
|
||||
private:
|
||||
@ -131,13 +165,13 @@ class Compaction {
|
||||
friend class FIFOCompactionPicker;
|
||||
friend class LevelCompactionPicker;
|
||||
|
||||
Compaction(Version* input_version, int level, int out_level,
|
||||
Compaction(Version* input_version, int start_level, int out_level,
|
||||
uint64_t target_file_size, uint64_t max_grandparent_overlap_bytes,
|
||||
uint32_t output_path_id, CompressionType output_compression,
|
||||
bool seek_compaction = false, bool deletion_compaction = false);
|
||||
|
||||
int level_;
|
||||
int out_level_; // levels to which output files are stored
|
||||
const int start_level_; // the lowest level to be compacted
|
||||
const int output_level_; // levels to which output files are stored
|
||||
uint64_t max_output_file_size_;
|
||||
uint64_t max_grandparent_overlap_bytes_;
|
||||
Version* input_version_;
|
||||
@ -149,25 +183,27 @@ class Compaction {
|
||||
uint32_t output_path_id_;
|
||||
CompressionType output_compression_;
|
||||
bool seek_compaction_;
|
||||
// if true, just delete files in inputs_[0]
|
||||
// If true, then the compaction can be done by simply deleting input files.
|
||||
bool deletion_compaction_;
|
||||
|
||||
// Each compaction reads inputs from "level_" and "level_+1"
|
||||
CompactionInputFiles inputs_[2]; // The two sets of inputs
|
||||
// Compaction input files organized by level.
|
||||
autovector<CompactionInputFiles> inputs_;
|
||||
|
||||
// A copy of inputs_, organized more closely in memory
|
||||
autovector<FileLevel, 2> input_levels_;
|
||||
|
||||
// State used to check for number of overlapping grandparent files
|
||||
// (parent == level_ + 1, grandparent == level_ + 2)
|
||||
// (grandparent == "output_level_ + 1")
|
||||
// This vector is updated by Version::GetOverlappingInputs().
|
||||
std::vector<FileMetaData*> grandparents_;
|
||||
size_t grandparent_index_; // Index in grandparent_starts_
|
||||
bool seen_key_; // Some output key has been seen
|
||||
size_t grandparent_index_; // Index in grandparent_starts_
|
||||
bool seen_key_; // Some output key has been seen
|
||||
uint64_t overlapped_bytes_; // Bytes of overlap between current output
|
||||
// and grandparent files
|
||||
int base_index_; // index of the file in files_[level_]
|
||||
int parent_index_; // index of some file with same range in files_[level_+1]
|
||||
double score_; // score that was used to pick this compaction.
|
||||
// and grandparent files
|
||||
int base_index_; // index of the file in files_[start_level_]
|
||||
int parent_index_; // index of some file with same range in
|
||||
// files_[start_level_+1]
|
||||
double score_; // score that was used to pick this compaction.
|
||||
|
||||
// Is this compaction creating a file in the bottom most level?
|
||||
bool bottommost_level_;
|
||||
@ -177,17 +213,21 @@ class Compaction {
|
||||
// Is this compaction requested by the client?
|
||||
bool is_manual_compaction_;
|
||||
|
||||
// level_ptrs_ holds indices into input_version_->levels_: our state
|
||||
// is that we are positioned at one of the file ranges for each
|
||||
// higher level than the ones involved in this compaction (i.e. for
|
||||
// all L >= level_ + 2).
|
||||
// "level_ptrs_" holds indices into "input_version_->levels_", where each
|
||||
// index remembers which file of an associated level we are currently using
|
||||
// to check KeyNotExistsBeyondOutputLevel() for deletion operation.
|
||||
// As it is for checking KeyNotExistsBeyondOutputLevel(), it only
|
||||
// records indices for all levels beyond "output_level_".
|
||||
std::vector<size_t> level_ptrs_;
|
||||
|
||||
// mark (or clear) all files that are being compacted
|
||||
void MarkFilesBeingCompacted(bool);
|
||||
void MarkFilesBeingCompacted(bool mark_as_compacted);
|
||||
|
||||
// Initialize whether compaction producing files at the bottommost level
|
||||
void SetupBottomMostLevel(bool isManual);
|
||||
// Initialize whether the compaction is producing files at the
|
||||
// bottommost level.
|
||||
//
|
||||
// @see BottomMostLevel()
|
||||
void SetupBottomMostLevel(bool is_manual);
|
||||
|
||||
// In case of compaction error, reset the nextIndex that is used
|
||||
// to pick up the next file to be compacted from files_by_size_
|
||||
|
@ -2670,7 +2670,7 @@ Status DBImpl::ProcessKeyValueCompaction(
|
||||
RecordTick(options_.statistics.get(), COMPACTION_KEY_DROP_NEWER_ENTRY);
|
||||
} else if (ikey.type == kTypeDeletion &&
|
||||
ikey.sequence <= earliest_snapshot &&
|
||||
compact->compaction->IsBaseLevelForKey(ikey.user_key)) {
|
||||
compact->compaction->KeyNotExistsBeyondOutputLevel(ikey.user_key)) {
|
||||
// For this user key:
|
||||
// (1) there is no data in higher levels
|
||||
// (2) data in lower levels will have larger sequence numbers
|
||||
|
Loading…
x
Reference in New Issue
Block a user