Add a mode to always pick the oldest file to compact for each level
Summary: Add options.compaction_pri, which specifies the policy about which file to compact first. kCompactionPriByLargestSeq will compact oldest files first. Verified the behavior in db_bench but did not write unit tests yet. Also need to make it settable through option string and dynamically changeable. Test Plan: Will write unit tests Reviewers: igor, rven, anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, MarkCallaghan Reviewed By: yhchiang Subscribers: leveldb, dhruba Differential Revision: https://reviews.facebook.net/D45951
This commit is contained in:
parent
dd2e1eeb35
commit
f1b9f804e9
@ -1028,7 +1028,7 @@ bool LevelCompactionPicker::PickCompactionBySize(VersionStorageInfo* vstorage,
|
||||
|
||||
// Pick the largest file in this level that is not already
|
||||
// being compacted
|
||||
const std::vector<int>& file_size = vstorage->FilesBySize(level);
|
||||
const std::vector<int>& file_size = vstorage->FilesByCompactionPri(level);
|
||||
const std::vector<FileMetaData*>& level_files = vstorage->LevelFiles(level);
|
||||
|
||||
// record the first file that is not yet compacted
|
||||
@ -1039,11 +1039,6 @@ bool LevelCompactionPicker::PickCompactionBySize(VersionStorageInfo* vstorage,
|
||||
int index = file_size[i];
|
||||
auto* f = level_files[index];
|
||||
|
||||
assert((i == file_size.size() - 1) ||
|
||||
(i >= VersionStorageInfo::kNumberFilesToSort - 1) ||
|
||||
(f->compensated_file_size >=
|
||||
level_files[file_size[i + 1]]->compensated_file_size));
|
||||
|
||||
// do not pick a file to compact if it is being compacted
|
||||
// from n-1 level.
|
||||
if (f->being_compacted) {
|
||||
|
@ -117,7 +117,7 @@ class CompactionPickerTest : public testing::Test {
|
||||
|
||||
void UpdateVersionStorageInfo() {
|
||||
vstorage_->CalculateBaseBytes(ioptions_, mutable_cf_options_);
|
||||
vstorage_->UpdateFilesBySize();
|
||||
vstorage_->UpdateFilesByCompactionPri(mutable_cf_options_);
|
||||
vstorage_->UpdateNumNonEmptyLevels();
|
||||
vstorage_->GenerateFileIndexer();
|
||||
vstorage_->GenerateLevelFilesBrief();
|
||||
|
@ -314,6 +314,10 @@ static rocksdb::CompactionStyle FLAGS_compaction_style_e;
|
||||
DEFINE_int32(compaction_style, (int32_t) rocksdb::Options().compaction_style,
|
||||
"style of compaction: level-based vs universal");
|
||||
|
||||
static rocksdb::CompactionPri FLAGS_compaction_pri_e;
|
||||
DEFINE_int32(compaction_pri, (int32_t)rocksdb::Options().compaction_style,
|
||||
"priority of files to compaction: by size or by data age");
|
||||
|
||||
DEFINE_int32(universal_size_ratio, 0,
|
||||
"Percentage flexibility while comparing file size"
|
||||
" (for universal compaction only).");
|
||||
@ -2248,6 +2252,7 @@ class Benchmark {
|
||||
options.max_subcompactions = static_cast<uint32_t>(FLAGS_subcompactions);
|
||||
options.max_background_flushes = FLAGS_max_background_flushes;
|
||||
options.compaction_style = FLAGS_compaction_style_e;
|
||||
options.compaction_pri = FLAGS_compaction_pri_e;
|
||||
if (FLAGS_prefix_size != 0) {
|
||||
options.prefix_extractor.reset(
|
||||
NewFixedPrefixTransform(FLAGS_prefix_size));
|
||||
@ -3957,6 +3962,7 @@ int main(int argc, char** argv) {
|
||||
if (FLAGS_statistics) {
|
||||
dbstats = rocksdb::CreateDBStatistics();
|
||||
}
|
||||
FLAGS_compaction_pri_e = (rocksdb::CompactionPri)FLAGS_compaction_pri;
|
||||
|
||||
std::vector<std::string> fanout = rocksdb::StringSplit(
|
||||
FLAGS_max_bytes_for_level_multiplier_additional, ',');
|
||||
|
@ -77,7 +77,7 @@ class VersionBuilderTest : public testing::Test {
|
||||
}
|
||||
|
||||
void UpdateVersionStorageInfo() {
|
||||
vstorage_.UpdateFilesBySize();
|
||||
vstorage_.UpdateFilesByCompactionPri(mutable_cf_options_);
|
||||
vstorage_.UpdateNumNonEmptyLevels();
|
||||
vstorage_.GenerateFileIndexer();
|
||||
vstorage_.GenerateLevelFilesBrief();
|
||||
|
@ -782,7 +782,7 @@ VersionStorageInfo::VersionStorageInfo(
|
||||
compaction_style_(compaction_style),
|
||||
files_(new std::vector<FileMetaData*>[num_levels_]),
|
||||
base_level_(num_levels_ == 1 ? -1 : 1),
|
||||
files_by_size_(num_levels_),
|
||||
files_by_compaction_pri_(num_levels_),
|
||||
level0_non_overlapping_(false),
|
||||
next_file_to_compact_by_size_(num_levels_),
|
||||
compaction_score_(num_levels_),
|
||||
@ -923,7 +923,7 @@ void Version::PrepareApply(
|
||||
UpdateAccumulatedStats(update_stats);
|
||||
storage_info_.UpdateNumNonEmptyLevels();
|
||||
storage_info_.CalculateBaseBytes(*cfd_->ioptions(), mutable_cf_options);
|
||||
storage_info_.UpdateFilesBySize();
|
||||
storage_info_.UpdateFilesByCompactionPri(mutable_cf_options);
|
||||
storage_info_.GenerateFileIndexer();
|
||||
storage_info_.GenerateLevelFilesBrief();
|
||||
storage_info_.GenerateLevel0NonOverlapping();
|
||||
@ -1227,7 +1227,6 @@ bool CompareCompensatedSizeDescending(const Fsize& first, const Fsize& second) {
|
||||
return (first.file->compensated_file_size >
|
||||
second.file->compensated_file_size);
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
void VersionStorageInfo::AddFile(int level, FileMetaData* f) {
|
||||
@ -1245,7 +1244,7 @@ void VersionStorageInfo::AddFile(int level, FileMetaData* f) {
|
||||
// following functions called:
|
||||
// 1. UpdateNumNonEmptyLevels();
|
||||
// 2. CalculateBaseBytes();
|
||||
// 3. UpdateFilesBySize();
|
||||
// 3. UpdateFilesByCompactionPri();
|
||||
// 4. GenerateFileIndexer();
|
||||
// 5. GenerateLevelFilesBrief();
|
||||
// 6. GenerateLevel0NonOverlapping();
|
||||
@ -1297,7 +1296,8 @@ void VersionStorageInfo::UpdateNumNonEmptyLevels() {
|
||||
}
|
||||
}
|
||||
|
||||
void VersionStorageInfo::UpdateFilesBySize() {
|
||||
void VersionStorageInfo::UpdateFilesByCompactionPri(
|
||||
const MutableCFOptions& mutable_cf_options) {
|
||||
if (compaction_style_ == kCompactionStyleFIFO ||
|
||||
compaction_style_ == kCompactionStyleUniversal) {
|
||||
// don't need this
|
||||
@ -1306,8 +1306,8 @@ void VersionStorageInfo::UpdateFilesBySize() {
|
||||
// No need to sort the highest level because it is never compacted.
|
||||
for (int level = 0; level < num_levels() - 1; level++) {
|
||||
const std::vector<FileMetaData*>& files = files_[level];
|
||||
auto& files_by_size = files_by_size_[level];
|
||||
assert(files_by_size.size() == 0);
|
||||
auto& files_by_compaction_pri = files_by_compaction_pri_[level];
|
||||
assert(files_by_compaction_pri.size() == 0);
|
||||
|
||||
// populate a temp vector for sorting based on size
|
||||
std::vector<Fsize> temp(files.size());
|
||||
@ -1321,16 +1321,28 @@ void VersionStorageInfo::UpdateFilesBySize() {
|
||||
if (num > temp.size()) {
|
||||
num = temp.size();
|
||||
}
|
||||
std::partial_sort(temp.begin(), temp.begin() + num, temp.end(),
|
||||
CompareCompensatedSizeDescending);
|
||||
switch (mutable_cf_options.compaction_pri) {
|
||||
case kCompactionPriByCompensatedSize:
|
||||
std::partial_sort(temp.begin(), temp.begin() + num, temp.end(),
|
||||
CompareCompensatedSizeDescending);
|
||||
break;
|
||||
case kCompactionPriByLargestSeq:
|
||||
std::sort(temp.begin(), temp.end(),
|
||||
[this](const Fsize& f1, const Fsize& f2) -> bool {
|
||||
return f1.file->largest_seqno < f2.file->largest_seqno;
|
||||
});
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
assert(temp.size() == files.size());
|
||||
|
||||
// initialize files_by_size_
|
||||
// initialize files_by_compaction_pri_
|
||||
for (unsigned int i = 0; i < temp.size(); i++) {
|
||||
files_by_size.push_back(temp[i].index);
|
||||
files_by_compaction_pri.push_back(temp[i].index);
|
||||
}
|
||||
next_file_to_compact_by_size_[level] = 0;
|
||||
assert(files_[level].size() == files_by_size_[level].size());
|
||||
assert(files_[level].size() == files_by_compaction_pri_[level].size());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -132,8 +132,9 @@ class VersionStorageInfo {
|
||||
// Generate level_files_brief_ from files_
|
||||
void GenerateLevelFilesBrief();
|
||||
// Sort all files for this version based on their file size and
|
||||
// record results in files_by_size_. The largest files are listed first.
|
||||
void UpdateFilesBySize();
|
||||
// record results in files_by_compaction_pri_. The largest files are listed
|
||||
// first.
|
||||
void UpdateFilesByCompactionPri(const MutableCFOptions& mutable_cf_options);
|
||||
|
||||
void GenerateLevel0NonOverlapping();
|
||||
bool level0_non_overlapping() const {
|
||||
@ -226,9 +227,9 @@ class VersionStorageInfo {
|
||||
}
|
||||
|
||||
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
|
||||
const std::vector<int>& FilesBySize(int level) const {
|
||||
const std::vector<int>& FilesByCompactionPri(int level) const {
|
||||
assert(finalized_);
|
||||
return files_by_size_[level];
|
||||
return files_by_compaction_pri_[level];
|
||||
}
|
||||
|
||||
// REQUIRES: This version has been saved (see VersionSet::SaveTo)
|
||||
@ -242,7 +243,7 @@ class VersionStorageInfo {
|
||||
int base_level() const { return base_level_; }
|
||||
|
||||
// REQUIRES: lock is held
|
||||
// Set the index that is used to offset into files_by_size_ to find
|
||||
// Set the index that is used to offset into files_by_compaction_pri_ to find
|
||||
// the next compaction candidate file.
|
||||
void SetNextCompactionIndex(int level, int index) {
|
||||
next_file_to_compact_by_size_[level] = index;
|
||||
@ -259,7 +260,7 @@ class VersionStorageInfo {
|
||||
return file_indexer_;
|
||||
}
|
||||
|
||||
// Only the first few entries of files_by_size_ are sorted.
|
||||
// Only the first few entries of files_by_compaction_pri_ are sorted.
|
||||
// There is no need to sort all the files because it is likely
|
||||
// that on a running system, we need to look at only the first
|
||||
// few largest files because a new version is created every few
|
||||
@ -299,7 +300,8 @@ class VersionStorageInfo {
|
||||
|
||||
uint64_t GetEstimatedActiveKeys() const;
|
||||
|
||||
// re-initializes the index that is used to offset into files_by_size_
|
||||
// re-initializes the index that is used to offset into
|
||||
// files_by_compaction_pri_
|
||||
// to find the next compaction candidate file.
|
||||
void ResetNextCompactionIndex(int level) {
|
||||
next_file_to_compact_by_size_[level] = 0;
|
||||
@ -351,16 +353,16 @@ class VersionStorageInfo {
|
||||
// but files in each level are now sorted based on file
|
||||
// size. The file with the largest size is at the front.
|
||||
// This vector stores the index of the file from files_.
|
||||
std::vector<std::vector<int>> files_by_size_;
|
||||
std::vector<std::vector<int>> files_by_compaction_pri_;
|
||||
|
||||
// If true, means that files in L0 have keys with non overlapping ranges
|
||||
bool level0_non_overlapping_;
|
||||
|
||||
// An index into files_by_size_ that specifies the first
|
||||
// An index into files_by_compaction_pri_ that specifies the first
|
||||
// file that is not yet compacted
|
||||
std::vector<int> next_file_to_compact_by_size_;
|
||||
|
||||
// Only the first few entries of files_by_size_ are sorted.
|
||||
// Only the first few entries of files_by_compaction_pri_ are sorted.
|
||||
// There is no need to sort all the files because it is likely
|
||||
// that on a running system, we need to look at only the first
|
||||
// few largest files because a new version is created every few
|
||||
@ -513,8 +515,9 @@ class Version {
|
||||
void UpdateAccumulatedStats(bool update_stats);
|
||||
|
||||
// Sort all files for this version based on their file size and
|
||||
// record results in files_by_size_. The largest files are listed first.
|
||||
void UpdateFilesBySize();
|
||||
// record results in files_by_compaction_pri_. The largest files are listed
|
||||
// first.
|
||||
void UpdateFilesByCompactionPri();
|
||||
|
||||
ColumnFamilyData* cfd_; // ColumnFamilyData to which this Version belongs
|
||||
Logger* info_log_;
|
||||
|
@ -80,6 +80,13 @@ enum CompactionStyle : char {
|
||||
kCompactionStyleNone = 0x3,
|
||||
};
|
||||
|
||||
enum CompactionPri : char {
|
||||
// Slightly Priotize larger files by size compensated by #deletes
|
||||
kCompactionPriByCompensatedSize = 0x0,
|
||||
// First compact files whose data is oldest.
|
||||
kCompactionPriByLargestSeq = 0x1,
|
||||
};
|
||||
|
||||
enum class WALRecoveryMode : char {
|
||||
// Original levelDB recovery
|
||||
// We tolerate incomplete record in trailing data on all logs
|
||||
@ -547,6 +554,11 @@ struct ColumnFamilyOptions {
|
||||
// The compaction style. Default: kCompactionStyleLevel
|
||||
CompactionStyle compaction_style;
|
||||
|
||||
// If level compaction_style = kCompactionStyleLevel, for each level,
|
||||
// which files are prioritized to be picked to compact.
|
||||
// Default: kCompactionPriByCompensatedSize
|
||||
CompactionPri compaction_pri;
|
||||
|
||||
// If true, compaction will verify checksum on every read that happens
|
||||
// as part of compaction
|
||||
//
|
||||
|
@ -31,6 +31,7 @@ struct MutableCFOptions {
|
||||
options.level0_file_num_compaction_trigger),
|
||||
level0_slowdown_writes_trigger(options.level0_slowdown_writes_trigger),
|
||||
level0_stop_writes_trigger(options.level0_stop_writes_trigger),
|
||||
compaction_pri(options.compaction_pri),
|
||||
max_grandparent_overlap_factor(options.max_grandparent_overlap_factor),
|
||||
expanded_compaction_factor(options.expanded_compaction_factor),
|
||||
source_compaction_factor(options.source_compaction_factor),
|
||||
@ -66,6 +67,7 @@ struct MutableCFOptions {
|
||||
level0_file_num_compaction_trigger(0),
|
||||
level0_slowdown_writes_trigger(0),
|
||||
level0_stop_writes_trigger(0),
|
||||
compaction_pri(kCompactionPriByCompensatedSize),
|
||||
max_grandparent_overlap_factor(0),
|
||||
expanded_compaction_factor(0),
|
||||
source_compaction_factor(0),
|
||||
@ -117,6 +119,7 @@ struct MutableCFOptions {
|
||||
int level0_file_num_compaction_trigger;
|
||||
int level0_slowdown_writes_trigger;
|
||||
int level0_stop_writes_trigger;
|
||||
CompactionPri compaction_pri;
|
||||
int max_grandparent_overlap_factor;
|
||||
int expanded_compaction_factor;
|
||||
int source_compaction_factor;
|
||||
|
@ -109,6 +109,7 @@ ColumnFamilyOptions::ColumnFamilyOptions()
|
||||
disable_auto_compactions(false),
|
||||
purge_redundant_kvs_while_flush(true),
|
||||
compaction_style(kCompactionStyleLevel),
|
||||
compaction_pri(kCompactionPriByCompensatedSize),
|
||||
verify_checksums_in_compaction(true),
|
||||
filter_deletes(false),
|
||||
max_sequential_skip_in_iterations(8),
|
||||
@ -170,6 +171,7 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
|
||||
disable_auto_compactions(options.disable_auto_compactions),
|
||||
purge_redundant_kvs_while_flush(options.purge_redundant_kvs_while_flush),
|
||||
compaction_style(options.compaction_style),
|
||||
compaction_pri(options.compaction_pri),
|
||||
verify_checksums_in_compaction(options.verify_checksums_in_compaction),
|
||||
compaction_options_universal(options.compaction_options_universal),
|
||||
compaction_options_fifo(options.compaction_options_fifo),
|
||||
@ -492,6 +494,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
|
||||
verify_checksums_in_compaction);
|
||||
Header(log, " Options.compaction_style: %d",
|
||||
compaction_style);
|
||||
Header(log, " Options.compaction_pri: %d",
|
||||
compaction_pri);
|
||||
Header(log, " Options.compaction_options_universal.size_ratio: %u",
|
||||
compaction_options_universal.size_ratio);
|
||||
Header(log, "Options.compaction_options_universal.min_merge_width: %u",
|
||||
|
Loading…
Reference in New Issue
Block a user