2014-10-27 23:49:46 +01:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
#include "db/compaction_picker.h"
|
2014-12-16 06:48:16 +01:00
|
|
|
#include <limits>
|
2014-10-27 23:49:46 +01:00
|
|
|
#include <string>
|
|
|
|
#include "util/logging.h"
|
2015-03-20 01:29:37 +01:00
|
|
|
#include "util/string_util.h"
|
2014-10-27 23:49:46 +01:00
|
|
|
#include "util/testharness.h"
|
|
|
|
#include "util/testutil.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
|
|
|
class CountingLogger : public Logger {
|
|
|
|
public:
|
2015-02-01 20:08:19 +01:00
|
|
|
using Logger::Logv;
|
2014-10-27 23:49:46 +01:00
|
|
|
virtual void Logv(const char* format, va_list ap) override { log_count++; }
|
|
|
|
size_t log_count;
|
|
|
|
};
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
class CompactionPickerTest : public testing::Test {
|
2014-10-27 23:49:46 +01:00
|
|
|
public:
|
2014-11-13 22:41:43 +01:00
|
|
|
const Comparator* ucmp_;
|
|
|
|
InternalKeyComparator icmp_;
|
|
|
|
Options options_;
|
|
|
|
ImmutableCFOptions ioptions_;
|
|
|
|
MutableCFOptions mutable_cf_options_;
|
2014-10-27 23:49:46 +01:00
|
|
|
LevelCompactionPicker level_compaction_picker;
|
2014-11-13 22:41:43 +01:00
|
|
|
std::string cf_name_;
|
|
|
|
CountingLogger logger_;
|
|
|
|
LogBuffer log_buffer_;
|
|
|
|
uint32_t file_num_;
|
|
|
|
CompactionOptionsFIFO fifo_options_;
|
|
|
|
std::unique_ptr<VersionStorageInfo> vstorage_;
|
|
|
|
std::vector<std::unique_ptr<FileMetaData>> files_;
|
2014-10-27 23:49:46 +01:00
|
|
|
|
|
|
|
CompactionPickerTest()
|
2014-11-13 22:41:43 +01:00
|
|
|
: ucmp_(BytewiseComparator()),
|
|
|
|
icmp_(ucmp_),
|
|
|
|
ioptions_(options_),
|
|
|
|
mutable_cf_options_(options_, ioptions_),
|
|
|
|
level_compaction_picker(ioptions_, &icmp_),
|
|
|
|
cf_name_("dummy"),
|
|
|
|
log_buffer_(InfoLogLevel::INFO_LEVEL, &logger_),
|
|
|
|
file_num_(1),
|
|
|
|
vstorage_(nullptr) {
|
|
|
|
fifo_options_.max_table_files_size = 1;
|
|
|
|
mutable_cf_options_.RefreshDerivedOptions(ioptions_);
|
2014-12-16 06:48:16 +01:00
|
|
|
ioptions_.db_paths.emplace_back("dummy",
|
|
|
|
std::numeric_limits<uint64_t>::max());
|
2014-10-27 23:49:46 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
~CompactionPickerTest() {
|
2014-11-13 22:41:43 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void NewVersionStorage(int num_levels, CompactionStyle style) {
|
|
|
|
DeleteVersionStorage();
|
|
|
|
options_.num_levels = num_levels;
|
|
|
|
vstorage_.reset(new VersionStorageInfo(
|
|
|
|
&icmp_, ucmp_, options_.num_levels, style, nullptr));
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
vstorage_->CalculateBaseBytes(ioptions_, mutable_cf_options_);
|
2014-11-13 22:41:43 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void DeleteVersionStorage() {
|
|
|
|
vstorage_.reset();
|
|
|
|
files_.clear();
|
2014-10-27 23:49:46 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void Add(int level, uint32_t file_number, const char* smallest,
|
|
|
|
const char* largest, uint64_t file_size = 0, uint32_t path_id = 0,
|
|
|
|
SequenceNumber smallest_seq = 100,
|
|
|
|
SequenceNumber largest_seq = 100) {
|
2014-11-13 22:41:43 +01:00
|
|
|
assert(level < vstorage_->num_levels());
|
2014-10-27 23:49:46 +01:00
|
|
|
FileMetaData* f = new FileMetaData;
|
|
|
|
f->fd = FileDescriptor(file_number, path_id, file_size);
|
|
|
|
f->smallest = InternalKey(smallest, smallest_seq, kTypeValue);
|
|
|
|
f->largest = InternalKey(largest, largest_seq, kTypeValue);
|
2015-07-07 23:18:55 +02:00
|
|
|
f->smallest_seqno = smallest_seq;
|
|
|
|
f->largest_seqno = largest_seq;
|
2014-10-27 23:49:46 +01:00
|
|
|
f->compensated_file_size = file_size;
|
2014-10-31 16:48:19 +01:00
|
|
|
f->refs = 0;
|
2014-11-13 22:41:43 +01:00
|
|
|
vstorage_->AddFile(level, f);
|
|
|
|
files_.emplace_back(f);
|
2014-10-27 23:49:46 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
void UpdateVersionStorageInfo() {
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
vstorage_->CalculateBaseBytes(ioptions_, mutable_cf_options_);
|
2014-11-13 22:41:43 +01:00
|
|
|
vstorage_->UpdateFilesBySize();
|
|
|
|
vstorage_->UpdateNumNonEmptyLevels();
|
|
|
|
vstorage_->GenerateFileIndexer();
|
|
|
|
vstorage_->GenerateLevelFilesBrief();
|
2015-02-05 01:20:25 +01:00
|
|
|
vstorage_->ComputeCompactionScore(mutable_cf_options_, fifo_options_);
|
Allowing L0 -> L1 trivial move on sorted data
Summary:
This diff updates the logic of how we do trivial move, now trivial move can run on any number of files in input level as long as they are not overlapping
The conditions for trivial move have been updated
Introduced conditions:
- Trivial move cannot happen if we have a compaction filter (except if the compaction is not manual)
- Input level files cannot be overlapping
Removed conditions:
- Trivial move only run when the compaction is not manual
- Input level should can contain only 1 file
More context on what tests failed because of Trivial move
```
DBTest.CompactionsGenerateMultipleFiles
This test is expecting compaction on a file in L0 to generate multiple files in L1, this test will fail with trivial move because we end up with one file in L1
```
```
DBTest.NoSpaceCompactRange
This test expect compaction to fail when we force environment to report running out of space, of course this is not valid in trivial move situation
because trivial move does not need any extra space, and did not check for that
```
```
DBTest.DropWrites
Similar to DBTest.NoSpaceCompactRange
```
```
DBTest.DeleteObsoleteFilesPendingOutputs
This test expect that a file in L2 is deleted after it's moved to L3, this is not valid with trivial move because although the file was moved it is now used by L3
```
```
CuckooTableDBTest.CompactionIntoMultipleFiles
Same as DBTest.CompactionsGenerateMultipleFiles
```
This diff is based on a work by @sdong https://reviews.facebook.net/D34149
Test Plan: make -j64 check
Reviewers: rven, sdong, igor
Reviewed By: igor
Subscribers: yhchiang, ott, march, dhruba, sdong
Differential Revision: https://reviews.facebook.net/D34797
2015-06-05 01:51:25 +02:00
|
|
|
vstorage_->GenerateLevel0NonOverlapping();
|
2014-11-13 22:41:43 +01:00
|
|
|
vstorage_->SetFinalized();
|
2014-10-27 23:49:46 +01:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
// With no files at all, the picker is expected to return no compaction.
TEST_F(CompactionPickerTest, Empty) {
  NewVersionStorage(6, kCompactionStyleLevel);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> picked(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));

  ASSERT_TRUE(picked.get() == nullptr);
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
// A single level-0 file with the trigger set to 2 is expected to yield no
// compaction.
TEST_F(CompactionPickerTest, Single) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.level0_file_num_compaction_trigger = 2;

  Add(0, 1U, "p", "q");
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> picked(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));

  ASSERT_TRUE(picked.get() == nullptr);
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
// Two level-0 files with the trigger set to 2: both files are expected to be
// picked as level-0 inputs, in the order they were added.
TEST_F(CompactionPickerTest, Level0Trigger) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.level0_file_num_compaction_trigger = 2;

  Add(0, 1U, "150", "200");
  Add(0, 2U, "200", "250");
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> picked(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));

  ASSERT_TRUE(picked.get() != nullptr);
  ASSERT_EQ(2U, picked->num_input_files(0));
  ASSERT_EQ(1U, picked->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(2U, picked->input(0, 1)->fd.GetNumber());
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
// One 1,000,000,000-byte file on level 1: that file is expected to be the
// only input of the picked compaction.
TEST_F(CompactionPickerTest, Level1Trigger) {
  NewVersionStorage(6, kCompactionStyleLevel);

  Add(1, 66U, "150", "200", 1000000000U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> picked(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));

  ASSERT_TRUE(picked.get() != nullptr);
  ASSERT_EQ(1U, picked->num_input_files(0));
  ASSERT_EQ(66U, picked->input(0, 0)->fd.GetNumber());
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
// Level 1 holds two files, file 66 being one byte larger than file 88.
// The test expects file 66 to be picked together with the two level-2 files
// (6 and 7) whose key ranges intersect "150".."200".
TEST_F(CompactionPickerTest, Level1Trigger2) {
  NewVersionStorage(6, kCompactionStyleLevel);

  // Level 1.
  Add(1, 66U, "150", "200", 1000000001U);
  Add(1, 88U, "201", "300", 1000000000U);
  // Level 2.
  Add(2, 6U, "150", "179", 1000000000U);
  Add(2, 7U, "180", "220", 1000000000U);
  Add(2, 8U, "221", "300", 1000000000U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> picked(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));

  ASSERT_TRUE(picked.get() != nullptr);
  ASSERT_EQ(1U, picked->num_input_files(0));
  ASSERT_EQ(2U, picked->num_input_files(1));
  ASSERT_EQ(66U, picked->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(6U, picked->input(1, 0)->fd.GetNumber());
  ASSERT_EQ(7U, picked->input(1, 1)->fd.GetNumber());
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
// Several levels are populated at once; the test expects the compaction to
// start from file 7 on level 2.
TEST_F(CompactionPickerTest, LevelMaxScore) {
  NewVersionStorage(6, kCompactionStyleLevel);
  mutable_cf_options_.target_file_size_base = 10000000;
  mutable_cf_options_.target_file_size_multiplier = 10;

  Add(0, 1U, "150", "200", 1000000000U);

  // Level 1 score 1.2
  Add(1, 66U, "150", "200", 6000000U);
  Add(1, 88U, "201", "300", 6000000U);

  // Level 2 score 1.8. File 7 is the largest. Should be picked
  Add(2, 6U, "150", "179", 60000000U);
  Add(2, 7U, "180", "220", 60000001U);
  Add(2, 8U, "221", "300", 60000000U);

  // Level 3 score slightly larger than 1
  Add(3, 26U, "150", "170", 260000000U);
  Add(3, 27U, "171", "179", 260000000U);
  Add(3, 28U, "191", "220", 260000000U);
  Add(3, 29U, "221", "300", 260000000U);

  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> picked(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));

  ASSERT_TRUE(picked.get() != nullptr);
  ASSERT_EQ(1U, picked->num_input_files(0));
  ASSERT_EQ(7U, picked->input(0, 0)->fd.GetNumber());
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
// For every level except the last, fills that level with 1..kFileCount files
// and checks that NeedsCompaction() agrees with "top compaction score >= 1".
TEST_F(CompactionPickerTest, NeedsCompactionLevel) {
  const int kLevels = 6;
  const int kFileCount = 20;

  for (int level = 0; level < kLevels - 1; ++level) {
    // A storage is needed here only to query the level's byte limit; each
    // file gets 2/kFileCount of that limit.
    NewVersionStorage(kLevels, kCompactionStyleLevel);
    uint64_t file_size = vstorage_->MaxBytesForLevel(level) * 2 / kFileCount;

    for (int num_files = 1; num_files <= kFileCount; ++num_files) {
      // start a brand new version in each test.
      NewVersionStorage(kLevels, kCompactionStyleLevel);

      for (int file_idx = 0; file_idx < num_files; ++file_idx) {
        const int key_base = (file_idx + 100) * 1000;
        const std::string smallest_key = ToString(key_base);
        const std::string largest_key = ToString(key_base + 999);
        Add(level, file_idx, smallest_key.c_str(), largest_key.c_str(),
            file_size, 0, file_idx * 100, file_idx * 100 + 99);
      }
      UpdateVersionStorageInfo();

      ASSERT_EQ(vstorage_->CompactionScoreLevel(0), level);
      ASSERT_EQ(level_compaction_picker.NeedsCompaction(vstorage_.get()),
                vstorage_->CompactionScore(0) >= 1);

      // release the version storage
      DeleteVersionStorage();
    }
  }
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(CompactionPickerTest, Level0TriggerDynamic) {
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
int num_levels = ioptions_.num_levels;
|
|
|
|
ioptions_.level_compaction_dynamic_level_bytes = true;
|
|
|
|
mutable_cf_options_.level0_file_num_compaction_trigger = 2;
|
|
|
|
mutable_cf_options_.max_bytes_for_level_base = 200;
|
|
|
|
mutable_cf_options_.max_bytes_for_level_multiplier = 10;
|
|
|
|
NewVersionStorage(num_levels, kCompactionStyleLevel);
|
|
|
|
Add(0, 1U, "150", "200");
|
|
|
|
Add(0, 2U, "200", "250");
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
|
|
|
|
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
|
|
|
|
ASSERT_TRUE(compaction.get() != nullptr);
|
|
|
|
ASSERT_EQ(2U, compaction->num_input_files(0));
|
|
|
|
ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber());
|
|
|
|
ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber());
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
ASSERT_EQ(1, static_cast<int>(compaction->num_input_levels()));
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
ASSERT_EQ(num_levels - 1, compaction->output_level());
|
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(CompactionPickerTest, Level0TriggerDynamic2) {
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
int num_levels = ioptions_.num_levels;
|
|
|
|
ioptions_.level_compaction_dynamic_level_bytes = true;
|
|
|
|
mutable_cf_options_.level0_file_num_compaction_trigger = 2;
|
|
|
|
mutable_cf_options_.max_bytes_for_level_base = 200;
|
|
|
|
mutable_cf_options_.max_bytes_for_level_multiplier = 10;
|
|
|
|
NewVersionStorage(num_levels, kCompactionStyleLevel);
|
|
|
|
Add(0, 1U, "150", "200");
|
|
|
|
Add(0, 2U, "200", "250");
|
|
|
|
Add(num_levels - 1, 3U, "200", "250", 300U);
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
ASSERT_EQ(vstorage_->base_level(), num_levels - 2);
|
|
|
|
|
|
|
|
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
|
|
|
|
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
|
|
|
|
ASSERT_TRUE(compaction.get() != nullptr);
|
|
|
|
ASSERT_EQ(2U, compaction->num_input_files(0));
|
|
|
|
ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber());
|
|
|
|
ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber());
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
ASSERT_EQ(1, static_cast<int>(compaction->num_input_levels()));
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
ASSERT_EQ(num_levels - 2, compaction->output_level());
|
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(CompactionPickerTest, Level0TriggerDynamic3) {
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
With a fixed max_bytes_for_level_base, the size ratio between the largest level and the second-largest one can range from 0 to the multiplier. This makes the LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter options.level_compaction_dynamic_level_bytes. When it is turned on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
int num_levels = ioptions_.num_levels;
|
|
|
|
ioptions_.level_compaction_dynamic_level_bytes = true;
|
|
|
|
mutable_cf_options_.level0_file_num_compaction_trigger = 2;
|
|
|
|
mutable_cf_options_.max_bytes_for_level_base = 200;
|
|
|
|
mutable_cf_options_.max_bytes_for_level_multiplier = 10;
|
|
|
|
NewVersionStorage(num_levels, kCompactionStyleLevel);
|
|
|
|
Add(0, 1U, "150", "200");
|
|
|
|
Add(0, 2U, "200", "250");
|
|
|
|
Add(num_levels - 1, 3U, "200", "250", 300U);
|
|
|
|
Add(num_levels - 1, 4U, "300", "350", 3000U);
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
ASSERT_EQ(vstorage_->base_level(), num_levels - 3);
|
|
|
|
|
|
|
|
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
|
|
|
|
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
|
|
|
|
ASSERT_TRUE(compaction.get() != nullptr);
|
|
|
|
ASSERT_EQ(2U, compaction->num_input_files(0));
|
|
|
|
ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber());
|
|
|
|
ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber());
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are a couple of things demonstrating that the Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
ASSERT_EQ(1, static_cast<int>(compaction->num_input_levels()));
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
ASSERT_EQ(num_levels - 3, compaction->output_level());
|
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(CompactionPickerTest, Level0TriggerDynamic4) {
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
int num_levels = ioptions_.num_levels;
|
|
|
|
ioptions_.level_compaction_dynamic_level_bytes = true;
|
|
|
|
mutable_cf_options_.level0_file_num_compaction_trigger = 2;
|
|
|
|
mutable_cf_options_.max_bytes_for_level_base = 200;
|
|
|
|
mutable_cf_options_.max_bytes_for_level_multiplier = 10;
|
|
|
|
NewVersionStorage(num_levels, kCompactionStyleLevel);
|
|
|
|
Add(0, 1U, "150", "200");
|
|
|
|
Add(0, 2U, "200", "250");
|
|
|
|
Add(num_levels - 1, 3U, "200", "250", 300U);
|
|
|
|
Add(num_levels - 1, 4U, "300", "350", 3000U);
|
|
|
|
Add(num_levels - 3, 5U, "150", "180", 3U);
|
|
|
|
Add(num_levels - 3, 6U, "181", "300", 3U);
|
|
|
|
Add(num_levels - 3, 7U, "400", "450", 3U);
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
ASSERT_EQ(vstorage_->base_level(), num_levels - 3);
|
|
|
|
|
|
|
|
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
|
|
|
|
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
|
|
|
|
ASSERT_TRUE(compaction.get() != nullptr);
|
|
|
|
ASSERT_EQ(2U, compaction->num_input_files(0));
|
|
|
|
ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber());
|
|
|
|
ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber());
|
Make Compaction class easier to use
Summary:
The goal of this diff is to make Compaction class easier to use. This should also make new compaction algorithms easier to write (like CompactFiles from @yhchiang and dynamic leveled and multi-leveled universal from @sdong).
Here are couple of things demonstrating that Compaction class is hard to use:
1. we have two constructors of Compaction class
2. there's this thing called grandparents_, but it appears to only be setup for leveled compaction and not compactfiles
3. it's easy to introduce a subtle and dangerous bug like this: D36225
4. SetupBottomMostLevel() is hard to understand and it shouldn't be. See this comment: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction.cc#L236-L241. It also made it harder for @yhchiang to write CompactFiles, as evidenced by this: https://github.com/facebook/rocksdb/blob/afbafeaeaebfd27a0f3e992fee8e0c57d07658fa/db/compaction_picker.cc#L204-L210
The problem is that we create Compaction object, which holds a lot of state, and then pass it around to some functions. After those functions are done mutating, then we call couple of functions on Compaction object, like SetupBottommostLevel() and MarkFilesBeingCompacted(). It is very hard to see what's happening with all that Compaction's state while it's travelling across different functions. If you're writing a new PickCompaction() function you need to try really hard to understand what are all the functions you need to run on Compaction object and what state you need to setup.
My proposed solution is to make important parts of Compaction immutable after construction. PickCompaction() should calculate compaction inputs and then pass them onto Compaction object once they are finalized. That makes it easy to create a new compaction -- just provide all the parameters to the constructor and you're done. No need to call confusing functions after you created your object.
This diff doesn't fully achieve that goal, but it comes pretty close. Here are some of the changes:
* have one Compaction constructor instead of two.
* inputs_ is constant after construction
* MarkFilesBeingCompacted() is now private to Compaction class and automatically called on construction/destruction.
* SetupBottommostLevel() is gone. Compaction figures it out on its own based on the input.
* CompactionPicker's functions are not passing around Compaction object anymore. They are only passing around the state that they need.
Test Plan:
make check
make asan_check
make valgrind_check
Reviewers: rven, anthony, sdong, yhchiang
Reviewed By: yhchiang
Subscribers: sdong, yhchiang, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D36687
2015-04-11 00:01:54 +02:00
|
|
|
ASSERT_EQ(2U, compaction->num_input_files(1));
|
|
|
|
ASSERT_EQ(num_levels - 3, compaction->level(1));
|
|
|
|
ASSERT_EQ(5U, compaction->input(1, 0)->fd.GetNumber());
|
|
|
|
ASSERT_EQ(6U, compaction->input(1, 1)->fd.GetNumber());
|
|
|
|
ASSERT_EQ(2, static_cast<int>(compaction->num_input_levels()));
|
options.level_compaction_dynamic_level_bytes to allow RocksDB to pick size bases of levels dynamically.
Summary:
When having fixed max_bytes_for_level_base, the ratio of size of largest level and the second one can range from 0 to the multiplier. This makes LSM tree frequently irregular and unpredictable. It can also cause poor space amplification in some cases.
In this improvement (proposed by Igor Kabiljo), we introduce a parameter option.level_compaction_use_dynamic_max_bytes. When turning it on, RocksDB is free to pick a level base in the range of (options.max_bytes_for_level_base/options.max_bytes_for_level_multiplier, options.max_bytes_for_level_base] so that real level ratios are close to options.max_bytes_for_level_multiplier.
Test Plan: New unit tests and pass tests suites including valgrind.
Reviewers: MarkCallaghan, rven, yhchiang, igor, ikabiljo
Reviewed By: ikabiljo
Subscribers: yoshinorim, ikabiljo, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D31437
2015-02-05 20:44:17 +01:00
|
|
|
ASSERT_EQ(num_levels - 3, compaction->output_level());
|
|
|
|
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
// Dynamic level bytes with only one L0 file (below the L0 trigger of 2).
// Expects the picked compaction to take file 6 from level num_levels - 2
// together with files 3 and 4 from the last level, and to output to the last
// level.
TEST_F(CompactionPickerTest, LevelTriggerDynamic4) {
  int num_levels = ioptions_.num_levels;
  ioptions_.level_compaction_dynamic_level_bytes = true;
  mutable_cf_options_.level0_file_num_compaction_trigger = 2;
  mutable_cf_options_.max_bytes_for_level_base = 200;
  mutable_cf_options_.max_bytes_for_level_multiplier = 10;
  NewVersionStorage(num_levels, kCompactionStyleLevel);
  Add(0, 1U, "150", "200");
  Add(num_levels - 1, 3U, "200", "250", 300U);
  Add(num_levels - 1, 4U, "300", "350", 3000U);
  // This file used to reuse number 4U. It gets its own number so that the
  // assertion on file 4 below identifies exactly one file.
  Add(num_levels - 1, 8U, "400", "450", 3U);
  Add(num_levels - 2, 5U, "150", "180", 300U);
  Add(num_levels - 2, 6U, "181", "350", 500U);
  Add(num_levels - 2, 7U, "400", "450", 200U);

  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(1U, compaction->num_input_files(0));
  ASSERT_EQ(6U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(2U, compaction->num_input_files(1));
  ASSERT_EQ(3U, compaction->input(1, 0)->fd.GetNumber());
  ASSERT_EQ(4U, compaction->input(1, 1)->fd.GetNumber());
  ASSERT_EQ(2U, compaction->num_input_levels());
  ASSERT_EQ(num_levels - 1, compaction->output_level());
}
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(CompactionPickerTest, NeedsCompactionUniversal) {
|
2014-11-13 22:41:43 +01:00
|
|
|
NewVersionStorage(1, kCompactionStyleUniversal);
|
|
|
|
UniversalCompactionPicker universal_compaction_picker(
|
|
|
|
ioptions_, &icmp_);
|
|
|
|
// must return false when there's no files.
|
2014-11-14 00:21:04 +01:00
|
|
|
ASSERT_EQ(universal_compaction_picker.NeedsCompaction(vstorage_.get()),
|
|
|
|
false);
|
Add experimental API MarkForCompaction()
Summary:
Some Mongo+Rocks datasets in Parse's environment are not doing compactions very frequently. During the quiet period (with no IO), we'd like to schedule compactions so that our reads become faster. Also, aggressively compacting during quiet periods helps when write bursts happen. In addition, we also want to compact files that are containing deleted key ranges (like old oplog keys).
All of this is currently not possible with CompactRange() because it's single-threaded and blocks all other compactions from happening. Running CompactRange() risks an issue of blocking writes because we generate too much Level 0 files before the compaction is over. Stopping writes is very dangerous because they hold transaction locks. We tried running manual compaction once on Mongo+Rocks and everything fell apart.
MarkForCompaction() solves all of those problems. This is very light-weight manual compaction. It is lower priority than automatic compactions, which means it shouldn't interfere with background process keeping the LSM tree clean. However, if no automatic compactions need to be run (or we have extra background threads available), we will start compacting files that are marked for compaction.
Test Plan: added a new unit test
Reviewers: yhchiang, rven, MarkCallaghan, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D37083
2015-04-18 01:44:45 +02:00
|
|
|
UpdateVersionStorageInfo();
|
2014-11-13 22:41:43 +01:00
|
|
|
|
|
|
|
// verify the trigger given different number of L0 files.
|
|
|
|
for (int i = 1;
|
2014-11-14 00:21:04 +01:00
|
|
|
i <= mutable_cf_options_.level0_file_num_compaction_trigger * 2; ++i) {
|
Add experimental API MarkForCompaction()
Summary:
Some Mongo+Rocks datasets in Parse's environment are not doing compactions very frequently. During the quiet period (with no IO), we'd like to schedule compactions so that our reads become faster. Also, aggressively compacting during quiet periods helps when write bursts happen. In addition, we also want to compact files that are containing deleted key ranges (like old oplog keys).
All of this is currently not possible with CompactRange() because it's single-threaded and blocks all other compactions from happening. Running CompactRange() risks an issue of blocking writes because we generate too much Level 0 files before the compaction is over. Stopping writes is very dangerous because they hold transaction locks. We tried running manual compaction once on Mongo+Rocks and everything fell apart.
MarkForCompaction() solves all of those problems. This is very light-weight manual compaction. It is lower priority than automatic compactions, which means it shouldn't interfere with background process keeping the LSM tree clean. However, if no automatic compactions need to be run (or we have extra background threads available), we will start compacting files that are marked for compaction.
Test Plan: added a new unit test
Reviewers: yhchiang, rven, MarkCallaghan, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D37083
2015-04-18 01:44:45 +02:00
|
|
|
NewVersionStorage(1, kCompactionStyleUniversal);
|
2014-11-25 05:44:49 +01:00
|
|
|
Add(0, i, ToString((i + 100) * 1000).c_str(),
|
|
|
|
ToString((i + 100) * 1000 + 999).c_str(), 1000000, 0, i * 100,
|
2014-11-14 00:21:04 +01:00
|
|
|
i * 100 + 99);
|
Add experimental API MarkForCompaction()
Summary:
Some Mongo+Rocks datasets in Parse's environment are not doing compactions very frequently. During the quiet period (with no IO), we'd like to schedule compactions so that our reads become faster. Also, aggressively compacting during quiet periods helps when write bursts happen. In addition, we also want to compact files that are containing deleted key ranges (like old oplog keys).
All of this is currently not possible with CompactRange() because it's single-threaded and blocks all other compactions from happening. Running CompactRange() risks an issue of blocking writes because we generate too much Level 0 files before the compaction is over. Stopping writes is very dangerous because they hold transaction locks. We tried running manual compaction once on Mongo+Rocks and everything fell apart.
MarkForCompaction() solves all of those problems. This is very light-weight manual compaction. It is lower priority than automatic compactions, which means it shouldn't interfere with background process keeping the LSM tree clean. However, if no automatic compactions need to be run (or we have extra background threads available), we will start compacting files that are marked for compaction.
Test Plan: added a new unit test
Reviewers: yhchiang, rven, MarkCallaghan, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D37083
2015-04-18 01:44:45 +02:00
|
|
|
UpdateVersionStorageInfo();
|
2014-11-14 00:21:04 +01:00
|
|
|
ASSERT_EQ(level_compaction_picker.NeedsCompaction(vstorage_.get()),
|
|
|
|
vstorage_->CompactionScore(0) >= 1);
|
2014-11-13 22:41:43 +01:00
|
|
|
}
|
|
|
|
}
|
2015-07-07 23:18:55 +02:00
|
|
|
// Tests if the files can be trivially moved in multi level
|
|
|
|
// universal compaction when allow_trivial_move option is set
|
|
|
|
// In this test as the input files overlaps, they cannot
|
|
|
|
// be trivially moved.
|
|
|
|
|
|
|
|
TEST_F(CompactionPickerTest, CannotTrivialMoveUniversal) {
|
|
|
|
const uint64_t kFileSize = 100000;
|
|
|
|
|
|
|
|
ioptions_.compaction_options_universal.allow_trivial_move = true;
|
|
|
|
NewVersionStorage(1, kCompactionStyleUniversal);
|
|
|
|
UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_);
|
|
|
|
// must return false when there's no files.
|
|
|
|
ASSERT_EQ(universal_compaction_picker.NeedsCompaction(vstorage_.get()),
|
|
|
|
false);
|
|
|
|
|
|
|
|
NewVersionStorage(3, kCompactionStyleUniversal);
|
|
|
|
|
|
|
|
Add(0, 1U, "150", "200", kFileSize, 0, 500, 550);
|
|
|
|
Add(0, 2U, "201", "250", kFileSize, 0, 401, 450);
|
|
|
|
Add(0, 4U, "260", "300", kFileSize, 0, 260, 300);
|
|
|
|
Add(1, 5U, "100", "151", kFileSize, 0, 200, 251);
|
|
|
|
Add(1, 3U, "301", "350", kFileSize, 0, 101, 150);
|
|
|
|
Add(2, 6U, "120", "200", kFileSize, 0, 20, 100);
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
std::unique_ptr<Compaction> compaction(
|
|
|
|
universal_compaction_picker.PickCompaction(
|
|
|
|
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
|
|
|
|
|
|
|
|
ASSERT_TRUE(!compaction->is_trivial_move());
|
|
|
|
}
|
|
|
|
// Tests if the files can be trivially moved in multi level
|
|
|
|
// universal compaction when allow_trivial_move option is set
|
|
|
|
// In this test as the input files doesn't overlaps, they should
|
|
|
|
// be trivially moved.
|
|
|
|
TEST_F(CompactionPickerTest, AllowsTrivialMoveUniversal) {
|
|
|
|
const uint64_t kFileSize = 100000;
|
|
|
|
|
|
|
|
ioptions_.compaction_options_universal.allow_trivial_move = true;
|
|
|
|
UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_);
|
|
|
|
|
|
|
|
NewVersionStorage(3, kCompactionStyleUniversal);
|
|
|
|
|
|
|
|
Add(0, 1U, "150", "200", kFileSize, 0, 500, 550);
|
|
|
|
Add(0, 2U, "201", "250", kFileSize, 0, 401, 450);
|
|
|
|
Add(0, 4U, "260", "300", kFileSize, 0, 260, 300);
|
|
|
|
Add(1, 5U, "010", "080", kFileSize, 0, 200, 251);
|
|
|
|
Add(2, 3U, "301", "350", kFileSize, 0, 101, 150);
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
std::unique_ptr<Compaction> compaction(
|
|
|
|
universal_compaction_picker.PickCompaction(
|
|
|
|
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
|
|
|
|
|
|
|
|
ASSERT_TRUE(compaction->is_trivial_move());
|
|
|
|
}
|
2014-11-13 22:41:43 +01:00
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
TEST_F(CompactionPickerTest, NeedsCompactionFIFO) {
|
2014-11-13 22:41:43 +01:00
|
|
|
NewVersionStorage(1, kCompactionStyleFIFO);
|
|
|
|
const int kFileCount =
|
|
|
|
mutable_cf_options_.level0_file_num_compaction_trigger * 3;
|
|
|
|
const uint64_t kFileSize = 100000;
|
|
|
|
const uint64_t kMaxSize = kFileSize * kFileCount / 2;
|
|
|
|
|
|
|
|
fifo_options_.max_table_files_size = kMaxSize;
|
|
|
|
ioptions_.compaction_options_fifo = fifo_options_;
|
|
|
|
FIFOCompactionPicker fifo_compaction_picker(ioptions_, &icmp_);
|
|
|
|
|
Add experimental API MarkForCompaction()
Summary:
Some Mongo+Rocks datasets in Parse's environment are not doing compactions very frequently. During the quiet period (with no IO), we'd like to schedule compactions so that our reads become faster. Also, aggressively compacting during quiet periods helps when write bursts happen. In addition, we also want to compact files that are containing deleted key ranges (like old oplog keys).
All of this is currently not possible with CompactRange() because it's single-threaded and blocks all other compactions from happening. Running CompactRange() risks an issue of blocking writes because we generate too much Level 0 files before the compaction is over. Stopping writes is very dangerous because they hold transaction locks. We tried running manual compaction once on Mongo+Rocks and everything fell apart.
MarkForCompaction() solves all of those problems. This is very light-weight manual compaction. It is lower priority than automatic compactions, which means it shouldn't interfere with background process keeping the LSM tree clean. However, if no automatic compactions need to be run (or we have extra background threads available), we will start compacting files that are marked for compaction.
Test Plan: added a new unit test
Reviewers: yhchiang, rven, MarkCallaghan, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D37083
2015-04-18 01:44:45 +02:00
|
|
|
UpdateVersionStorageInfo();
|
2014-11-13 22:41:43 +01:00
|
|
|
// must return false when there's no files.
|
2014-11-14 00:21:04 +01:00
|
|
|
ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()), false);
|
2014-11-13 22:41:43 +01:00
|
|
|
|
|
|
|
// verify whether compaction is needed based on the current
|
|
|
|
// size of L0 files.
|
|
|
|
uint64_t current_size = 0;
|
|
|
|
for (int i = 1; i <= kFileCount; ++i) {
|
Add experimental API MarkForCompaction()
Summary:
Some Mongo+Rocks datasets in Parse's environment are not doing compactions very frequently. During the quiet period (with no IO), we'd like to schedule compactions so that our reads become faster. Also, aggressively compacting during quiet periods helps when write bursts happen. In addition, we also want to compact files that are containing deleted key ranges (like old oplog keys).
All of this is currently not possible with CompactRange() because it's single-threaded and blocks all other compactions from happening. Running CompactRange() risks an issue of blocking writes because we generate too much Level 0 files before the compaction is over. Stopping writes is very dangerous because they hold transaction locks. We tried running manual compaction once on Mongo+Rocks and everything fell apart.
MarkForCompaction() solves all of those problems. This is very light-weight manual compaction. It is lower priority than automatic compactions, which means it shouldn't interfere with background process keeping the LSM tree clean. However, if no automatic compactions need to be run (or we have extra background threads available), we will start compacting files that are marked for compaction.
Test Plan: added a new unit test
Reviewers: yhchiang, rven, MarkCallaghan, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D37083
2015-04-18 01:44:45 +02:00
|
|
|
NewVersionStorage(1, kCompactionStyleFIFO);
|
2014-11-25 05:44:49 +01:00
|
|
|
Add(0, i, ToString((i + 100) * 1000).c_str(),
|
|
|
|
ToString((i + 100) * 1000 + 999).c_str(),
|
2014-11-13 22:41:43 +01:00
|
|
|
kFileSize, 0, i * 100, i * 100 + 99);
|
|
|
|
current_size += kFileSize;
|
Add experimental API MarkForCompaction()
Summary:
Some Mongo+Rocks datasets in Parse's environment are not doing compactions very frequently. During the quiet period (with no IO), we'd like to schedule compactions so that our reads become faster. Also, aggressively compacting during quiet periods helps when write bursts happen. In addition, we also want to compact files that are containing deleted key ranges (like old oplog keys).
All of this is currently not possible with CompactRange() because it's single-threaded and blocks all other compactions from happening. Running CompactRange() risks an issue of blocking writes because we generate too much Level 0 files before the compaction is over. Stopping writes is very dangerous because they hold transaction locks. We tried running manual compaction once on Mongo+Rocks and everything fell apart.
MarkForCompaction() solves all of those problems. This is very light-weight manual compaction. It is lower priority than automatic compactions, which means it shouldn't interfere with background process keeping the LSM tree clean. However, if no automatic compactions need to be run (or we have extra background threads available), we will start compacting files that are marked for compaction.
Test Plan: added a new unit test
Reviewers: yhchiang, rven, MarkCallaghan, sdong
Reviewed By: sdong
Subscribers: yoshinorim, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D37083
2015-04-18 01:44:45 +02:00
|
|
|
UpdateVersionStorageInfo();
|
2014-11-14 00:21:04 +01:00
|
|
|
ASSERT_EQ(level_compaction_picker.NeedsCompaction(vstorage_.get()),
|
|
|
|
vstorage_->CompactionScore(0) >= 1);
|
2014-11-13 22:41:43 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-05-12 20:16:25 +02:00
|
|
|
// This test exhibits the bug where we don't properly reset parent_index in
|
|
|
|
// PickCompaction()
|
|
|
|
TEST_F(CompactionPickerTest, ParentIndexResetBug) {
|
|
|
|
int num_levels = ioptions_.num_levels;
|
|
|
|
mutable_cf_options_.level0_file_num_compaction_trigger = 2;
|
|
|
|
mutable_cf_options_.max_bytes_for_level_base = 200;
|
|
|
|
NewVersionStorage(num_levels, kCompactionStyleLevel);
|
|
|
|
Add(0, 1U, "150", "200"); // <- marked for compaction
|
|
|
|
Add(1, 3U, "400", "500", 600); // <- this one needs compacting
|
|
|
|
Add(2, 4U, "150", "200");
|
|
|
|
Add(2, 5U, "201", "210");
|
|
|
|
Add(2, 6U, "300", "310");
|
|
|
|
Add(2, 7U, "400", "500"); // <- being compacted
|
|
|
|
|
|
|
|
vstorage_->LevelFiles(2)[3]->being_compacted = true;
|
|
|
|
vstorage_->LevelFiles(0)[0]->marked_for_compaction = true;
|
|
|
|
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
|
|
|
|
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
|
|
|
|
}
|
2014-11-13 22:41:43 +01:00
|
|
|
|
2015-07-07 07:25:27 +02:00
|
|
|
// This test checks ExpandWhileOverlapping() by having overlapping user keys
|
|
|
|
// ranges (with different sequence numbers) in the input files.
|
|
|
|
TEST_F(CompactionPickerTest, OverlappingUserKeys) {
|
|
|
|
NewVersionStorage(6, kCompactionStyleLevel);
|
|
|
|
Add(1, 1U, "100", "150", 1U);
|
|
|
|
// Overlapping user keys
|
|
|
|
Add(1, 2U, "200", "400", 1U);
|
|
|
|
Add(1, 3U, "400", "500", 1000000000U, 0, 0);
|
|
|
|
Add(2, 4U, "600", "700", 1U);
|
|
|
|
UpdateVersionStorageInfo();
|
|
|
|
|
|
|
|
std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
|
|
|
|
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
|
|
|
|
ASSERT_TRUE(compaction.get() != nullptr);
|
|
|
|
ASSERT_EQ(1U, compaction->num_input_levels());
|
|
|
|
ASSERT_EQ(2U, compaction->num_input_files(0));
|
|
|
|
ASSERT_EQ(2U, compaction->input(0, 0)->fd.GetNumber());
|
|
|
|
ASSERT_EQ(3U, compaction->input(0, 1)->fd.GetNumber());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Overlapping user keys both within the input level and in the output level.
// Expects files 1 and 2 from the first input level and file 3 from the
// second; file 4 must stay out of the compaction.
TEST_F(CompactionPickerTest, OverlappingUserKeys2) {
  NewVersionStorage(6, kCompactionStyleLevel);
  // Overlapping user keys on same level and output level
  Add(1, 1U, "200", "400", 1000000000U);
  Add(1, 2U, "400", "500", 1U, 0, 0);
  Add(2, 3U, "400", "600", 1U);
  // The following file is not in the compaction despite overlapping user keys
  Add(2, 4U, "600", "700", 1U, 0, 0);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(2U, compaction->num_input_levels());
  ASSERT_EQ(2U, compaction->num_input_files(0));
  ASSERT_EQ(1U, compaction->num_input_files(1));
  ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber());
  ASSERT_EQ(3U, compaction->input(1, 0)->fd.GetNumber());
}
|
|
|
|
|
|
|
|
// Five input-level files whose boundary keys chain together ("150", "200",
// "250", "300"). Expects all five to be picked, plus the two output-level
// files that touch either end of the chain.
TEST_F(CompactionPickerTest, OverlappingUserKeys3) {
  NewVersionStorage(6, kCompactionStyleLevel);
  // Chain of overlapping user key ranges (forces ExpandWhileOverlapping() to
  // expand multiple times)
  Add(1, 1U, "100", "150", 1U);
  Add(1, 2U, "150", "200", 1U, 0, 0);
  Add(1, 3U, "200", "250", 1000000000U, 0, 0);
  Add(1, 4U, "250", "300", 1U, 0, 0);
  Add(1, 5U, "300", "350", 1U, 0, 0);
  // Output level overlaps with the beginning and the end of the chain
  Add(2, 6U, "050", "100", 1U);
  Add(2, 7U, "350", "400", 1U);
  UpdateVersionStorageInfo();

  std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
      cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
  ASSERT_TRUE(compaction.get() != nullptr);
  ASSERT_EQ(2U, compaction->num_input_levels());
  ASSERT_EQ(5U, compaction->num_input_files(0));
  ASSERT_EQ(2U, compaction->num_input_files(1));
  ASSERT_EQ(1U, compaction->input(0, 0)->fd.GetNumber());
  ASSERT_EQ(2U, compaction->input(0, 1)->fd.GetNumber());
  ASSERT_EQ(3U, compaction->input(0, 2)->fd.GetNumber());
  ASSERT_EQ(4U, compaction->input(0, 3)->fd.GetNumber());
  ASSERT_EQ(5U, compaction->input(0, 4)->fd.GetNumber());
  ASSERT_EQ(6U, compaction->input(1, 0)->fd.GetNumber());
  ASSERT_EQ(7U, compaction->input(1, 1)->fd.GetNumber());
}
|
|
|
|
|
2014-10-27 23:49:46 +01:00
|
|
|
} // namespace rocksdb
|
|
|
|
|
2015-03-17 22:08:00 +01:00
|
|
|
int main(int argc, char** argv) {
|
|
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
|
|
return RUN_ALL_TESTS();
|
|
|
|
}
|