[RocksDB] Enable manual compaction to move files back to an appropriate level.
Summary: As title. This diff added an option reduce_level to CompactRange. When set to true, it will try to move the files back to the minimum level sufficient to hold the data set. Note that the default is set to true now, just to exercise it in all existing tests. Will set the default to false before check-in, for backward compatibility. Test Plan: make check; Reviewers: dhruba, emayanke CC: leveldb Differential Revision: https://reviews.facebook.net/D11553
This commit is contained in:
parent
e9b675bd94
commit
9ee68871dc
@ -182,7 +182,9 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
|
|||||||
stats_(options.num_levels),
|
stats_(options.num_levels),
|
||||||
delayed_writes_(0),
|
delayed_writes_(0),
|
||||||
last_flushed_sequence_(0),
|
last_flushed_sequence_(0),
|
||||||
storage_options_(options) {
|
storage_options_(options),
|
||||||
|
bg_work_gate_closed_(false),
|
||||||
|
refitting_level_(false) {
|
||||||
|
|
||||||
mem_->Ref();
|
mem_->Ref();
|
||||||
|
|
||||||
@ -900,7 +902,8 @@ Status DBImpl::CompactMemTable(bool* madeProgress) {
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DBImpl::CompactRange(const Slice* begin, const Slice* end) {
|
void DBImpl::CompactRange(const Slice* begin, const Slice* end,
|
||||||
|
bool reduce_level) {
|
||||||
int max_level_with_files = 1;
|
int max_level_with_files = 1;
|
||||||
{
|
{
|
||||||
MutexLock l(&mutex_);
|
MutexLock l(&mutex_);
|
||||||
@ -915,6 +918,78 @@ void DBImpl::CompactRange(const Slice* begin, const Slice* end) {
|
|||||||
for (int level = 0; level < max_level_with_files; level++) {
|
for (int level = 0; level < max_level_with_files; level++) {
|
||||||
TEST_CompactRange(level, begin, end);
|
TEST_CompactRange(level, begin, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (reduce_level) {
|
||||||
|
ReFitLevel(max_level_with_files);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// return the same level if it cannot be moved
|
||||||
|
int DBImpl::FindMinimumEmptyLevelFitting(int level) {
|
||||||
|
mutex_.AssertHeld();
|
||||||
|
int minimum_level = level;
|
||||||
|
for (int i = level - 1; i > 0; ++i) {
|
||||||
|
// stop if level i is not empty
|
||||||
|
if (versions_->NumLevelFiles(i) > 0) break;
|
||||||
|
|
||||||
|
// stop if level i is too small (cannot fit the level files)
|
||||||
|
if (versions_->MaxBytesForLevel(i) < versions_->NumLevelBytes(level)) break;
|
||||||
|
|
||||||
|
minimum_level = i;
|
||||||
|
}
|
||||||
|
return minimum_level;
|
||||||
|
}
|
||||||
|
|
||||||
|
void DBImpl::ReFitLevel(int level) {
|
||||||
|
assert(level < NumberLevels());
|
||||||
|
|
||||||
|
MutexLock l(&mutex_);
|
||||||
|
|
||||||
|
// only allow one thread refitting
|
||||||
|
if (refitting_level_) {
|
||||||
|
Log(options_.info_log, "ReFitLevel: another thread is refitting");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
refitting_level_ = true;
|
||||||
|
|
||||||
|
// wait for all background threads to stop
|
||||||
|
bg_work_gate_closed_ = true;
|
||||||
|
while (bg_compaction_scheduled_ > 0) {
|
||||||
|
Log(options_.info_log,
|
||||||
|
"RefitLevel: waiting for background threads to stop: %d",
|
||||||
|
bg_compaction_scheduled_);
|
||||||
|
bg_cv_.Wait();
|
||||||
|
}
|
||||||
|
|
||||||
|
// move to a smaller level
|
||||||
|
int to_level = FindMinimumEmptyLevelFitting(level);
|
||||||
|
|
||||||
|
assert(to_level <= level);
|
||||||
|
|
||||||
|
if (to_level < level) {
|
||||||
|
Log(options_.info_log, "Before refitting:\n%s",
|
||||||
|
versions_->current()->DebugString().data());
|
||||||
|
|
||||||
|
VersionEdit edit(NumberLevels());
|
||||||
|
for (const auto& f : versions_->current()->files_[level]) {
|
||||||
|
edit.DeleteFile(level, f->number);
|
||||||
|
edit.AddFile(to_level, f->number, f->file_size, f->smallest, f->largest);
|
||||||
|
}
|
||||||
|
Log(options_.info_log, "Apply version edit:\n%s",
|
||||||
|
edit.DebugString().data());
|
||||||
|
|
||||||
|
auto status = versions_->LogAndApply(&edit, &mutex_);
|
||||||
|
|
||||||
|
Log(options_.info_log, "LogAndApply: %s\n", status.ToString().data());
|
||||||
|
|
||||||
|
if (status.ok()) {
|
||||||
|
Log(options_.info_log, "After refitting:\n%s",
|
||||||
|
versions_->current()->DebugString().data());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
refitting_level_ = false;
|
||||||
|
bg_work_gate_closed_ = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int DBImpl::NumberLevels() {
|
int DBImpl::NumberLevels() {
|
||||||
@ -1238,7 +1313,9 @@ Status DBImpl::TEST_WaitForCompact() {
|
|||||||
|
|
||||||
void DBImpl::MaybeScheduleCompaction() {
|
void DBImpl::MaybeScheduleCompaction() {
|
||||||
mutex_.AssertHeld();
|
mutex_.AssertHeld();
|
||||||
if (bg_compaction_scheduled_ >= options_.max_background_compactions) {
|
if (bg_work_gate_closed_) {
|
||||||
|
// gate closed for backgrond work
|
||||||
|
} else if (bg_compaction_scheduled_ >= options_.max_background_compactions) {
|
||||||
// Already scheduled
|
// Already scheduled
|
||||||
} else if (shutting_down_.Acquire_Load()) {
|
} else if (shutting_down_.Acquire_Load()) {
|
||||||
// DB is being deleted; no more background compactions
|
// DB is being deleted; no more background compactions
|
||||||
|
17
db/db_impl.h
17
db/db_impl.h
@ -57,7 +57,8 @@ class DBImpl : public DB {
|
|||||||
virtual void ReleaseSnapshot(const Snapshot* snapshot);
|
virtual void ReleaseSnapshot(const Snapshot* snapshot);
|
||||||
virtual bool GetProperty(const Slice& property, std::string* value);
|
virtual bool GetProperty(const Slice& property, std::string* value);
|
||||||
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
|
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
|
||||||
virtual void CompactRange(const Slice* begin, const Slice* end);
|
virtual void CompactRange(const Slice* begin, const Slice* end,
|
||||||
|
bool reduce_level = false);
|
||||||
virtual int NumberLevels();
|
virtual int NumberLevels();
|
||||||
virtual int MaxMemCompactionLevel();
|
virtual int MaxMemCompactionLevel();
|
||||||
virtual int Level0StopWriteTrigger();
|
virtual int Level0StopWriteTrigger();
|
||||||
@ -221,6 +222,14 @@ class DBImpl : public DB {
|
|||||||
// dump leveldb.stats to LOG
|
// dump leveldb.stats to LOG
|
||||||
void MaybeDumpStats();
|
void MaybeDumpStats();
|
||||||
|
|
||||||
|
// Return the minimum empty level that could hold the total data in the
|
||||||
|
// input level. Return the input level, if such level could not be found.
|
||||||
|
int FindMinimumEmptyLevelFitting(int level);
|
||||||
|
|
||||||
|
// Move the files in the input level to the minimum level that could hold
|
||||||
|
// the data set.
|
||||||
|
void ReFitLevel(int level);
|
||||||
|
|
||||||
// Constant after construction
|
// Constant after construction
|
||||||
const InternalFilterPolicy internal_filter_policy_;
|
const InternalFilterPolicy internal_filter_policy_;
|
||||||
bool owns_info_log_;
|
bool owns_info_log_;
|
||||||
@ -370,6 +379,12 @@ class DBImpl : public DB {
|
|||||||
// The options to access storage files
|
// The options to access storage files
|
||||||
const EnvOptions storage_options_;
|
const EnvOptions storage_options_;
|
||||||
|
|
||||||
|
// A value of true temporarily disables scheduling of background work
|
||||||
|
bool bg_work_gate_closed_;
|
||||||
|
|
||||||
|
// Guard against multiple concurrent refitting
|
||||||
|
bool refitting_level_;
|
||||||
|
|
||||||
// No copying allowed
|
// No copying allowed
|
||||||
DBImpl(const DBImpl&);
|
DBImpl(const DBImpl&);
|
||||||
void operator=(const DBImpl&);
|
void operator=(const DBImpl&);
|
||||||
|
@ -47,7 +47,8 @@ public:
|
|||||||
virtual Status Write(const WriteOptions& options, WriteBatch* updates) {
|
virtual Status Write(const WriteOptions& options, WriteBatch* updates) {
|
||||||
return Status::NotSupported("Not supported operation in read only mode.");
|
return Status::NotSupported("Not supported operation in read only mode.");
|
||||||
}
|
}
|
||||||
virtual void CompactRange(const Slice* begin, const Slice* end) {
|
virtual void CompactRange(const Slice* begin, const Slice* end,
|
||||||
|
bool reduce_level = false) {
|
||||||
}
|
}
|
||||||
virtual Status DisableFileDeletions() {
|
virtual Status DisableFileDeletions() {
|
||||||
return Status::NotSupported("Not supported operation in read only mode.");
|
return Status::NotSupported("Not supported operation in read only mode.");
|
||||||
|
@ -3058,7 +3058,8 @@ class ModelDB: public DB {
|
|||||||
sizes[i] = 0;
|
sizes[i] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
virtual void CompactRange(const Slice* start, const Slice* end) {
|
virtual void CompactRange(const Slice* start, const Slice* end,
|
||||||
|
bool reduce_level ) {
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual int NumberLevels()
|
virtual int NumberLevels()
|
||||||
|
@ -136,6 +136,7 @@ class Version {
|
|||||||
private:
|
private:
|
||||||
friend class Compaction;
|
friend class Compaction;
|
||||||
friend class VersionSet;
|
friend class VersionSet;
|
||||||
|
friend class DBImpl;
|
||||||
|
|
||||||
class LevelFileNumIterator;
|
class LevelFileNumIterator;
|
||||||
Iterator* NewConcatenatingIterator(const ReadOptions&,
|
Iterator* NewConcatenatingIterator(const ReadOptions&,
|
||||||
|
@ -180,7 +180,14 @@ class DB {
|
|||||||
// end==nullptr is treated as a key after all keys in the database.
|
// end==nullptr is treated as a key after all keys in the database.
|
||||||
// Therefore the following call will compact the entire database:
|
// Therefore the following call will compact the entire database:
|
||||||
// db->CompactRange(nullptr, nullptr);
|
// db->CompactRange(nullptr, nullptr);
|
||||||
virtual void CompactRange(const Slice* begin, const Slice* end) = 0;
|
// Note that after the entire database is compacted, all data are pushed
|
||||||
|
// down to the last level containing any data. If the total data size
|
||||||
|
// after compaction is reduced, that level might not be appropriate for
|
||||||
|
// hosting all the files. In this case, client could set reduce_level
|
||||||
|
// to true, to move the files back to the minimum level capable of holding
|
||||||
|
// the data set.
|
||||||
|
virtual void CompactRange(const Slice* begin, const Slice* end,
|
||||||
|
bool reduce_level = false) = 0;
|
||||||
|
|
||||||
// Number of levels used for this DB.
|
// Number of levels used for this DB.
|
||||||
virtual int NumberLevels() = 0;
|
virtual int NumberLevels() = 0;
|
||||||
|
@ -223,8 +223,9 @@ void DBWithTTL::GetApproximateSizes(const Range* r, int n, uint64_t* sizes) {
|
|||||||
db_->GetApproximateSizes(r, n, sizes);
|
db_->GetApproximateSizes(r, n, sizes);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DBWithTTL::CompactRange(const Slice* begin, const Slice* end) {
|
void DBWithTTL::CompactRange(const Slice* begin, const Slice* end,
|
||||||
db_->CompactRange(begin, end);
|
bool reduce_level) {
|
||||||
|
db_->CompactRange(begin, end, reduce_level);
|
||||||
}
|
}
|
||||||
|
|
||||||
int DBWithTTL::NumberLevels() {
|
int DBWithTTL::NumberLevels() {
|
||||||
|
@ -54,7 +54,8 @@ class DBWithTTL : public DB, CompactionFilter {
|
|||||||
|
|
||||||
virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes);
|
virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes);
|
||||||
|
|
||||||
virtual void CompactRange(const Slice* begin, const Slice* end);
|
virtual void CompactRange(const Slice* begin, const Slice* end,
|
||||||
|
bool reduce_level = false);
|
||||||
|
|
||||||
virtual int NumberLevels();
|
virtual int NumberLevels();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user