2014-01-16 01:22:34 +01:00
|
|
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under the BSD-style license found in the
|
|
|
|
// LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
// of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#pragma once
|
2014-07-11 21:52:41 +02:00
|
|
|
#include "util/arena.h"
|
|
|
|
#include "util/autovector.h"
|
2014-10-02 01:19:16 +02:00
|
|
|
#include "util/mutable_cf_options.h"
|
2014-01-16 01:22:34 +01:00
|
|
|
#include "db/version_set.h"
|
|
|
|
|
|
|
|
namespace rocksdb {
|
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
// The structure that manages compaction input files associated
|
|
|
|
// with the same physical level.
|
2014-07-17 03:12:17 +02:00
|
|
|
struct CompactionInputFiles {
|
|
|
|
int level;
|
|
|
|
std::vector<FileMetaData*> files;
|
|
|
|
inline bool empty() const { return files.empty(); }
|
|
|
|
inline size_t size() const { return files.size(); }
|
|
|
|
inline void clear() { files.clear(); }
|
2014-11-11 22:47:22 +01:00
|
|
|
inline FileMetaData* operator[](size_t i) const { return files[i]; }
|
2014-07-17 03:12:17 +02:00
|
|
|
};
|
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
class Version;
|
2014-02-01 01:45:20 +01:00
|
|
|
class ColumnFamilyData;
|
2014-10-27 23:49:46 +01:00
|
|
|
class VersionStorageInfo;
|
2014-01-16 01:22:34 +01:00
|
|
|
|
|
|
|
// A Compaction encapsulates information about a compaction.
|
|
|
|
class Compaction {
|
|
|
|
public:
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 23:45:18 +01:00
|
|
|
Compaction(VersionStorageInfo* input_version,
|
|
|
|
const autovector<CompactionInputFiles>& inputs,
|
|
|
|
int start_level, int output_level,
|
|
|
|
uint64_t max_grandparent_overlap_bytes,
|
|
|
|
const CompactionOptions& options,
|
|
|
|
bool deletion_compaction);
|
|
|
|
|
2014-07-11 21:52:41 +02:00
|
|
|
// No copying allowed
|
|
|
|
Compaction(const Compaction&) = delete;
|
|
|
|
void operator=(const Compaction&) = delete;
|
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
~Compaction();
|
|
|
|
|
2014-07-17 03:12:17 +02:00
|
|
|
// Returns the level associated to the specified compaction input level.
|
2014-07-17 23:36:41 +02:00
|
|
|
// If compaction_input_level is not specified, then input_level is set to 0.
|
2014-11-11 22:47:22 +01:00
|
|
|
int level(size_t compaction_input_level = 0) const {
|
2014-07-17 23:36:41 +02:00
|
|
|
return inputs_[compaction_input_level].level;
|
|
|
|
}
|
2014-01-16 01:22:34 +01:00
|
|
|
|
|
|
|
// Outputs will go to this level
|
2014-07-17 23:36:41 +02:00
|
|
|
int output_level() const { return output_level_; }
|
|
|
|
|
|
|
|
// Returns the number of input levels in this compaction.
|
2014-11-11 22:47:22 +01:00
|
|
|
size_t num_input_levels() const { return inputs_.size(); }
|
2014-01-16 01:22:34 +01:00
|
|
|
|
|
|
|
// Return the object that holds the edits to the descriptor done
|
|
|
|
// by this compaction.
|
2014-07-17 23:36:41 +02:00
|
|
|
VersionEdit* edit() const { return edit_; }
|
2014-01-16 01:22:34 +01:00
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
// Returns the number of input files associated to the specified
|
|
|
|
// compaction input level.
|
|
|
|
// The function will return 0 if when "compaction_input_level" < 0
|
|
|
|
// or "compaction_input_level" >= "num_input_levels()".
|
2014-11-11 22:47:22 +01:00
|
|
|
size_t num_input_files(size_t compaction_input_level) const {
|
2014-07-18 01:37:25 +02:00
|
|
|
if (compaction_input_level < inputs_.size()) {
|
2014-07-17 23:36:41 +02:00
|
|
|
return inputs_[compaction_input_level].size();
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
2014-01-16 01:22:34 +01:00
|
|
|
|
2014-01-22 19:55:16 +01:00
|
|
|
// Returns input version of the compaction
|
|
|
|
Version* input_version() const { return input_version_; }
|
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
// Returns the ColumnFamilyData associated with the compaction.
|
2014-02-01 01:45:20 +01:00
|
|
|
ColumnFamilyData* column_family_data() const { return cfd_; }
|
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
// Returns the file meta data of the 'i'th input file at the
|
|
|
|
// specified compaction input level.
|
|
|
|
// REQUIREMENT: "compaction_input_level" must be >= 0 and
|
|
|
|
// < "input_levels()"
|
2014-11-11 22:47:22 +01:00
|
|
|
FileMetaData* input(size_t compaction_input_level, size_t i) const {
|
2014-07-18 01:37:25 +02:00
|
|
|
assert(compaction_input_level < inputs_.size());
|
2014-07-17 23:36:41 +02:00
|
|
|
return inputs_[compaction_input_level][i];
|
|
|
|
}
|
2014-01-16 01:22:34 +01:00
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
// Returns the list of file meta data of the specified compaction
|
|
|
|
// input level.
|
|
|
|
// REQUIREMENT: "compaction_input_level" must be >= 0 and
|
|
|
|
// < "input_levels()"
|
2015-02-20 20:21:19 +01:00
|
|
|
const std::vector<FileMetaData*>* inputs(size_t compaction_input_level) {
|
2014-07-18 01:37:25 +02:00
|
|
|
assert(compaction_input_level < inputs_.size());
|
2014-07-17 23:36:41 +02:00
|
|
|
return &inputs_[compaction_input_level].files;
|
2014-07-17 03:12:17 +02:00
|
|
|
}
|
2014-01-22 19:55:16 +01:00
|
|
|
|
2014-10-28 18:03:13 +01:00
|
|
|
// Returns the LevelFilesBrief of the specified compaction input level.
|
2014-11-11 22:47:22 +01:00
|
|
|
LevelFilesBrief* input_levels(size_t compaction_input_level) {
|
2014-07-17 23:36:41 +02:00
|
|
|
return &input_levels_[compaction_input_level];
|
|
|
|
}
|
2014-07-11 21:52:41 +02:00
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
// Maximum size of files to build during this compaction.
|
|
|
|
uint64_t MaxOutputFileSize() const { return max_output_file_size_; }
|
|
|
|
|
2014-07-02 20:40:57 +02:00
|
|
|
// What compression for output
|
|
|
|
CompressionType OutputCompressionType() const { return output_compression_; }
|
2014-01-16 01:22:34 +01:00
|
|
|
|
2014-07-02 18:54:20 +02:00
|
|
|
// Whether need to write output file to second DB path.
|
|
|
|
uint32_t GetOutputPathId() const { return output_path_id_; }
|
|
|
|
|
2014-07-11 21:52:41 +02:00
|
|
|
// Generate input_levels_ from inputs_
|
|
|
|
// Should be called when inputs_ is stable
|
|
|
|
void GenerateFileLevels();
|
|
|
|
|
|
|
|
// Is this a trivial compaction that can be implemented by just
|
2014-01-16 01:22:34 +01:00
|
|
|
// moving a single input file to the next level (no merging or splitting)
|
|
|
|
bool IsTrivialMove() const;
|
|
|
|
|
2014-12-17 01:57:22 +01:00
|
|
|
// If true, then the compaction can be done by simply deleting input files.
|
2014-07-17 23:36:41 +02:00
|
|
|
bool IsDeletionCompaction() const {
|
|
|
|
return deletion_compaction_;
|
|
|
|
}
|
2014-05-21 20:43:35 +02:00
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
// Add all inputs to this compaction as delete operations to *edit.
|
|
|
|
void AddInputDeletions(VersionEdit* edit);
|
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
// Returns true if the available information we have guarantees that
|
|
|
|
// the input "user_key" does not exist in any level beyond "output_level()".
|
|
|
|
bool KeyNotExistsBeyondOutputLevel(const Slice& user_key);
|
2014-01-16 01:22:34 +01:00
|
|
|
|
|
|
|
// Returns true iff we should stop building the current output
|
|
|
|
// before processing "internal_key".
|
|
|
|
bool ShouldStopBefore(const Slice& internal_key);
|
|
|
|
|
2014-02-01 01:45:20 +01:00
|
|
|
// Clear all files to indicate that they are not being compacted
|
|
|
|
// Delete this compaction from the list of running compactions.
|
|
|
|
void ReleaseCompactionFiles(Status status);
|
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
// Returns the summary of the compaction in "output" with maximum "len"
|
|
|
|
// in bytes. The caller is responsible for the memory management of
|
|
|
|
// "output".
|
2014-01-16 01:22:34 +01:00
|
|
|
void Summary(char* output, int len);
|
|
|
|
|
|
|
|
// Return the score that was used to pick this compaction run.
|
|
|
|
double score() const { return score_; }
|
|
|
|
|
|
|
|
// Is this compaction creating a file in the bottom most level?
|
|
|
|
bool BottomMostLevel() { return bottommost_level_; }
|
|
|
|
|
|
|
|
// Does this compaction include all sst files?
|
|
|
|
bool IsFullCompaction() { return is_full_compaction_; }
|
|
|
|
|
2014-02-12 21:24:18 +01:00
|
|
|
// Was this compaction triggered manually by the client?
|
|
|
|
bool IsManualCompaction() { return is_manual_compaction_; }
|
|
|
|
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 23:45:18 +01:00
|
|
|
void SetOutputPathId(uint32_t path_id) { output_path_id_ = path_id; }
|
|
|
|
|
2014-10-02 01:19:16 +02:00
|
|
|
// Return the MutableCFOptions that should be used throughout the compaction
|
|
|
|
// procedure
|
|
|
|
const MutableCFOptions* mutable_cf_options() { return &mutable_cf_options_; }
|
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
// Returns the size in bytes that the output file should be preallocated to.
|
2014-06-05 22:19:35 +02:00
|
|
|
// In level compaction, that is max_file_size_. In universal compaction, that
|
2014-07-17 23:36:41 +02:00
|
|
|
// is the sum of all input file sizes.
|
2014-10-02 01:19:16 +02:00
|
|
|
uint64_t OutputFilePreallocationSize(const MutableCFOptions& mutable_options);
|
2014-06-05 22:19:35 +02:00
|
|
|
|
2014-10-27 23:49:46 +01:00
|
|
|
void SetInputVersion(Version* input_version);
|
|
|
|
|
CompactFiles, EventListener and GetDatabaseMetaData
Summary:
This diff adds three sets of APIs to RocksDB.
= GetColumnFamilyMetaData =
* This APIs allow users to obtain the current state of a RocksDB instance on one column family.
* See GetColumnFamilyMetaData in include/rocksdb/db.h
= EventListener =
* A virtual class that allows users to implement a set of
call-back functions which will be called when specific
events of a RocksDB instance happens.
* To register EventListener, simply insert an EventListener to ColumnFamilyOptions::listeners
= CompactFiles =
* CompactFiles API inputs a set of file numbers and an output level, and RocksDB
will try to compact those files into the specified level.
= Example =
* Example code can be found in example/compact_files_example.cc, which implements
a simple external compactor using EventListener, GetColumnFamilyMetaData, and
CompactFiles API.
Test Plan:
listener_test
compactor_test
example/compact_files_example
export ROCKSDB_TESTS=CompactFiles
db_test
export ROCKSDB_TESTS=MetaData
db_test
Reviewers: ljin, igor, rven, sdong
Reviewed By: sdong
Subscribers: MarkCallaghan, dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D24705
2014-11-07 23:45:18 +01:00
|
|
|
// mark (or clear) all files that are being compacted
|
|
|
|
void MarkFilesBeingCompacted(bool mark_as_compacted);
|
|
|
|
|
|
|
|
// Initialize whether the compaction is producing files at the
|
|
|
|
// bottommost level.
|
|
|
|
//
|
|
|
|
// @see BottomMostLevel()
|
|
|
|
void SetupBottomMostLevel(VersionStorageInfo* vstorage, bool is_manual,
|
|
|
|
bool level0_only);
|
|
|
|
|
2014-11-14 20:35:48 +01:00
|
|
|
static Compaction* TEST_NewCompaction(
|
|
|
|
int num_levels, int start_level, int out_level, uint64_t target_file_size,
|
|
|
|
uint64_t max_grandparent_overlap_bytes, uint32_t output_path_id,
|
|
|
|
CompressionType output_compression, bool seek_compaction = false,
|
|
|
|
bool deletion_compaction = false);
|
|
|
|
|
2014-11-14 20:52:17 +01:00
|
|
|
CompactionInputFiles* TEST_GetInputFiles(int l) {
|
|
|
|
return &inputs_[l];
|
2014-11-14 20:35:48 +01:00
|
|
|
}
|
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
private:
|
CompactionPicker
Summary:
This is a big one. This diff moves all the code related to picking compactions from VersionSet to new class CompactionPicker. Column families' compactions will be completely separate processes, so we need to have multiple CompactionPickers.
To make this easier to review, most of the code change is just copy/paste. There is also a small change not to use VersionSet::current_, but rather to take `Version* version` as a parameter. Most of the other code is exactly the same.
In future diffs, I will also make some improvements to CompactionPickers. I think the most important part will be encapsulating it better. Currently Version, VersionSet, Compaction and CompactionPicker are all friend classes, which makes it harder to change the implementation.
This diff depends on D15171, D15183, D15189 and D15201
Test Plan: `make check`
Reviewers: kailiu, sdong, dhruba, haobo
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15207
2014-01-16 22:03:52 +01:00
|
|
|
friend class CompactionPicker;
|
|
|
|
friend class UniversalCompactionPicker;
|
2014-05-21 20:43:35 +02:00
|
|
|
friend class FIFOCompactionPicker;
|
CompactionPicker
Summary:
This is a big one. This diff moves all the code related to picking compactions from VersionSet to new class CompactionPicker. Column families' compactions will be completely separate processes, so we need to have multiple CompactionPickers.
To make this easier to review, most of the code change is just copy/paste. There is also a small change not to use VersionSet::current_, but rather to take `Version* version` as a parameter. Most of the other code is exactly the same.
In future diffs, I will also make some improvements to CompactionPickers. I think the most important part will be encapsulating it better. Currently Version, VersionSet, Compaction and CompactionPicker are all friend classes, which makes it harder to change the implementation.
This diff depends on D15171, D15183, D15189 and D15201
Test Plan: `make check`
Reviewers: kailiu, sdong, dhruba, haobo
Reviewed By: kailiu
CC: leveldb
Differential Revision: https://reviews.facebook.net/D15207
2014-01-16 22:03:52 +01:00
|
|
|
friend class LevelCompactionPicker;
|
2014-01-16 01:22:34 +01:00
|
|
|
|
2014-10-27 23:49:46 +01:00
|
|
|
Compaction(int num_levels, int start_level, int out_level,
|
2014-01-16 01:22:34 +01:00
|
|
|
uint64_t target_file_size, uint64_t max_grandparent_overlap_bytes,
|
2014-07-02 18:54:20 +02:00
|
|
|
uint32_t output_path_id, CompressionType output_compression,
|
|
|
|
bool seek_compaction = false, bool deletion_compaction = false);
|
2014-01-16 01:22:34 +01:00
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
const int start_level_; // the lowest level to be compacted
|
|
|
|
const int output_level_; // levels to which output files are stored
|
2014-01-16 01:22:34 +01:00
|
|
|
uint64_t max_output_file_size_;
|
2014-01-22 19:55:16 +01:00
|
|
|
uint64_t max_grandparent_overlap_bytes_;
|
2014-10-02 01:19:16 +02:00
|
|
|
MutableCFOptions mutable_cf_options_;
|
2014-01-16 01:22:34 +01:00
|
|
|
Version* input_version_;
|
|
|
|
VersionEdit* edit_;
|
|
|
|
int number_levels_;
|
2014-02-01 01:45:20 +01:00
|
|
|
ColumnFamilyData* cfd_;
|
2014-07-11 21:52:41 +02:00
|
|
|
Arena arena_; // Arena used to allocate space for file_levels_
|
2014-01-16 01:22:34 +01:00
|
|
|
|
2014-07-02 18:54:20 +02:00
|
|
|
uint32_t output_path_id_;
|
2014-07-02 20:40:57 +02:00
|
|
|
CompressionType output_compression_;
|
2014-01-16 01:22:34 +01:00
|
|
|
bool seek_compaction_;
|
2014-07-17 23:36:41 +02:00
|
|
|
// If true, then the comaction can be done by simply deleting input files.
|
2014-05-21 20:43:35 +02:00
|
|
|
bool deletion_compaction_;
|
2014-01-16 01:22:34 +01:00
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
// Compaction input files organized by level.
|
|
|
|
autovector<CompactionInputFiles> inputs_;
|
2014-01-16 01:22:34 +01:00
|
|
|
|
2014-07-11 21:52:41 +02:00
|
|
|
// A copy of inputs_, organized more closely in memory
|
2014-10-28 18:03:13 +01:00
|
|
|
autovector<LevelFilesBrief, 2> input_levels_;
|
2014-07-11 21:52:41 +02:00
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
// State used to check for number of of overlapping grandparent files
|
2014-07-17 23:36:41 +02:00
|
|
|
// (grandparent == "output_level_ + 1")
|
|
|
|
// This vector is updated by Version::GetOverlappingInputs().
|
2014-01-16 01:22:34 +01:00
|
|
|
std::vector<FileMetaData*> grandparents_;
|
2014-07-17 23:36:41 +02:00
|
|
|
size_t grandparent_index_; // Index in grandparent_starts_
|
|
|
|
bool seen_key_; // Some output key has been seen
|
2014-01-16 01:22:34 +01:00
|
|
|
uint64_t overlapped_bytes_; // Bytes of overlap between current output
|
2014-07-17 23:36:41 +02:00
|
|
|
// and grandparent files
|
|
|
|
int base_index_; // index of the file in files_[start_level_]
|
|
|
|
int parent_index_; // index of some file with same range in
|
|
|
|
// files_[start_level_+1]
|
|
|
|
double score_; // score that was used to pick this compaction.
|
2014-01-16 01:22:34 +01:00
|
|
|
|
|
|
|
// Is this compaction creating a file in the bottom most level?
|
|
|
|
bool bottommost_level_;
|
|
|
|
// Does this compaction include all sst files?
|
|
|
|
bool is_full_compaction_;
|
|
|
|
|
2014-02-12 21:24:18 +01:00
|
|
|
// Is this compaction requested by the client?
|
|
|
|
bool is_manual_compaction_;
|
|
|
|
|
2014-07-17 23:36:41 +02:00
|
|
|
// "level_ptrs_" holds indices into "input_version_->levels_", where each
|
|
|
|
// index remembers which file of an associated level we are currently used
|
|
|
|
// to check KeyNotExistsBeyondOutputLevel() for deletion operation.
|
|
|
|
// As it is for checking KeyNotExistsBeyondOutputLevel(), it only
|
|
|
|
// records indices for all levels beyond "output_level_".
|
2014-01-16 01:22:34 +01:00
|
|
|
std::vector<size_t> level_ptrs_;
|
|
|
|
|
|
|
|
// In case of compaction error, reset the nextIndex that is used
|
|
|
|
// to pick up the next file to be compacted from files_by_size_
|
|
|
|
void ResetNextCompactionIndex();
|
|
|
|
};
|
|
|
|
|
2014-08-07 19:05:04 +02:00
|
|
|
// Utility function
|
|
|
|
extern uint64_t TotalFileSize(const std::vector<FileMetaData*>& files);
|
|
|
|
|
2014-01-16 01:22:34 +01:00
|
|
|
} // namespace rocksdb
|