Support ingest_behind for IngestExternalFile
Summary: First cut for early review; there are few conceptual points to answer and some code structure issues. For conceptual points - - restriction-wise, we're going to disallow ingest_behind if (use_seqno_zero_out=true || disable_auto_compaction=false), the user is responsible to properly open and close DB with required params - we wanted to ingest into reserved bottom most level. Should we fail fast if bottom level isn't empty, or should we attempt to ingest if file fits there key-ranges-wise? - Modifying AssignLevelForIngestedFile seems the place we we'd handle that. On code structure - going to refactor GenerateAndAddExternalFile call in the test class to allow passing instance of IngestionOptions, that's just going to incur lots of changes at callsites. Closes https://github.com/facebook/rocksdb/pull/2144 Differential Revision: D4873732 Pulled By: lightmark fbshipit-source-id: 81cb698106b68ef8797f564453651d50900e153a
This commit is contained in:
parent
01ab7b528c
commit
ba685a472a
@ -172,6 +172,12 @@ ColumnFamilyOptions SanitizeOptions(const ImmutableDBOptions& db_options,
|
||||
result.num_levels < 2) {
|
||||
result.num_levels = 2;
|
||||
}
|
||||
|
||||
if (result.compaction_style == kCompactionStyleUniversal &&
|
||||
db_options.allow_ingest_behind && result.num_levels < 3) {
|
||||
result.num_levels = 3;
|
||||
}
|
||||
|
||||
if (result.max_write_buffer_number < 2) {
|
||||
result.max_write_buffer_number = 2;
|
||||
}
|
||||
|
@ -548,7 +548,8 @@ void CompactionIterator::PrepareOutput() {
|
||||
|
||||
// This is safe for TransactionDB write-conflict checking since transactions
|
||||
// only care about sequence number larger than any active snapshots.
|
||||
if (bottommost_level_ && valid_ && ikey_.sequence <= earliest_snapshot_ &&
|
||||
if ((compaction_ != nullptr && !compaction_->allow_ingest_behind()) &&
|
||||
bottommost_level_ && valid_ && ikey_.sequence <= earliest_snapshot_ &&
|
||||
ikey_.type != kTypeMerge &&
|
||||
!cmp_->Equal(compaction_->GetLargestUserKey(), ikey_.user_key)) {
|
||||
assert(ikey_.type != kTypeDeletion && ikey_.type != kTypeSingleDeletion);
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "db/merge_helper.h"
|
||||
#include "db/pinned_iterators_manager.h"
|
||||
#include "db/range_del_aggregator.h"
|
||||
#include "options/cf_options.h"
|
||||
#include "rocksdb/compaction_filter.h"
|
||||
|
||||
namespace rocksdb {
|
||||
@ -48,6 +49,9 @@ class CompactionIterator {
|
||||
virtual Slice GetLargestUserKey() const {
|
||||
return compaction_->GetLargestUserKey();
|
||||
}
|
||||
virtual bool allow_ingest_behind() const {
|
||||
return compaction_->immutable_cf_options()->allow_ingest_behind;
|
||||
}
|
||||
|
||||
protected:
|
||||
CompactionProxy() = default;
|
||||
|
@ -156,6 +156,7 @@ class FakeCompaction : public CompactionIterator::CompactionProxy {
|
||||
virtual Slice GetLargestUserKey() const {
|
||||
return "\xff\xff\xff\xff\xff\xff\xff\xff\xff";
|
||||
}
|
||||
virtual bool allow_ingest_behind() const { return false; }
|
||||
|
||||
bool key_not_exists_beyond_output_level = false;
|
||||
};
|
||||
|
@ -529,7 +529,11 @@ Compaction* CompactionPicker::CompactRange(
|
||||
// files together to the last level.
|
||||
assert(vstorage->num_levels() > 1);
|
||||
// DBImpl::CompactRange() set output level to be the last level
|
||||
assert(output_level == vstorage->num_levels() - 1);
|
||||
if (ioptions_.allow_ingest_behind) {
|
||||
assert(output_level == vstorage->num_levels() - 2);
|
||||
} else {
|
||||
assert(output_level == vstorage->num_levels() - 1);
|
||||
}
|
||||
// DBImpl::RunManualCompaction will make full range for universal compaction
|
||||
assert(begin == nullptr);
|
||||
assert(end == nullptr);
|
||||
|
@ -402,6 +402,35 @@ TEST_F(CompactionPickerTest, NeedsCompactionUniversal) {
|
||||
vstorage_->CompactionScore(0) >= 1);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(CompactionPickerTest, CompactionUniversalIngestBehindReservedLevel) {
|
||||
const uint64_t kFileSize = 100000;
|
||||
NewVersionStorage(1, kCompactionStyleUniversal);
|
||||
ioptions_.allow_ingest_behind = true;
|
||||
ioptions_.num_levels = 3;
|
||||
UniversalCompactionPicker universal_compaction_picker(ioptions_, &icmp_);
|
||||
// must return false when there's no files.
|
||||
ASSERT_EQ(universal_compaction_picker.NeedsCompaction(vstorage_.get()),
|
||||
false);
|
||||
|
||||
NewVersionStorage(3, kCompactionStyleUniversal);
|
||||
|
||||
Add(0, 1U, "150", "200", kFileSize, 0, 500, 550);
|
||||
Add(0, 2U, "201", "250", kFileSize, 0, 401, 450);
|
||||
Add(0, 4U, "260", "300", kFileSize, 0, 260, 300);
|
||||
Add(1, 5U, "100", "151", kFileSize, 0, 200, 251);
|
||||
Add(1, 3U, "301", "350", kFileSize, 0, 101, 150);
|
||||
Add(2, 6U, "120", "200", kFileSize, 0, 20, 100);
|
||||
|
||||
UpdateVersionStorageInfo();
|
||||
|
||||
std::unique_ptr<Compaction> compaction(
|
||||
universal_compaction_picker.PickCompaction(
|
||||
cf_name_, mutable_cf_options_, vstorage_.get(), &log_buffer_));
|
||||
|
||||
// output level should be the one above the bottom-most
|
||||
ASSERT_EQ(1, compaction->output_level());
|
||||
}
|
||||
// Tests if the files can be trivially moved in multi level
|
||||
// universal compaction when allow_trivial_move option is set
|
||||
// In this test as the input files overlaps, they cannot
|
||||
|
@ -568,6 +568,13 @@ Compaction* UniversalCompactionPicker::PickCompactionToReduceSortedRuns(
|
||||
output_level = sorted_runs[first_index_after].level - 1;
|
||||
}
|
||||
|
||||
// last level is reserved for the files ingested behind
|
||||
if (ioptions_.allow_ingest_behind &&
|
||||
(output_level == vstorage->num_levels() - 1)) {
|
||||
assert(output_level > 1);
|
||||
output_level--;
|
||||
}
|
||||
|
||||
std::vector<CompactionInputFiles> inputs(vstorage->num_levels());
|
||||
for (size_t i = 0; i < inputs.size(); ++i) {
|
||||
inputs[i].level = start_level + static_cast<int>(i);
|
||||
@ -719,13 +726,20 @@ Compaction* UniversalCompactionPicker::PickCompactionToReduceSizeAmp(
|
||||
cf_name.c_str(), file_num_buf);
|
||||
}
|
||||
|
||||
// output files at the bottom most level, unless it's reserved
|
||||
int output_level = vstorage->num_levels() - 1;
|
||||
// last level is reserved for the files ingested behind
|
||||
if (ioptions_.allow_ingest_behind) {
|
||||
assert(output_level > 1);
|
||||
output_level--;
|
||||
}
|
||||
|
||||
return new Compaction(
|
||||
vstorage, ioptions_, mutable_cf_options, std::move(inputs),
|
||||
vstorage->num_levels() - 1,
|
||||
mutable_cf_options.MaxFileSizeForLevel(vstorage->num_levels() - 1),
|
||||
output_level, mutable_cf_options.MaxFileSizeForLevel(output_level),
|
||||
/* max_grandparent_overlap_bytes */ LLONG_MAX, path_id,
|
||||
GetCompressionType(ioptions_, vstorage, mutable_cf_options,
|
||||
vstorage->num_levels() - 1, 1),
|
||||
output_level, 1),
|
||||
/* grandparents */ {}, /* is manual */ false, score,
|
||||
false /* deletion_compaction */,
|
||||
CompactionReason::kUniversalSizeAmplification);
|
||||
|
@ -2614,6 +2614,15 @@ Status DBImpl::IngestExternalFile(
|
||||
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
|
||||
auto cfd = cfh->cfd();
|
||||
|
||||
// Ingest should immediately fail if ingest_behind is requested,
|
||||
// but the DB doesn't support it.
|
||||
if (ingestion_options.ingest_behind) {
|
||||
if (!immutable_db_options_.allow_ingest_behind) {
|
||||
return Status::InvalidArgument(
|
||||
"Can't ingest_behind file in DB with allow_ingest_behind=false");
|
||||
}
|
||||
}
|
||||
|
||||
ExternalSstFileIngestionJob ingestion_job(env_, versions_.get(), cfd,
|
||||
immutable_db_options_, env_options_,
|
||||
&snapshots_, ingestion_options);
|
||||
|
@ -284,10 +284,14 @@ Status DBImpl::CompactRange(const CompactRangeOptions& options,
|
||||
if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal &&
|
||||
cfd->NumberLevels() > 1) {
|
||||
// Always compact all files together.
|
||||
s = RunManualCompaction(cfd, ColumnFamilyData::kCompactAllLevels,
|
||||
cfd->NumberLevels() - 1, options.target_path_id,
|
||||
begin, end, exclusive);
|
||||
final_output_level = cfd->NumberLevels() - 1;
|
||||
// if bottom most level is reserved
|
||||
if (immutable_db_options_.allow_ingest_behind) {
|
||||
final_output_level--;
|
||||
}
|
||||
s = RunManualCompaction(cfd, ColumnFamilyData::kCompactAllLevels,
|
||||
final_output_level, options.target_path_id,
|
||||
begin, end, exclusive);
|
||||
} else {
|
||||
for (int level = 0; level <= max_level_with_files; level++) {
|
||||
int output_level;
|
||||
|
@ -171,9 +171,13 @@ Status ExternalSstFileIngestionJob::Run() {
|
||||
|
||||
for (IngestedFileInfo& f : files_to_ingest_) {
|
||||
SequenceNumber assigned_seqno = 0;
|
||||
status = AssignLevelAndSeqnoForIngestedFile(
|
||||
super_version, force_global_seqno, cfd_->ioptions()->compaction_style,
|
||||
&f, &assigned_seqno);
|
||||
if (ingestion_options_.ingest_behind) {
|
||||
status = CheckLevelForIngestedBehindFile(&f);
|
||||
} else {
|
||||
status = AssignLevelAndSeqnoForIngestedFile(
|
||||
super_version, force_global_seqno, cfd_->ioptions()->compaction_style,
|
||||
&f, &assigned_seqno);
|
||||
}
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
@ -408,9 +412,9 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
|
||||
Arena arena;
|
||||
ReadOptions ro;
|
||||
ro.total_order_seek = true;
|
||||
|
||||
int target_level = 0;
|
||||
auto* vstorage = cfd_->current()->storage_info();
|
||||
|
||||
for (int lvl = 0; lvl < cfd_->NumberLevels(); lvl++) {
|
||||
if (lvl > 0 && lvl < vstorage->base_level()) {
|
||||
continue;
|
||||
@ -418,20 +422,8 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
|
||||
|
||||
if (vstorage->NumLevelFiles(lvl) > 0) {
|
||||
bool overlap_with_level = false;
|
||||
MergeIteratorBuilder merge_iter_builder(&cfd_->internal_comparator(),
|
||||
&arena);
|
||||
RangeDelAggregator range_del_agg(cfd_->internal_comparator(),
|
||||
{} /* snapshots */);
|
||||
sv->current->AddIteratorsForLevel(ro, env_options_, &merge_iter_builder,
|
||||
lvl, &range_del_agg);
|
||||
if (!range_del_agg.IsEmpty()) {
|
||||
return Status::NotSupported(
|
||||
"file ingestion with range tombstones is currently unsupported");
|
||||
}
|
||||
ScopedArenaIterator level_iter(merge_iter_builder.Finish());
|
||||
|
||||
status = IngestedFileOverlapWithIteratorRange(
|
||||
file_to_ingest, level_iter.get(), &overlap_with_level);
|
||||
status = IngestedFileOverlapWithLevel(sv, file_to_ingest, lvl,
|
||||
&overlap_with_level);
|
||||
if (!status.ok()) {
|
||||
return status;
|
||||
}
|
||||
@ -478,6 +470,33 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
|
||||
return status;
|
||||
}
|
||||
|
||||
Status ExternalSstFileIngestionJob::CheckLevelForIngestedBehindFile(
|
||||
IngestedFileInfo* file_to_ingest) {
|
||||
auto* vstorage = cfd_->current()->storage_info();
|
||||
// first check if new files fit in the bottommost level
|
||||
int bottom_lvl = cfd_->NumberLevels() - 1;
|
||||
if(!IngestedFileFitInLevel(file_to_ingest, bottom_lvl)) {
|
||||
return Status::InvalidArgument(
|
||||
"Can't ingest_behind file as it doesn't fit "
|
||||
"at the bottommost level!");
|
||||
}
|
||||
|
||||
// second check if despite allow_ingest_behind=true we still have 0 seqnums
|
||||
// at some upper level
|
||||
for (int lvl = 0; lvl < cfd_->NumberLevels() - 1; lvl++) {
|
||||
for (auto file : vstorage->LevelFiles(lvl)) {
|
||||
if (file->smallest_seqno == 0) {
|
||||
return Status::InvalidArgument(
|
||||
"Can't ingest_behind file as despite allow_ingest_behind=true "
|
||||
"there are files with 0 seqno in database at upper levels!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
file_to_ingest->picked_level = bottom_lvl;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ExternalSstFileIngestionJob::AssignGlobalSeqnoForIngestedFile(
|
||||
IngestedFileInfo* file_to_ingest, SequenceNumber seqno) {
|
||||
if (file_to_ingest->original_seqno == seqno) {
|
||||
@ -564,6 +583,27 @@ bool ExternalSstFileIngestionJob::IngestedFileFitInLevel(
|
||||
return true;
|
||||
}
|
||||
|
||||
Status ExternalSstFileIngestionJob::IngestedFileOverlapWithLevel(
|
||||
SuperVersion* sv, IngestedFileInfo* file_to_ingest, int lvl,
|
||||
bool* overlap_with_level) {
|
||||
Arena arena;
|
||||
ReadOptions ro;
|
||||
ro.total_order_seek = true;
|
||||
MergeIteratorBuilder merge_iter_builder(&cfd_->internal_comparator(),
|
||||
&arena);
|
||||
RangeDelAggregator range_del_agg(cfd_->internal_comparator(),
|
||||
{} /* snapshots */);
|
||||
sv->current->AddIteratorsForLevel(ro, env_options_, &merge_iter_builder,
|
||||
lvl, &range_del_agg);
|
||||
if (!range_del_agg.IsEmpty()) {
|
||||
return Status::NotSupported(
|
||||
"file ingestion with range tombstones is currently unsupported");
|
||||
}
|
||||
ScopedArenaIterator level_iter(merge_iter_builder.Finish());
|
||||
return IngestedFileOverlapWithIteratorRange(
|
||||
file_to_ingest, level_iter.get(), overlap_with_level);
|
||||
}
|
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
#endif // !ROCKSDB_LITE
|
||||
|
@ -125,6 +125,12 @@ class ExternalSstFileIngestionJob {
|
||||
IngestedFileInfo* file_to_ingest,
|
||||
SequenceNumber* assigned_seqno);
|
||||
|
||||
// File that we want to ingest behind always goes to the lowest level;
|
||||
// we just check that it fits in the level, that DB allows ingest_behind,
|
||||
// and that we don't have 0 seqnums at the upper levels.
|
||||
// REQUIRES: Mutex held
|
||||
Status CheckLevelForIngestedBehindFile(IngestedFileInfo* file_to_ingest);
|
||||
|
||||
// Set the file global sequence number to `seqno`
|
||||
Status AssignGlobalSeqnoForIngestedFile(IngestedFileInfo* file_to_ingest,
|
||||
SequenceNumber seqno);
|
||||
@ -135,6 +141,11 @@ class ExternalSstFileIngestionJob {
|
||||
const IngestedFileInfo* file_to_ingest, InternalIterator* iter,
|
||||
bool* overlap);
|
||||
|
||||
// Check if `file_to_ingest` key range overlap with level
|
||||
// REQUIRES: Mutex held
|
||||
Status IngestedFileOverlapWithLevel(SuperVersion* sv,
|
||||
IngestedFileInfo* file_to_ingest, int lvl, bool* overlap_with_level);
|
||||
|
||||
// Check if `file_to_ingest` can fit in level `level`
|
||||
// REQUIRES: Mutex held
|
||||
bool IngestedFileFitInLevel(const IngestedFileInfo* file_to_ingest,
|
||||
|
@ -90,6 +90,68 @@ class ExternalSSTFileTest : public DBTestBase {
|
||||
return s;
|
||||
}
|
||||
|
||||
Status GenerateAndAddExternalFileIngestBehind(
|
||||
const Options options, const IngestExternalFileOptions ifo,
|
||||
std::vector<std::pair<std::string, std::string>> data, int file_id = -1,
|
||||
bool sort_data = false,
|
||||
std::map<std::string, std::string>* true_data = nullptr,
|
||||
ColumnFamilyHandle* cfh = nullptr) {
|
||||
// Generate a file id if not provided
|
||||
if (file_id == -1) {
|
||||
file_id = last_file_id_ + 1;
|
||||
last_file_id_++;
|
||||
}
|
||||
|
||||
// Sort data if asked to do so
|
||||
if (sort_data) {
|
||||
std::sort(data.begin(), data.end(),
|
||||
[&](const std::pair<std::string, std::string>& e1,
|
||||
const std::pair<std::string, std::string>& e2) {
|
||||
return options.comparator->Compare(e1.first, e2.first) < 0;
|
||||
});
|
||||
auto uniq_iter = std::unique(
|
||||
data.begin(), data.end(),
|
||||
[&](const std::pair<std::string, std::string>& e1,
|
||||
const std::pair<std::string, std::string>& e2) {
|
||||
return options.comparator->Compare(e1.first, e2.first) == 0;
|
||||
});
|
||||
data.resize(uniq_iter - data.begin());
|
||||
}
|
||||
std::string file_path = sst_files_dir_ + ToString(file_id);
|
||||
SstFileWriter sst_file_writer(EnvOptions(), options, cfh);
|
||||
|
||||
Status s = sst_file_writer.Open(file_path);
|
||||
if (!s.ok()) {
|
||||
return s;
|
||||
}
|
||||
for (auto& entry : data) {
|
||||
s = sst_file_writer.Add(entry.first, entry.second);
|
||||
if (!s.ok()) {
|
||||
sst_file_writer.Finish();
|
||||
return s;
|
||||
}
|
||||
}
|
||||
s = sst_file_writer.Finish();
|
||||
|
||||
if (s.ok()) {
|
||||
if (cfh) {
|
||||
s = db_->IngestExternalFile(cfh, {file_path}, ifo);
|
||||
} else {
|
||||
s = db_->IngestExternalFile({file_path}, ifo);
|
||||
}
|
||||
}
|
||||
|
||||
if (s.ok() && true_data) {
|
||||
for (auto& entry : data) {
|
||||
(*true_data)[entry.first] = entry.second;
|
||||
}
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Status GenerateAndAddExternalFile(
|
||||
const Options options, std::vector<std::pair<int, std::string>> data,
|
||||
int file_id = -1, bool allow_global_seqno = false, bool sort_data = false,
|
||||
@ -1817,6 +1879,69 @@ TEST_F(ExternalSSTFileTest, SnapshotInconsistencyBug) {
|
||||
|
||||
db_->ReleaseSnapshot(snap);
|
||||
}
|
||||
|
||||
TEST_F(ExternalSSTFileTest, IngestBehind) {
|
||||
Options options = CurrentOptions();
|
||||
options.compaction_style = kCompactionStyleUniversal;
|
||||
options.num_levels = 3;
|
||||
options.disable_auto_compactions = false;
|
||||
DestroyAndReopen(options);
|
||||
std::vector<std::pair<std::string, std::string>> file_data;
|
||||
std::map<std::string, std::string> true_data;
|
||||
|
||||
// Insert 100 -> 200 into the memtable
|
||||
for (int i = 100; i <= 200; i++) {
|
||||
ASSERT_OK(Put(Key(i), "memtable"));
|
||||
true_data[Key(i)] = "memtable";
|
||||
}
|
||||
|
||||
// Insert 100 -> 200 using IngestExternalFile
|
||||
file_data.clear();
|
||||
for (int i = 0; i <= 20; i++) {
|
||||
file_data.emplace_back(Key(i), "ingest_behind");
|
||||
}
|
||||
|
||||
IngestExternalFileOptions ifo;
|
||||
ifo.allow_global_seqno = true;
|
||||
ifo.ingest_behind = true;
|
||||
|
||||
// Can't ingest behind since allow_ingest_behind isn't set to true
|
||||
ASSERT_NOK(GenerateAndAddExternalFileIngestBehind(options, ifo,
|
||||
file_data, -1, false,
|
||||
&true_data));
|
||||
|
||||
options.allow_ingest_behind = true;
|
||||
// check that we still can open the DB, as num_levels should be
|
||||
// sanitized to 3
|
||||
options.num_levels = 2;
|
||||
DestroyAndReopen(options);
|
||||
|
||||
options.num_levels = 3;
|
||||
DestroyAndReopen(options);
|
||||
// Insert 100 -> 200 into the memtable
|
||||
for (int i = 100; i <= 200; i++) {
|
||||
ASSERT_OK(Put(Key(i), "memtable"));
|
||||
true_data[Key(i)] = "memtable";
|
||||
}
|
||||
db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
|
||||
// Universal picker should go at second from the bottom level
|
||||
ASSERT_EQ("0,1", FilesPerLevel());
|
||||
ASSERT_OK(GenerateAndAddExternalFileIngestBehind(options, ifo,
|
||||
file_data, -1, false,
|
||||
&true_data));
|
||||
ASSERT_EQ("0,1,1", FilesPerLevel());
|
||||
// this time ingest should fail as the file doesn't fit to the bottom level
|
||||
ASSERT_NOK(GenerateAndAddExternalFileIngestBehind(options, ifo,
|
||||
file_data, -1, false,
|
||||
&true_data));
|
||||
ASSERT_EQ("0,1,1", FilesPerLevel());
|
||||
db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
|
||||
// bottom level should be empty
|
||||
ASSERT_EQ("0,1", FilesPerLevel());
|
||||
|
||||
size_t kcnt = 0;
|
||||
VerifyDBFromMap(true_data, &kcnt, false);
|
||||
}
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
@ -944,14 +944,22 @@ class DB {
|
||||
}
|
||||
|
||||
// IngestExternalFile() will load a list of external SST files (1) into the DB
|
||||
// We will try to find the lowest possible level that the file can fit in, and
|
||||
// ingest the file into this level (2). A file that have a key range that
|
||||
// overlap with the memtable key range will require us to Flush the memtable
|
||||
// first before ingesting the file.
|
||||
// Two primary modes are supported:
|
||||
// - Duplicate keys in the new files will overwrite exiting keys (default)
|
||||
// - Duplicate keys will be skipped (set ingest_behind=true)
|
||||
// In the first mode we will try to find the lowest possible level that
|
||||
// the file can fit in, and ingest the file into this level (2). A file that
|
||||
// have a key range that overlap with the memtable key range will require us
|
||||
// to Flush the memtable first before ingesting the file.
|
||||
// In the second mode we will always ingest in the bottom mode level (see
|
||||
// docs to IngestExternalFileOptions::ingest_behind).
|
||||
//
|
||||
// (1) External SST files can be created using SstFileWriter
|
||||
// (2) We will try to ingest the files to the lowest possible level
|
||||
// even if the file compression dont match the level compression
|
||||
// (3) If IngestExternalFileOptions->ingest_behind is set to true,
|
||||
// we always ingest at the bottommost level, which should be reserved
|
||||
// for this purpose (see DBOPtions::allow_ingest_behind flag).
|
||||
virtual Status IngestExternalFile(
|
||||
ColumnFamilyHandle* column_family,
|
||||
const std::vector<std::string>& external_files,
|
||||
|
@ -847,6 +847,18 @@ struct DBOptions {
|
||||
//
|
||||
// Dynamically changeable through SetDBOptions() API.
|
||||
bool avoid_flush_during_shutdown = false;
|
||||
|
||||
// Set this option to true during creation of database if you want
|
||||
// to be able to ingest behind (call IngestExternalFile() skipping keys
|
||||
// that already exist, rather than overwriting matching keys).
|
||||
// Setting this option to true will affect 2 things:
|
||||
// 1) Disable some internal optimizations around SST file compression
|
||||
// 2) Reserve bottom-most level for ingested files only.
|
||||
// 3) Note that num_levels should be >= 3 if this option is turned on.
|
||||
//
|
||||
// DEFAULT: false
|
||||
// Immutable.
|
||||
bool allow_ingest_behind = false;
|
||||
};
|
||||
|
||||
// Options to control the behavior of a database (passed to DB::Open)
|
||||
@ -1126,6 +1138,14 @@ struct IngestExternalFileOptions {
|
||||
// If set to false and the file key range overlaps with the memtable key range
|
||||
// (memtable flush required), IngestExternalFile will fail.
|
||||
bool allow_blocking_flush = true;
|
||||
// Set to true if you would like duplicate keys in the file being ingested
|
||||
// to be skipped rather than overwriting existing data under that key.
|
||||
// Usecase: back-fill of some historical data in the database without
|
||||
// over-writing existing newer version of data.
|
||||
// This option could only be used if the DB has been running
|
||||
// with allow_ingest_behind=true since the dawn of time.
|
||||
// All files will be ingested at the bottommost level with seqno=0.
|
||||
bool ingest_behind = false;
|
||||
};
|
||||
|
||||
} // namespace rocksdb
|
||||
|
@ -71,6 +71,7 @@ ImmutableCFOptions::ImmutableCFOptions(const ImmutableDBOptions& db_options,
|
||||
num_levels(cf_options.num_levels),
|
||||
optimize_filters_for_hits(cf_options.optimize_filters_for_hits),
|
||||
force_consistency_checks(cf_options.force_consistency_checks),
|
||||
allow_ingest_behind(db_options.allow_ingest_behind),
|
||||
listeners(db_options.listeners),
|
||||
row_cache(db_options.row_cache),
|
||||
max_subcompactions(db_options.max_subcompactions),
|
||||
|
@ -108,6 +108,8 @@ struct ImmutableCFOptions {
|
||||
|
||||
bool force_consistency_checks;
|
||||
|
||||
bool allow_ingest_behind;
|
||||
|
||||
// A vector of EventListeners which call-back functions will be called
|
||||
// when specific RocksDB event happens.
|
||||
std::vector<std::shared_ptr<EventListener>> listeners;
|
||||
|
@ -84,7 +84,8 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
|
||||
#endif // ROCKSDB_LITE
|
||||
fail_if_options_file_error(options.fail_if_options_file_error),
|
||||
dump_malloc_stats(options.dump_malloc_stats),
|
||||
avoid_flush_during_recovery(options.avoid_flush_during_recovery) {
|
||||
avoid_flush_during_recovery(options.avoid_flush_during_recovery),
|
||||
allow_ingest_behind(options.allow_ingest_behind) {
|
||||
}
|
||||
|
||||
void ImmutableDBOptions::Dump(Logger* log) const {
|
||||
@ -210,8 +211,11 @@ void ImmutableDBOptions::Dump(Logger* log) const {
|
||||
ROCKS_LOG_HEADER(log, " Options.wal_filter: %s",
|
||||
wal_filter ? wal_filter->Name() : "None");
|
||||
#endif // ROCKDB_LITE
|
||||
|
||||
ROCKS_LOG_HEADER(log, " Options.avoid_flush_during_recovery: %d",
|
||||
avoid_flush_during_recovery);
|
||||
ROCKS_LOG_HEADER(log, " Options.allow_ingest_behind: %d",
|
||||
allow_ingest_behind);
|
||||
}
|
||||
|
||||
MutableDBOptions::MutableDBOptions()
|
||||
|
@ -77,6 +77,7 @@ struct ImmutableDBOptions {
|
||||
bool fail_if_options_file_error;
|
||||
bool dump_malloc_stats;
|
||||
bool avoid_flush_during_recovery;
|
||||
bool allow_ingest_behind;
|
||||
};
|
||||
|
||||
struct MutableDBOptions {
|
||||
|
@ -190,7 +190,8 @@ DBOptions::DBOptions(const Options& options)
|
||||
fail_if_options_file_error(options.fail_if_options_file_error),
|
||||
dump_malloc_stats(options.dump_malloc_stats),
|
||||
avoid_flush_during_recovery(options.avoid_flush_during_recovery),
|
||||
avoid_flush_during_shutdown(options.avoid_flush_during_shutdown) {
|
||||
avoid_flush_during_shutdown(options.avoid_flush_during_shutdown),
|
||||
allow_ingest_behind(options.allow_ingest_behind) {
|
||||
}
|
||||
|
||||
void DBOptions::Dump(Logger* log) const {
|
||||
|
@ -119,6 +119,8 @@ DBOptions BuildDBOptions(const ImmutableDBOptions& immutable_db_options,
|
||||
immutable_db_options.avoid_flush_during_recovery;
|
||||
options.avoid_flush_during_shutdown =
|
||||
mutable_db_options.avoid_flush_during_shutdown;
|
||||
options.allow_ingest_behind =
|
||||
immutable_db_options.allow_ingest_behind;
|
||||
|
||||
return options;
|
||||
}
|
||||
|
@ -335,7 +335,11 @@ static std::unordered_map<std::string, OptionTypeInfo> db_options_type_info = {
|
||||
{"avoid_flush_during_shutdown",
|
||||
{offsetof(struct DBOptions, avoid_flush_during_shutdown),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal, true,
|
||||
offsetof(struct MutableDBOptions, avoid_flush_during_shutdown)}}};
|
||||
offsetof(struct MutableDBOptions, avoid_flush_during_shutdown)}},
|
||||
{"allow_ingest_behind",
|
||||
{offsetof(struct DBOptions, allow_ingest_behind),
|
||||
OptionType::kBoolean, OptionVerificationType::kNormal, false,
|
||||
offsetof(struct ImmutableDBOptions, allow_ingest_behind)}}};
|
||||
|
||||
// offset_of is used to get the offset of a class data member
|
||||
// ex: offset_of(&ColumnFamilyOptions::num_levels)
|
||||
|
@ -291,7 +291,8 @@ TEST_F(OptionsSettableTest, DBOptionsAllFieldsSettable) {
|
||||
"dump_malloc_stats=false;"
|
||||
"allow_2pc=false;"
|
||||
"avoid_flush_during_recovery=false;"
|
||||
"avoid_flush_during_shutdown=false;",
|
||||
"avoid_flush_during_shutdown=false;"
|
||||
"allow_ingest_behind=false;",
|
||||
new_options));
|
||||
|
||||
ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),
|
||||
|
Loading…
x
Reference in New Issue
Block a user