From 1e7cca20fba18acb762c0c81ed60ec29f84b3047 Mon Sep 17 00:00:00 2001
From: anand1976 <33647610+anand1976@users.noreply.github.com>
Date: Mon, 15 Oct 2018 23:20:15 -0700
Subject: [PATCH] Properly determine a truncated CompactRange stop key (#4496)

Summary:
When a CompactRange() call for a level is truncated before the end key is
reached because it exceeds max_compaction_bytes, we need to properly set the
compaction_end parameter to indicate the stop key. The next CompactRange call
will use that as its begin key. We now set it to the smallest key of the next
file in the level, after expanding the inputs to get a clean cut.

Previously, we were setting it before expanding the inputs, so we could end up
recompacting some files. In a pathological case, where a single key has many
entries spanning all the files in the level (possibly due to merge operands
without a partial merge operator, thus resulting in compaction output
identical to the input), this would result in an endless loop over the same
set of files. A simplified, illustrative sketch of the corrected stop-key
selection appears after the patch below.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/4496

Differential Revision: D10395026

Pulled By: anand1976

fbshipit-source-id: f0c2f89fee29b4b3be53b6467b53abba8e9146a9
---
 HISTORY.md               |  1 +
 db/compaction_picker.cc  | 16 ++++++++++-----
 db/compaction_picker.h   |  3 ++-
 db/db_compaction_test.cc | 44 ++++++++++++++++++++++++++++++++++++++++
 db/version_set.cc        | 26 ++++++++++++++++++++----
 db/version_set.h         | 13 +++++++-----
 6 files changed, 88 insertions(+), 15 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index cf23380b3..1a8bad816 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -2,6 +2,7 @@
 ## Unreleased
 ### Bug Fixes
 * Fix slow flush/compaction when DB contains many snapshots. The problem became noticeable to us in DBs with 100,000+ snapshots, though it will affect others at different thresholds.
+* Properly set the stop key for a truncated manual CompactRange
 
 ## 5.16.4 (10/10/2018)
 ### Bug Fixes
diff --git a/db/compaction_picker.cc b/db/compaction_picker.cc
index f3f62933c..c1401f98e 100644
--- a/db/compaction_picker.cc
+++ b/db/compaction_picker.cc
@@ -219,7 +219,8 @@ void CompactionPicker::GetRange(const std::vector<FileMetaData*>& inputs,
 
 bool CompactionPicker::ExpandInputsToCleanCut(const std::string& /*cf_name*/,
                                               VersionStorageInfo* vstorage,
-                                              CompactionInputFiles* inputs) {
+                                              CompactionInputFiles* inputs,
+                                              InternalKey** next_smallest) {
   // This isn't good compaction
   assert(!inputs->empty());
 
@@ -242,7 +243,8 @@ bool CompactionPicker::ExpandInputsToCleanCut(const std::string& /*cf_name*/,
     GetRange(*inputs, &smallest, &largest);
     inputs->clear();
     vstorage->GetOverlappingInputs(level, &smallest, &largest, &inputs->files,
-                                   hint_index, &hint_index);
+                                   hint_index, &hint_index, true,
+                                   next_smallest);
   } while (inputs->size() > old_size);
 
   // we started off with inputs non-empty and the previous loop only grew
@@ -649,7 +651,6 @@ Compaction* CompactionPicker::CompactRange(
       uint64_t s = inputs[i]->compensated_file_size;
       total += s;
       if (total >= limit) {
-        **compaction_end = inputs[i + 1]->smallest;
         covering_the_whole_range = false;
         inputs.files.resize(i + 1);
         break;
@@ -658,7 +659,10 @@ Compaction* CompactionPicker::CompactRange(
   }
   assert(output_path_id < static_cast<uint32_t>(ioptions_.cf_paths.size()));
 
-  if (ExpandInputsToCleanCut(cf_name, vstorage, &inputs) == false) {
+  InternalKey key_storage;
+  InternalKey* next_smallest = &key_storage;
+  if (ExpandInputsToCleanCut(cf_name, vstorage, &inputs, &next_smallest) ==
+      false) {
     // manual compaction is now multi-threaded, so it can
     // happen that ExpandWhileOverlapping fails
     // we handle it higher in RunManualCompaction
@@ -666,8 +670,10 @@ Compaction* CompactionPicker::CompactRange(
     return nullptr;
   }
 
-  if (covering_the_whole_range) {
+  if (covering_the_whole_range || !next_smallest) {
     *compaction_end = nullptr;
+  } else {
+    **compaction_end = *next_smallest;
   }
 
   CompactionInputFiles output_level_inputs;
diff --git a/db/compaction_picker.h b/db/compaction_picker.h
index 5f770039a..e9688d754 100644
--- a/db/compaction_picker.h
+++ b/db/compaction_picker.h
@@ -151,7 +151,8 @@ class CompactionPicker {
   // Will return false if it is impossible to apply this compaction.
   bool ExpandInputsToCleanCut(const std::string& cf_name,
                               VersionStorageInfo* vstorage,
-                              CompactionInputFiles* inputs);
+                              CompactionInputFiles* inputs,
+                              InternalKey** next_smallest = nullptr);
 
   // Returns true if any one of the parent files are being compacted
   bool IsRangeInCompaction(VersionStorageInfo* vstorage,
diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc
index 423480cd9..7dd717cf7 100644
--- a/db/db_compaction_test.cc
+++ b/db/db_compaction_test.cc
@@ -3959,6 +3959,50 @@ INSTANTIATE_TEST_CASE_P(
                             CompactionPri::kOldestSmallestSeqFirst,
                             CompactionPri::kMinOverlappingRatio));
 
+class NoopMergeOperator : public MergeOperator {
+ public:
+  NoopMergeOperator() {}
+
+  virtual bool FullMergeV2(const MergeOperationInput& /*merge_in*/,
+                           MergeOperationOutput* merge_out) const override {
+    std::string val("bar");
+    merge_out->new_value = val;
+    return true;
+  }
+
+  virtual const char* Name() const override { return "Noop"; }
+};
+
+TEST_F(DBCompactionTest, PartialManualCompaction) {
+  Options opts = CurrentOptions();
+  opts.num_levels = 3;
+  opts.level0_file_num_compaction_trigger = 10;
+  opts.compression = kNoCompression;
+  opts.merge_operator.reset(new NoopMergeOperator());
+  opts.target_file_size_base = 10240;
+  DestroyAndReopen(opts);
+
+  Random rnd(301);
+  for (auto i = 0; i < 8; ++i) {
+    for (auto j = 0; j < 10; ++j) {
+      Merge("foo", RandomString(&rnd, 1024));
+    }
+    Flush();
+  }
+
+  MoveFilesToLevel(2);
+
+  std::string prop;
+  EXPECT_TRUE(dbfull()->GetProperty(DB::Properties::kLiveSstFilesSize, &prop));
+  uint64_t max_compaction_bytes = atoi(prop.c_str()) / 2;
+  ASSERT_OK(dbfull()->SetOptions(
+      {{"max_compaction_bytes", std::to_string(max_compaction_bytes)}}));
+
+  CompactRangeOptions cro;
+  cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
+  dbfull()->CompactRange(cro, nullptr, nullptr);
+}
+
 #endif // !defined(ROCKSDB_LITE)
 
 }  // namespace rocksdb
diff --git a/db/version_set.cc b/db/version_set.cc
index ede91c613..202b0f35b 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -2032,7 +2032,7 @@ bool VersionStorageInfo::OverlapInLevel(int level,
 void VersionStorageInfo::GetOverlappingInputs(
     int level, const InternalKey* begin, const InternalKey* end,
     std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
-    bool expand_range) const {
+    bool expand_range, InternalKey** next_smallest) const {
   if (level >= num_non_empty_levels_) {
     // this level is empty, no overlapping inputs
     return;
@@ -2051,11 +2051,17 @@ void VersionStorageInfo::GetOverlappingInputs(
   }
   const Comparator* user_cmp = user_comparator_;
   if (level > 0) {
-    GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs,
-                                          hint_index, file_index);
+    GetOverlappingInputsRangeBinarySearch(level, begin, end, inputs, hint_index,
+                                          file_index, false, next_smallest);
     return;
   }
 
+  if (next_smallest) {
+    // next_smallest key only makes sense for non-level 0, where files are
+    // non-overlapping
+    *next_smallest = nullptr;
+  }
+
   for (size_t i = 0; i < level_files_brief_[level].num_files; ) {
     FdWithKeyRange* f = &(level_files_brief_[level].files[i++]);
     const Slice file_start = ExtractUserKey(f->smallest_key);
@@ -2183,7 +2189,7 @@ int sstableKeyCompare(const Comparator* user_cmp,
 void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
     int level, const InternalKey* begin, const InternalKey* end,
     std::vector<FileMetaData*>* inputs, int hint_index, int* file_index,
-    bool within_interval) const {
+    bool within_interval, InternalKey** next_smallest) const {
   assert(level > 0);
   int min = 0;
   int mid = 0;
@@ -2219,6 +2225,9 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
 
   // If there were no overlapping files, return immediately.
   if (!foundOverlap) {
+    if (next_smallest) {
+      next_smallest = nullptr;
+    }
     return;
   }
   // returns the index where an overlap is found
@@ -2239,6 +2248,15 @@ void VersionStorageInfo::GetOverlappingInputsRangeBinarySearch(
   for (int i = start_index; i <= end_index; i++) {
     inputs->push_back(files_[level][i]);
   }
+
+  if (next_smallest != nullptr) {
+    // Provide the next key outside the range covered by inputs
+    if (++end_index < static_cast<int>(files_[level].size())) {
+      **next_smallest = files_[level][end_index]->smallest;
+    } else {
+      *next_smallest = nullptr;
+    }
+  }
 }
 
 // Store in *start_index and *end_index the range of all files in
diff --git a/db/version_set.h b/db/version_set.h
index 4e3b2bec2..20521fa06 100644
--- a/db/version_set.h
+++ b/db/version_set.h
@@ -188,9 +188,11 @@ class VersionStorageInfo {
       std::vector<FileMetaData*>* inputs,
       int hint_index = -1,         // index of overlap file
       int* file_index = nullptr,   // return index of overlap file
-      bool expand_range = true)    // if set, returns files which overlap the
-      const;                       // range and overlap each other. If false,
+      bool expand_range = true,    // if set, returns files which overlap the
+                                   // range and overlap each other. If false,
                                    // then just files intersecting the range
+      InternalKey** next_smallest = nullptr)  // if non-null, returns the
+      const;                       // smallest key of next file not included
   void GetCleanInputsWithinInterval(
       int level, const InternalKey* begin,  // nullptr means before all keys
       const InternalKey* end,      // nullptr means after all keys
@@ -200,14 +202,15 @@ class VersionStorageInfo {
       const;
 
   void GetOverlappingInputsRangeBinarySearch(
-      int level,                 // level > 0
+      int level,                      // level > 0
       const InternalKey* begin,       // nullptr means before all keys
       const InternalKey* end,         // nullptr means after all keys
       std::vector<FileMetaData*>* inputs,
       int hint_index,                 // index of overlap file
       int* file_index,                // return index of overlap file
-      bool within_interval = false)   // if set, force the inputs within interval
-      const;
+      bool within_interval = false,   // if set, force the inputs within interval
+      InternalKey** next_smallest = nullptr)  // if non-null, returns the
+      const;                          // smallest key of next file not included
 
   void ExtendFileRangeOverlappingInterval(
       int level,
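
Illustrative sketch (not part of the patch). The following minimal, self-contained C++ sketch restates the summary's stop-key logic under simplified assumptions; FileMeta, PickResult, and PickRangeWithBudget are invented names, not RocksDB code. The real implementation works on FileMetaData/InternalKey and only chooses the stop key after ExpandInputsToCleanCut() has produced the final input set.

#include <cstdint>
#include <string>
#include <vector>

// Simplified stand-in for a level's SST file metadata.
struct FileMeta {
  std::string smallest;  // smallest user key in the file
  uint64_t size;         // compensated file size
};

struct PickResult {
  size_t num_files = 0;       // how many leading files to compact in this pass
  bool covered_whole = true;  // true if the byte budget was never exceeded
  std::string resume_key;     // begin key for the next pass, if truncated
};

// Pick a prefix of `candidates` (sorted, non-overlapping, i.e. a level > 0)
// whose total size stays under `budget`. When the pick is truncated, the
// resume key is taken from the first file NOT included, after the input set
// is final, so the next pass never re-compacts the same files.
PickResult PickRangeWithBudget(const std::vector<FileMeta>& candidates,
                               uint64_t budget) {
  PickResult r;
  uint64_t total = 0;
  for (size_t i = 0; i + 1 < candidates.size(); ++i) {
    total += candidates[i].size;
    if (total >= budget) {
      r.covered_whole = false;
      r.num_files = i + 1;
      break;
    }
  }
  if (r.covered_whole) {
    r.num_files = candidates.size();
    return r;  // everything fits; no resume key needed
  }
  // (The real code would first expand r.num_files so that files sharing a
  // user-key boundary stay in the same compaction.)
  r.resume_key = candidates[r.num_files].smallest;
  return r;
}

The point mirrored from the patch is that the resume key comes from the first file left out of the final input set, not from an index computed before the inputs were expanded, which is what previously allowed the same files to be recompacted indefinitely.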