Delete files in multiple ranges at once
Summary: Using `DeleteFilesInRange` to delete files in many ranges can be slow, because each call pays for an expensive `VersionSet::LogAndApply`. This PR adds a new `DeleteFilesInRanges` function that deletes files in multiple ranges at once, batching the work into a single version update.
Closes https://github.com/facebook/rocksdb/issues/2951
Closes https://github.com/facebook/rocksdb/pull/3431
Differential Revision: D6849228
Pulled By: ajkr
fbshipit-source-id: daeedcabd8def4b1d9ee95a58266dee77b5d68cb
commit ab43ff58b5 (parent 77dc069eb9)
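For orientation before the diff, here is a minimal usage sketch of the batched API this commit adds. It is not part of the change itself: the helper name `DropColdRanges`, the key bounds, and the header paths are illustrative assumptions, while the `DeleteFilesInRanges` and `RangePtr` signatures follow the declarations introduced below.

```cpp
#include "rocksdb/convenience.h"  // assumed header for DeleteFilesInRanges()
#include "rocksdb/db.h"           // assumed header for RangePtr

// Sketch: drop SST files that lie entirely inside ["a","c") or ["x","z")
// with one call, instead of two DeleteFilesInRange() calls that each pay
// for a separate VersionSet::LogAndApply.
rocksdb::Status DropColdRanges(rocksdb::DB* db) {
  rocksdb::Slice a("a"), c("c"), x("x"), z("z");
  rocksdb::RangePtr ranges[] = {rocksdb::RangePtr(&a, &c),
                                rocksdb::RangePtr(&x, &z)};
  // include_end = false treats the range ends as exclusive (the new flag).
  return rocksdb::DeleteFilesInRanges(db, db->DefaultColumnFamily(), ranges, 2,
                                      /*include_end=*/false);
}
```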
@@ -2,9 +2,11 @@
 ## Unreleased
 ### Public API Change
 * Iterator::SeekForPrev is now a pure virtual method. This is to prevent users who implement the Iterator interface from failing to implement SeekForPrev by mistake.
+* Add `include_end` option to make the range end exclusive when `include_end == false` in `DeleteFilesInRange()`.

 ### New Features
 * Improve the performance of iterators doing long range scans by using readahead.
+* Add new function `DeleteFilesInRanges()` to delete files in multiple ranges at once for better performance.

 ### Bug Fixes
 * Fix `DisableFileDeletions()` followed by `GetSortedWalFiles()` to not return obsolete WAL files that `PurgeObsoleteFiles()` is going to delete.
@@ -19,9 +19,17 @@ void CancelAllBackgroundWork(DB* db, bool wait) {
 }

 Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
-                          const Slice* begin, const Slice* end) {
+                          const Slice* begin, const Slice* end,
+                          bool include_end) {
+  RangePtr range(begin, end);
+  return DeleteFilesInRanges(db, column_family, &range, 1, include_end);
+}
+
+Status DeleteFilesInRanges(DB* db, ColumnFamilyHandle* column_family,
+                           const RangePtr* ranges, size_t n,
+                           bool include_end) {
   return (static_cast_with_check<DBImpl, DB>(db->GetRootDB()))
-      ->DeleteFilesInRange(column_family, begin, end);
+      ->DeleteFilesInRanges(column_family, ranges, n, include_end);
 }

 Status VerifySstFileChecksum(const Options& options,
@@ -1517,6 +1517,122 @@ TEST_F(DBCompactionTest, DeleteFileRange) {
   ASSERT_GT(old_num_files, new_num_files);
 }

+TEST_F(DBCompactionTest, DeleteFilesInRanges) {
+  Options options = CurrentOptions();
+  options.write_buffer_size = 10 * 1024 * 1024;
+  options.max_bytes_for_level_multiplier = 2;
+  options.num_levels = 4;
+  options.max_background_compactions = 3;
+  options.disable_auto_compactions = true;
+
+  DestroyAndReopen(options);
+  int32_t value_size = 10 * 1024;  // 10 KB
+
+  Random rnd(301);
+  std::map<int32_t, std::string> values;
+
+  // file [0 => 100), [100 => 200), ... [900, 1000)
+  for (auto i = 0; i < 10; i++) {
+    for (auto j = 0; j < 100; j++) {
+      auto k = i * 100 + j;
+      values[k] = RandomString(&rnd, value_size);
+      ASSERT_OK(Put(Key(k), values[k]));
+    }
+    ASSERT_OK(Flush());
+  }
+  ASSERT_EQ("10", FilesPerLevel(0));
+  CompactRangeOptions compact_options;
+  compact_options.change_level = true;
+  compact_options.target_level = 2;
+  ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
+  ASSERT_EQ("0,0,10", FilesPerLevel(0));
+
+  // file [0 => 100), [200 => 300), ... [800, 900)
+  for (auto i = 0; i < 10; i += 2) {
+    for (auto j = 0; j < 100; j++) {
+      auto k = i * 100 + j;
+      ASSERT_OK(Put(Key(k), values[k]));
+    }
+    ASSERT_OK(Flush());
+  }
+  ASSERT_EQ("5,0,10", FilesPerLevel(0));
+  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
+  ASSERT_EQ("0,5,10", FilesPerLevel(0));
+
+  // Delete files in range [0, 299] (inclusive)
+  {
+    auto begin_str1 = Key(0), end_str1 = Key(100);
+    auto begin_str2 = Key(100), end_str2 = Key(200);
+    auto begin_str3 = Key(200), end_str3 = Key(299);
+    Slice begin1(begin_str1), end1(end_str1);
+    Slice begin2(begin_str2), end2(end_str2);
+    Slice begin3(begin_str3), end3(end_str3);
+    std::vector<RangePtr> ranges;
+    ranges.push_back(RangePtr(&begin1, &end1));
+    ranges.push_back(RangePtr(&begin2, &end2));
+    ranges.push_back(RangePtr(&begin3, &end3));
+    ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(),
+                                  ranges.data(), ranges.size()));
+    ASSERT_EQ("0,3,7", FilesPerLevel(0));
+
+    // Keys [0, 300) should not exist.
+    for (auto i = 0; i < 300; i++) {
+      ReadOptions ropts;
+      std::string result;
+      auto s = db_->Get(ropts, Key(i), &result);
+      ASSERT_TRUE(s.IsNotFound());
+    }
+    for (auto i = 300; i < 1000; i++) {
+      ASSERT_EQ(Get(Key(i)), values[i]);
+    }
+  }
+
+  // Delete files in range [600, 999) (exclusive)
+  {
+    auto begin_str1 = Key(600), end_str1 = Key(800);
+    auto begin_str2 = Key(700), end_str2 = Key(900);
+    auto begin_str3 = Key(800), end_str3 = Key(999);
+    Slice begin1(begin_str1), end1(end_str1);
+    Slice begin2(begin_str2), end2(end_str2);
+    Slice begin3(begin_str3), end3(end_str3);
+    std::vector<RangePtr> ranges;
+    ranges.push_back(RangePtr(&begin1, &end1));
+    ranges.push_back(RangePtr(&begin2, &end2));
+    ranges.push_back(RangePtr(&begin3, &end3));
+    ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(),
+                                  ranges.data(), ranges.size(), false));
+    ASSERT_EQ("0,1,4", FilesPerLevel(0));
+
+    // Keys [600, 900) should not exist.
+    for (auto i = 600; i < 900; i++) {
+      ReadOptions ropts;
+      std::string result;
+      auto s = db_->Get(ropts, Key(i), &result);
+      ASSERT_TRUE(s.IsNotFound());
+    }
+    for (auto i = 300; i < 600; i++) {
+      ASSERT_EQ(Get(Key(i)), values[i]);
+    }
+    for (auto i = 900; i < 1000; i++) {
+      ASSERT_EQ(Get(Key(i)), values[i]);
+    }
+  }
+
+  // Delete all files.
+  {
+    RangePtr range;
+    ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(), &range, 1));
+    ASSERT_EQ("", FilesPerLevel(0));
+
+    for (auto i = 0; i < 1000; i++) {
+      ReadOptions ropts;
+      std::string result;
+      auto s = db_->Get(ropts, Key(i), &result);
+      ASSERT_TRUE(s.IsNotFound());
+    }
+  }
+}
+
 TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) {
   // regression test for #2833: groups of files whose user-keys overlap at the
   // endpoints could be split by `DeleteFilesInRange`. This caused old data to
@@ -2138,19 +2138,22 @@ Status DBImpl::DeleteFile(std::string name) {
   return status;
 }

-Status DBImpl::DeleteFilesInRange(ColumnFamilyHandle* column_family,
-                                  const Slice* begin, const Slice* end) {
+Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family,
+                                   const RangePtr* ranges, size_t n,
+                                   bool include_end) {
   Status status;
   auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
   ColumnFamilyData* cfd = cfh->cfd();
   VersionEdit edit;
-  std::vector<FileMetaData*> deleted_files;
+  std::set<FileMetaData*> deleted_files;
   JobContext job_context(next_job_id_.fetch_add(1), true);
   {
     InstrumentedMutexLock l(&mutex_);
     Version* input_version = cfd->current();

     auto* vstorage = input_version->storage_info();
+    for (size_t r = 0; r < n; r++) {
+      auto begin = ranges[r].start, end = ranges[r].limit;
     for (int i = 1; i < cfd->NumberLevels(); i++) {
       if (vstorage->LevelFiles(i).empty() ||
           !vstorage->OverlapInLevel(i, begin, end)) {
@@ -2180,12 +2183,20 @@ Status DBImpl::DeleteFilesInRange(ColumnFamilyHandle* column_family,
         if (level_file->being_compacted) {
           continue;
         }
+        if (deleted_files.find(level_file) != deleted_files.end()) {
+          continue;
+        }
+        if (!include_end && end != nullptr &&
+            cfd->user_comparator()->Compare(level_file->largest.user_key(), *end) == 0) {
+          continue;
+        }
         edit.SetColumnFamily(cfd->GetID());
         edit.DeleteFile(i, level_file->fd.GetNumber());
-        deleted_files.push_back(level_file);
+        deleted_files.insert(level_file);
         level_file->being_compacted = true;
       }
     }
+    }
     if (edit.GetDeletedFiles().empty()) {
       job_context.Clean();
       return Status::OK();
@@ -247,8 +247,9 @@ class DBImpl : public DB {
       const TransactionLogIterator::ReadOptions&
           read_options = TransactionLogIterator::ReadOptions()) override;
   virtual Status DeleteFile(std::string name) override;
-  Status DeleteFilesInRange(ColumnFamilyHandle* column_family,
-                            const Slice* begin, const Slice* end);
+  Status DeleteFilesInRanges(ColumnFamilyHandle* column_family,
+                             const RangePtr* ranges, size_t n,
+                             bool include_end = true);

   virtual void GetLiveFilesMetaData(
       std::vector<LiveFileMetaData>* metadata) override;
@@ -329,7 +329,15 @@ void CancelAllBackgroundWork(DB* db, bool wait = false);
 // in the range.
 // Snapshots before the delete might not see the data in the given range.
 Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
-                          const Slice* begin, const Slice* end);
+                          const Slice* begin, const Slice* end,
+                          bool include_end = true);

+// Delete files in multiple ranges at once.
+// Deleting files in a lot of ranges one at a time can be slow; use this API
+// for better performance in that case.
+Status DeleteFilesInRanges(DB* db, ColumnFamilyHandle* column_family,
+                           const RangePtr* ranges, size_t n,
+                           bool include_end = true);
+
 // Verify the checksum of file
 Status VerifySstFileChecksum(const Options& options,
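A short hedged example of the extended single-range overload declared above (not part of the diff; the helper name `DropKeyRange`, the key bounds, and the header paths are placeholders). With `include_end = false`, a file whose largest user key equals `end` is kept rather than deleted.

```cpp
#include "rocksdb/convenience.h"  // assumed header for DeleteFilesInRange()
#include "rocksdb/db.h"

// Sketch: delete SST files fully contained in [start, end), treating `end`
// as exclusive via the new include_end flag (it defaults to true/inclusive).
rocksdb::Status DropKeyRange(rocksdb::DB* db) {
  rocksdb::Slice start("key000"), end("key100");
  return rocksdb::DeleteFilesInRange(db, db->DefaultColumnFamily(), &start,
                                     &end, /*include_end=*/false);
}
```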
@@ -100,6 +100,14 @@ struct Range {
   Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
 };

+struct RangePtr {
+  const Slice* start;
+  const Slice* limit;
+
+  RangePtr() : start(nullptr), limit(nullptr) { }
+  RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) { }
+};
+
 // A collection of table properties objects, where
 //   key: is the table's file name.
 //   value: the table properties object of the given table.
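As the "Delete all files" block in the test above relies on, a default-constructed `RangePtr` carries null `start` and `limit`, which the deletion path treats as an unbounded range. A small hedged sketch (helper name and header paths are assumptions):

```cpp
#include "rocksdb/convenience.h"  // assumed header for DeleteFilesInRanges()
#include "rocksdb/db.h"           // assumed header for RangePtr

// Sketch: a null start/limit leaves that side of the range open, so a
// default-constructed RangePtr asks for deletion across the whole key space.
rocksdb::Status DropAllSstFiles(rocksdb::DB* db) {
  rocksdb::RangePtr everything;  // start == nullptr, limit == nullptr
  return rocksdb::DeleteFilesInRanges(db, db->DefaultColumnFamily(),
                                      &everything, 1);
}
```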