Delete files in multiple ranges at once
Summary: Using `DeleteFilesInRange` to delete files in a lot of ranges can be slow, because `VersionSet::LogAndApply` is expensive. This PR adds a new `DeleteFilesInRanges` function to delete files in multiple ranges at once. Closes https://github.com/facebook/rocksdb/issues/2951 Closes https://github.com/facebook/rocksdb/pull/3431 Differential Revision: D6849228 Pulled By: ajkr fbshipit-source-id: daeedcabd8def4b1d9ee95a58266dee77b5d68cb
This commit is contained in:
parent
77dc069eb9
commit
ab43ff58b5
@ -2,9 +2,11 @@
|
|||||||
## Unreleased
|
## Unreleased
|
||||||
### Public API Change
|
### Public API Change
|
||||||
* Iterator::SeekForPrev is now a pure virtual method. This prevents users who implement the Iterator interface from forgetting to implement SeekForPrev by mistake.
|
* Iterator::SeekForPrev is now a pure virtual method. This prevents users who implement the Iterator interface from forgetting to implement SeekForPrev by mistake.
|
||||||
|
- Add `include_end` option to make the range end exclusive when `include_end == false` in `DeleteFilesInRange()`.
|
||||||
|
|
||||||
### New Features
|
### New Features
|
||||||
* Improve the performance of iterators doing long range scans by using readahead.
|
* Improve the performance of iterators doing long range scans by using readahead.
|
||||||
|
- Add new function `DeleteFilesInRanges()` to delete files in multiple ranges at once for better performance.
|
||||||
|
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
* Fix `DisableFileDeletions()` followed by `GetSortedWalFiles()` to not return obsolete WAL files that `PurgeObsoleteFiles()` is going to delete.
|
* Fix `DisableFileDeletions()` followed by `GetSortedWalFiles()` to not return obsolete WAL files that `PurgeObsoleteFiles()` is going to delete.
|
||||||
|
@ -19,9 +19,17 @@ void CancelAllBackgroundWork(DB* db, bool wait) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
|
Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
|
||||||
const Slice* begin, const Slice* end) {
|
const Slice* begin, const Slice* end,
|
||||||
|
bool include_end) {
|
||||||
|
RangePtr range(begin, end);
|
||||||
|
return DeleteFilesInRanges(db, column_family, &range, 1, include_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
Status DeleteFilesInRanges(DB* db, ColumnFamilyHandle* column_family,
|
||||||
|
const RangePtr* ranges, size_t n,
|
||||||
|
bool include_end) {
|
||||||
return (static_cast_with_check<DBImpl, DB>(db->GetRootDB()))
|
return (static_cast_with_check<DBImpl, DB>(db->GetRootDB()))
|
||||||
->DeleteFilesInRange(column_family, begin, end);
|
->DeleteFilesInRanges(column_family, ranges, n, include_end);
|
||||||
}
|
}
|
||||||
|
|
||||||
Status VerifySstFileChecksum(const Options& options,
|
Status VerifySstFileChecksum(const Options& options,
|
||||||
|
@ -1517,6 +1517,122 @@ TEST_F(DBCompactionTest, DeleteFileRange) {
|
|||||||
ASSERT_GT(old_num_files, new_num_files);
|
ASSERT_GT(old_num_files, new_num_files);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(DBCompactionTest, DeleteFilesInRanges) {
|
||||||
|
Options options = CurrentOptions();
|
||||||
|
options.write_buffer_size = 10 * 1024 * 1024;
|
||||||
|
options.max_bytes_for_level_multiplier = 2;
|
||||||
|
options.num_levels = 4;
|
||||||
|
options.max_background_compactions = 3;
|
||||||
|
options.disable_auto_compactions = true;
|
||||||
|
|
||||||
|
DestroyAndReopen(options);
|
||||||
|
int32_t value_size = 10 * 1024; // 10 KB
|
||||||
|
|
||||||
|
Random rnd(301);
|
||||||
|
std::map<int32_t, std::string> values;
|
||||||
|
|
||||||
|
// file [0 => 100), [100 => 200), ... [900, 1000)
|
||||||
|
for (auto i = 0; i < 10; i++) {
|
||||||
|
for (auto j = 0; j < 100; j++) {
|
||||||
|
auto k = i * 100 + j;
|
||||||
|
values[k] = RandomString(&rnd, value_size);
|
||||||
|
ASSERT_OK(Put(Key(k), values[k]));
|
||||||
|
}
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
}
|
||||||
|
ASSERT_EQ("10", FilesPerLevel(0));
|
||||||
|
CompactRangeOptions compact_options;
|
||||||
|
compact_options.change_level = true;
|
||||||
|
compact_options.target_level = 2;
|
||||||
|
ASSERT_OK(db_->CompactRange(compact_options, nullptr, nullptr));
|
||||||
|
ASSERT_EQ("0,0,10", FilesPerLevel(0));
|
||||||
|
|
||||||
|
// file [0 => 100), [200 => 300), ... [800, 900)
|
||||||
|
for (auto i = 0; i < 10; i+=2) {
|
||||||
|
for (auto j = 0; j < 100; j++) {
|
||||||
|
auto k = i * 100 + j;
|
||||||
|
ASSERT_OK(Put(Key(k), values[k]));
|
||||||
|
}
|
||||||
|
ASSERT_OK(Flush());
|
||||||
|
}
|
||||||
|
ASSERT_EQ("5,0,10", FilesPerLevel(0));
|
||||||
|
ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
|
||||||
|
ASSERT_EQ("0,5,10", FilesPerLevel(0));
|
||||||
|
|
||||||
|
// Delete files in range [0, 299] (inclusive)
|
||||||
|
{
|
||||||
|
auto begin_str1 = Key(0), end_str1 = Key(100);
|
||||||
|
auto begin_str2 = Key(100), end_str2 = Key(200);
|
||||||
|
auto begin_str3 = Key(200), end_str3 = Key(299);
|
||||||
|
Slice begin1(begin_str1), end1(end_str1);
|
||||||
|
Slice begin2(begin_str2), end2(end_str2);
|
||||||
|
Slice begin3(begin_str3), end3(end_str3);
|
||||||
|
std::vector<RangePtr> ranges;
|
||||||
|
ranges.push_back(RangePtr(&begin1, &end1));
|
||||||
|
ranges.push_back(RangePtr(&begin2, &end2));
|
||||||
|
ranges.push_back(RangePtr(&begin3, &end3));
|
||||||
|
ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(),
|
||||||
|
ranges.data(), ranges.size()));
|
||||||
|
ASSERT_EQ("0,3,7", FilesPerLevel(0));
|
||||||
|
|
||||||
|
// Keys [0, 300) should not exist.
|
||||||
|
for (auto i = 0; i < 300; i++) {
|
||||||
|
ReadOptions ropts;
|
||||||
|
std::string result;
|
||||||
|
auto s = db_->Get(ropts, Key(i), &result);
|
||||||
|
ASSERT_TRUE(s.IsNotFound());
|
||||||
|
}
|
||||||
|
for (auto i = 300; i < 1000; i++) {
|
||||||
|
ASSERT_EQ(Get(Key(i)), values[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete files in range [600, 999) (exclusive)
|
||||||
|
{
|
||||||
|
auto begin_str1 = Key(600), end_str1 = Key(800);
|
||||||
|
auto begin_str2 = Key(700), end_str2 = Key(900);
|
||||||
|
auto begin_str3 = Key(800), end_str3 = Key(999);
|
||||||
|
Slice begin1(begin_str1), end1(end_str1);
|
||||||
|
Slice begin2(begin_str2), end2(end_str2);
|
||||||
|
Slice begin3(begin_str3), end3(end_str3);
|
||||||
|
std::vector<RangePtr> ranges;
|
||||||
|
ranges.push_back(RangePtr(&begin1, &end1));
|
||||||
|
ranges.push_back(RangePtr(&begin2, &end2));
|
||||||
|
ranges.push_back(RangePtr(&begin3, &end3));
|
||||||
|
ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(),
|
||||||
|
ranges.data(), ranges.size(), false));
|
||||||
|
ASSERT_EQ("0,1,4", FilesPerLevel(0));
|
||||||
|
|
||||||
|
// Keys [600, 900) should not exist.
|
||||||
|
for (auto i = 600; i < 900; i++) {
|
||||||
|
ReadOptions ropts;
|
||||||
|
std::string result;
|
||||||
|
auto s = db_->Get(ropts, Key(i), &result);
|
||||||
|
ASSERT_TRUE(s.IsNotFound());
|
||||||
|
}
|
||||||
|
for (auto i = 300; i < 600; i++) {
|
||||||
|
ASSERT_EQ(Get(Key(i)), values[i]);
|
||||||
|
}
|
||||||
|
for (auto i = 900; i < 1000; i++) {
|
||||||
|
ASSERT_EQ(Get(Key(i)), values[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete all files.
|
||||||
|
{
|
||||||
|
RangePtr range;
|
||||||
|
ASSERT_OK(DeleteFilesInRanges(db_, db_->DefaultColumnFamily(), &range, 1));
|
||||||
|
ASSERT_EQ("", FilesPerLevel(0));
|
||||||
|
|
||||||
|
for (auto i = 0; i < 1000; i++) {
|
||||||
|
ReadOptions ropts;
|
||||||
|
std::string result;
|
||||||
|
auto s = db_->Get(ropts, Key(i), &result);
|
||||||
|
ASSERT_TRUE(s.IsNotFound());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) {
|
TEST_F(DBCompactionTest, DeleteFileRangeFileEndpointsOverlapBug) {
|
||||||
// regression test for #2833: groups of files whose user-keys overlap at the
|
// regression test for #2833: groups of files whose user-keys overlap at the
|
||||||
// endpoints could be split by `DeleteFilesInRange`. This caused old data to
|
// endpoints could be split by `DeleteFilesInRange`. This caused old data to
|
||||||
|
@ -2138,19 +2138,22 @@ Status DBImpl::DeleteFile(std::string name) {
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
Status DBImpl::DeleteFilesInRange(ColumnFamilyHandle* column_family,
|
Status DBImpl::DeleteFilesInRanges(ColumnFamilyHandle* column_family,
|
||||||
const Slice* begin, const Slice* end) {
|
const RangePtr* ranges, size_t n,
|
||||||
|
bool include_end) {
|
||||||
Status status;
|
Status status;
|
||||||
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
|
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
|
||||||
ColumnFamilyData* cfd = cfh->cfd();
|
ColumnFamilyData* cfd = cfh->cfd();
|
||||||
VersionEdit edit;
|
VersionEdit edit;
|
||||||
std::vector<FileMetaData*> deleted_files;
|
std::set<FileMetaData*> deleted_files;
|
||||||
JobContext job_context(next_job_id_.fetch_add(1), true);
|
JobContext job_context(next_job_id_.fetch_add(1), true);
|
||||||
{
|
{
|
||||||
InstrumentedMutexLock l(&mutex_);
|
InstrumentedMutexLock l(&mutex_);
|
||||||
Version* input_version = cfd->current();
|
Version* input_version = cfd->current();
|
||||||
|
|
||||||
auto* vstorage = input_version->storage_info();
|
auto* vstorage = input_version->storage_info();
|
||||||
|
for (size_t r = 0; r < n; r++) {
|
||||||
|
auto begin = ranges[r].start, end = ranges[r].limit;
|
||||||
for (int i = 1; i < cfd->NumberLevels(); i++) {
|
for (int i = 1; i < cfd->NumberLevels(); i++) {
|
||||||
if (vstorage->LevelFiles(i).empty() ||
|
if (vstorage->LevelFiles(i).empty() ||
|
||||||
!vstorage->OverlapInLevel(i, begin, end)) {
|
!vstorage->OverlapInLevel(i, begin, end)) {
|
||||||
@ -2180,12 +2183,20 @@ Status DBImpl::DeleteFilesInRange(ColumnFamilyHandle* column_family,
|
|||||||
if (level_file->being_compacted) {
|
if (level_file->being_compacted) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (deleted_files.find(level_file) != deleted_files.end()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!include_end && end != nullptr &&
|
||||||
|
cfd->user_comparator()->Compare(level_file->largest.user_key(), *end) == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
edit.SetColumnFamily(cfd->GetID());
|
edit.SetColumnFamily(cfd->GetID());
|
||||||
edit.DeleteFile(i, level_file->fd.GetNumber());
|
edit.DeleteFile(i, level_file->fd.GetNumber());
|
||||||
deleted_files.push_back(level_file);
|
deleted_files.insert(level_file);
|
||||||
level_file->being_compacted = true;
|
level_file->being_compacted = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (edit.GetDeletedFiles().empty()) {
|
if (edit.GetDeletedFiles().empty()) {
|
||||||
job_context.Clean();
|
job_context.Clean();
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
|
@ -247,8 +247,9 @@ class DBImpl : public DB {
|
|||||||
const TransactionLogIterator::ReadOptions&
|
const TransactionLogIterator::ReadOptions&
|
||||||
read_options = TransactionLogIterator::ReadOptions()) override;
|
read_options = TransactionLogIterator::ReadOptions()) override;
|
||||||
virtual Status DeleteFile(std::string name) override;
|
virtual Status DeleteFile(std::string name) override;
|
||||||
Status DeleteFilesInRange(ColumnFamilyHandle* column_family,
|
Status DeleteFilesInRanges(ColumnFamilyHandle* column_family,
|
||||||
const Slice* begin, const Slice* end);
|
const RangePtr* ranges, size_t n,
|
||||||
|
bool include_end = true);
|
||||||
|
|
||||||
virtual void GetLiveFilesMetaData(
|
virtual void GetLiveFilesMetaData(
|
||||||
std::vector<LiveFileMetaData>* metadata) override;
|
std::vector<LiveFileMetaData>* metadata) override;
|
||||||
|
@ -329,7 +329,15 @@ void CancelAllBackgroundWork(DB* db, bool wait = false);
|
|||||||
// in the range.
|
// in the range.
|
||||||
// Snapshots before the delete might not see the data in the given range.
|
// Snapshots before the delete might not see the data in the given range.
|
||||||
Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
|
Status DeleteFilesInRange(DB* db, ColumnFamilyHandle* column_family,
|
||||||
const Slice* begin, const Slice* end);
|
const Slice* begin, const Slice* end,
|
||||||
|
bool include_end = true);
|
||||||
|
|
||||||
|
// Delete files in multiple ranges at once
|
||||||
|
// Deleting files in a lot of ranges one at a time can be slow; use this API for
|
||||||
|
// better performance in that case.
|
||||||
|
Status DeleteFilesInRanges(DB* db, ColumnFamilyHandle* column_family,
|
||||||
|
const RangePtr* ranges, size_t n,
|
||||||
|
bool include_end = true);
|
||||||
|
|
||||||
// Verify the checksum of file
|
// Verify the checksum of file
|
||||||
Status VerifySstFileChecksum(const Options& options,
|
Status VerifySstFileChecksum(const Options& options,
|
||||||
|
@ -100,6 +100,14 @@ struct Range {
|
|||||||
Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
|
Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct RangePtr {
|
||||||
|
const Slice* start;
|
||||||
|
const Slice* limit;
|
||||||
|
|
||||||
|
RangePtr() : start(nullptr), limit(nullptr) { }
|
||||||
|
RangePtr(const Slice* s, const Slice* l) : start(s), limit(l) { }
|
||||||
|
};
|
||||||
|
|
||||||
// A collection of table properties objects, where
|
// A collection of table properties objects, where
|
||||||
// key: is the table's file name.
|
// key: is the table's file name.
|
||||||
// value: the table properties object of the given table.
|
// value: the table properties object of the given table.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user