From 1ababeb76ac957c2bdbc896669feb2e722623705 Mon Sep 17 00:00:00 2001 From: Hui Xiao Date: Thu, 4 Nov 2021 13:29:09 -0700 Subject: [PATCH] Deallocate payload of BlockBasedTableBuilder::Rep::FilterBlockBuilder earlier for Full/PartitionedFilter (#9070) Summary: Note: This PR is the 1st part of a bigger PR stack (https://github.com/facebook/rocksdb/pull/9073). Context: Previously, the payload (i.e, filter data) within `BlockBasedTableBuilder::Rep::FilterBlockBuilder` object is not deallocated until `BlockBasedTableBuilder` is deallocated, despite it is no longer useful after its related `filter_content` being written. - Transferred the payload (i.e, the filter data) out of `BlockBasedTableBuilder::Rep::FilterBlockBuilder` object - For PartitionedFilter: - Unified `filters` and `filter_gc` lists into one `std::deque filters` by adding a new field `last_filter_entry_key` and storing the `std::unique_ptr filter_data` with the `Slice filter` in the same entry - Reset `last_filter_data` in the case where `filters` is empty, which should be as by then we would've finish using all the `Slice filter` - Deallocated the payload by going out of scope as soon as we're done with using the `filter_content` associated with the payload - This is an internal interface change at the level of `FilterBlockBuilder::Finish()`, which leads to touching the inherited interface in `BlockBasedFilterBlockBuilder`. But for that, the payload transferring is ignored. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9070 Test Plan: - The main focus is to catch segment fault error during `FilterBlockBuilder::Finish()` and `BlockBasedTableBuilder::Finish()` and interface mismatch. Relying on existing CI tests is enough as `assert(false)` was temporarily added to verify the new logic of transferring ownership indeed run Reviewed By: pdillinger Differential Revision: D31884933 Pulled By: hx235 fbshipit-source-id: f73ecfbea13788d4fc058013ace27230110b52f4 --- HISTORY.md | 3 +++ table/block_based/block_based_filter_block.cc | 7 ++--- table/block_based/block_based_filter_block.h | 4 ++- .../block_based/block_based_table_builder.cc | 11 +++++++- table/block_based/filter_block.h | 9 ++++++- table/block_based/full_filter_block.cc | 9 ++++--- table/block_based/full_filter_block.h | 4 ++- table/block_based/partitioned_filter_block.cc | 26 ++++++++++++------- table/block_based/partitioned_filter_block.h | 13 +++++++--- .../partitioned_filter_block_test.cc | 3 ++- 10 files changed, 65 insertions(+), 24 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index e5f7e7a8e..319906a6c 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -25,6 +25,9 @@ * Add API `FSDirectory::FsyncWithDirOptions()`, which provides extra information like directory fsync reason in `DirFsyncOptions`. File system like btrfs is using that to skip directory fsync for creating a new file, or when renaming a file, fsync the target file instead of the directory, which improves the `DB::Open()` speed by ~20%. * `DB::Open()` is not going be blocked by obsolete file purge if `DBOptions::avoid_unnecessary_blocking_io` is set to true. +### Performance Improvements +* Released some memory related to filter construction earlier in `BlockBasedTableBuilder` for `FullFilter` and `PartitionedFilter` case (#9070) + ## 6.26.0 (2021-10-20) ### Bug Fixes * Fixes a bug in directed IO mode when calling MultiGet() for blobs in the same blob file. The bug is caused by not sorting the blob read requests by file offsets. diff --git a/table/block_based/block_based_filter_block.cc b/table/block_based/block_based_filter_block.cc index bb931cb2b..a6ee462de 100644 --- a/table/block_based/block_based_filter_block.cc +++ b/table/block_based/block_based_filter_block.cc @@ -117,9 +117,10 @@ inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) { } } -Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/, - Status* status) { - // In this impl we ignore BlockHandle +Slice BlockBasedFilterBlockBuilder::Finish( + const BlockHandle& /*tmp*/, Status* status, + std::unique_ptr* /* filter_data */) { + // In this impl we ignore BlockHandle and filter_data *status = Status::OK(); if (!start_.empty()) { diff --git a/table/block_based/block_based_filter_block.h b/table/block_based/block_based_filter_block.h index 0b46cd7c1..fd20df846 100644 --- a/table/block_based/block_based_filter_block.h +++ b/table/block_based/block_based_filter_block.h @@ -49,7 +49,9 @@ class BlockBasedFilterBlockBuilder : public FilterBlockBuilder { return start_.empty() && filter_offsets_.empty(); } virtual size_t EstimateEntriesAdded() override; - virtual Slice Finish(const BlockHandle& tmp, Status* status) override; + virtual Slice Finish( + const BlockHandle& tmp, Status* status, + std::unique_ptr* filter_data = nullptr) override; using FilterBlockBuilder::Finish; private: diff --git a/table/block_based/block_based_table_builder.cc b/table/block_based/block_based_table_builder.cc index 2a98609d5..a704007af 100644 --- a/table/block_based/block_based_table_builder.cc +++ b/table/block_based/block_based_table_builder.cc @@ -1542,8 +1542,17 @@ void BlockBasedTableBuilder::WriteFilterBlock( rep_->filter_builder->EstimateEntriesAdded(); Status s = Status::Incomplete(); while (ok() && s.IsIncomplete()) { + // filter_data is used to store the transferred filter data payload from + // FilterBlockBuilder and deallocate the payload by going out of scope. + // Otherwise, the payload will unnecessarily remain until + // BlockBasedTableBuilder is deallocated. + // + // See FilterBlockBuilder::Finish() for more on the difference in + // transferred filter data payload among different FilterBlockBuilder + // subtypes. + std::unique_ptr filter_data; Slice filter_content = - rep_->filter_builder->Finish(filter_block_handle, &s); + rep_->filter_builder->Finish(filter_block_handle, &s, &filter_data); assert(s.ok() || s.IsIncomplete()); rep_->props.filter_size += filter_content.size(); WriteRawBlock(filter_content, kNoCompression, &filter_block_handle, diff --git a/table/block_based/filter_block.h b/table/block_based/filter_block.h index 1c0b7a5fa..55ab254d9 100644 --- a/table/block_based/filter_block.h +++ b/table/block_based/filter_block.h @@ -73,7 +73,14 @@ class FilterBlockBuilder { assert(dont_care_status.ok()); return ret; } - virtual Slice Finish(const BlockHandle& tmp, Status* status) = 0; + // If filter_data is not nullptr, Finish() may transfer ownership of + // underlying filter data to the caller, so that it can be freed as soon as + // possible. + virtual Slice Finish( + const BlockHandle& tmp /* only used in PartitionedFilterBlock as + last_partition_block_handle */ + , + Status* status, std::unique_ptr* filter_data = nullptr) = 0; }; // A FilterBlockReader is used to parse filter from SST table. diff --git a/table/block_based/full_filter_block.cc b/table/block_based/full_filter_block.cc index 0e336c37f..9803acf99 100644 --- a/table/block_based/full_filter_block.cc +++ b/table/block_based/full_filter_block.cc @@ -101,14 +101,17 @@ void FullFilterBlockBuilder::Reset() { last_prefix_recorded_ = false; } -Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/, - Status* status) { +Slice FullFilterBlockBuilder::Finish( + const BlockHandle& /*tmp*/, Status* status, + std::unique_ptr* filter_data) { Reset(); // In this impl we ignore BlockHandle *status = Status::OK(); if (any_added_) { any_added_ = false; - return filter_bits_builder_->Finish(&filter_data_); + Slice filter_content = + filter_bits_builder_->Finish(filter_data ? filter_data : &filter_data_); + return filter_content; } return Slice(); } diff --git a/table/block_based/full_filter_block.h b/table/block_based/full_filter_block.h index bf8e24cc9..fc4dd9f5a 100644 --- a/table/block_based/full_filter_block.h +++ b/table/block_based/full_filter_block.h @@ -54,7 +54,9 @@ class FullFilterBlockBuilder : public FilterBlockBuilder { virtual void Add(const Slice& key_without_ts) override; virtual bool IsEmpty() const override { return !any_added_; } virtual size_t EstimateEntriesAdded() override; - virtual Slice Finish(const BlockHandle& tmp, Status* status) override; + virtual Slice Finish( + const BlockHandle& tmp, Status* status, + std::unique_ptr* filter_data = nullptr) override; using FilterBlockBuilder::Finish; protected: diff --git a/table/block_based/partitioned_filter_block.cc b/table/block_based/partitioned_filter_block.cc index 563b677a7..4808a1547 100644 --- a/table/block_based/partitioned_filter_block.cc +++ b/table/block_based/partitioned_filter_block.cc @@ -73,7 +73,6 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock( if (!p_index_builder_->ShouldCutFilterBlock()) { return; } - filter_gc.push_back(std::unique_ptr(nullptr)); // Add the prefix of the next key before finishing the partition without // updating last_prefix_str_. This hack, fixes a bug with format_verison=3 @@ -88,9 +87,10 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock( } total_added_in_built_ += filter_bits_builder_->EstimateEntriesAdded(); - Slice filter = filter_bits_builder_->Finish(&filter_gc.back()); + std::unique_ptr filter_data; + Slice filter = filter_bits_builder_->Finish(&filter_data); std::string& index_key = p_index_builder_->GetPartitionKey(); - filters.push_back({index_key, filter}); + filters.push_back({index_key, filter, std::move(filter_data)}); keys_added_to_partition_ = 0; Reset(); } @@ -110,10 +110,10 @@ size_t PartitionedFilterBlockBuilder::EstimateEntriesAdded() { } Slice PartitionedFilterBlockBuilder::Finish( - const BlockHandle& last_partition_block_handle, Status* status) { + const BlockHandle& last_partition_block_handle, Status* status, + std::unique_ptr* filter_data) { if (finishing_filters == true) { // Record the handle of the last written filter block in the index - FilterEntry& last_entry = filters.front(); std::string handle_encoding; last_partition_block_handle.EncodeTo(&handle_encoding); std::string handle_delta_encoding; @@ -122,14 +122,13 @@ Slice PartitionedFilterBlockBuilder::Finish( last_partition_block_handle.size() - last_encoded_handle_.size()); last_encoded_handle_ = last_partition_block_handle; const Slice handle_delta_encoding_slice(handle_delta_encoding); - index_on_filter_block_builder_.Add(last_entry.key, handle_encoding, + index_on_filter_block_builder_.Add(last_filter_entry_key, handle_encoding, &handle_delta_encoding_slice); if (!p_index_builder_->seperator_is_key_plus_seq()) { index_on_filter_block_builder_without_seq_.Add( - ExtractUserKey(last_entry.key), handle_encoding, + ExtractUserKey(last_filter_entry_key), handle_encoding, &handle_delta_encoding_slice); } - filters.pop_front(); } else { MaybeCutAFilterBlock(nullptr); } @@ -137,6 +136,7 @@ Slice PartitionedFilterBlockBuilder::Finish( // partitions if (UNLIKELY(filters.empty())) { *status = Status::OK(); + last_filter_data.reset(); if (finishing_filters) { // Simplest to just add them all at the end total_added_in_built_ = 0; @@ -154,7 +154,15 @@ Slice PartitionedFilterBlockBuilder::Finish( // indicate we expect more calls to Finish *status = Status::Incomplete(); finishing_filters = true; - return filters.front().filter; + + last_filter_entry_key = filters.front().key; + Slice filter = filters.front().filter; + last_filter_data = std::move(filters.front().filter_data); + if (filter_data != nullptr) { + *filter_data = std::move(last_filter_data); + } + filters.pop_front(); + return filter; } } diff --git a/table/block_based/partitioned_filter_block.h b/table/block_based/partitioned_filter_block.h index 6393a25ca..94c6a9b95 100644 --- a/table/block_based/partitioned_filter_block.h +++ b/table/block_based/partitioned_filter_block.h @@ -5,6 +5,7 @@ #pragma once +#include #include #include #include @@ -36,8 +37,9 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder { void Add(const Slice& key) override; size_t EstimateEntriesAdded() override; - virtual Slice Finish(const BlockHandle& last_partition_block_handle, - Status* status) override; + virtual Slice Finish( + const BlockHandle& last_partition_block_handle, Status* status, + std::unique_ptr* filter_data = nullptr) override; private: // Filter data @@ -47,10 +49,13 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder { struct FilterEntry { std::string key; Slice filter; + std::unique_ptr filter_data; }; - std::list filters; // list of partitioned indexes and their keys + std::deque filters; // list of partitioned filters and keys used + // in building the index + std::string last_filter_entry_key; + std::unique_ptr last_filter_data; std::unique_ptr value; - std::vector> filter_gc; bool finishing_filters = false; // true if Finish is called once but not complete yet. // The policy of when cut a filter block and Finish it diff --git a/table/block_based/partitioned_filter_block_test.cc b/table/block_based/partitioned_filter_block_test.cc index 7b4d49baf..7def5f250 100644 --- a/table/block_based/partitioned_filter_block_test.cc +++ b/table/block_based/partitioned_filter_block_test.cc @@ -136,8 +136,9 @@ class PartitionedFilterBlockTest BlockHandle bh; Status status; Slice slice; + std::unique_ptr filter_data; do { - slice = builder->Finish(bh, &status); + slice = builder->Finish(bh, &status, &filter_data); bh = Write(slice); } while (status.IsIncomplete());