Deallocate payload of BlockBasedTableBuilder::Rep::FilterBlockBuilder earlier for Full/PartitionedFilter (#9070)

Summary:
Note: This PR is the 1st part of a bigger PR stack (https://github.com/facebook/rocksdb/pull/9073).

Context:
Previously, the payload (i.e, filter data) within `BlockBasedTableBuilder::Rep::FilterBlockBuilder` object is not deallocated until `BlockBasedTableBuilder` is deallocated, despite it is no longer useful after its related `filter_content` being written.
- Transferred the payload (i.e, the filter data) out of `BlockBasedTableBuilder::Rep::FilterBlockBuilder` object
- For PartitionedFilter:
   - Unified `filters` and `filter_gc` lists into one `std::deque<FilterEntry> filters` by adding a new field `last_filter_entry_key` and storing the `std::unique_ptr filter_data` with the `Slice filter` in the same entry
   - Reset `last_filter_data` in the case where `filters` is empty, which should be as by then we would've finish using all the `Slice filter`
- Deallocated the payload by going out of scope as soon as we're done with using the `filter_content` associated with the payload
- This is an internal interface change at the level of `FilterBlockBuilder::Finish()`, which leads to touching the inherited interface in `BlockBasedFilterBlockBuilder`. But for that, the payload transferring is ignored.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9070

Test Plan: - The main focus is to catch segment fault error during `FilterBlockBuilder::Finish()` and `BlockBasedTableBuilder::Finish()` and interface mismatch. Relying on existing CI tests is enough as `assert(false)` was temporarily added to verify the new logic of transferring ownership indeed run

Reviewed By: pdillinger

Differential Revision: D31884933

Pulled By: hx235

fbshipit-source-id: f73ecfbea13788d4fc058013ace27230110b52f4
This commit is contained in:
Hui Xiao 2021-11-04 13:29:09 -07:00 committed by Facebook GitHub Bot
parent a64c8ca7a8
commit 1ababeb76a
10 changed files with 65 additions and 24 deletions

View File

@ -25,6 +25,9 @@
* Add API `FSDirectory::FsyncWithDirOptions()`, which provides extra information like directory fsync reason in `DirFsyncOptions`. File system like btrfs is using that to skip directory fsync for creating a new file, or when renaming a file, fsync the target file instead of the directory, which improves the `DB::Open()` speed by ~20%. * Add API `FSDirectory::FsyncWithDirOptions()`, which provides extra information like directory fsync reason in `DirFsyncOptions`. File system like btrfs is using that to skip directory fsync for creating a new file, or when renaming a file, fsync the target file instead of the directory, which improves the `DB::Open()` speed by ~20%.
* `DB::Open()` is not going be blocked by obsolete file purge if `DBOptions::avoid_unnecessary_blocking_io` is set to true. * `DB::Open()` is not going be blocked by obsolete file purge if `DBOptions::avoid_unnecessary_blocking_io` is set to true.
### Performance Improvements
* Released some memory related to filter construction earlier in `BlockBasedTableBuilder` for `FullFilter` and `PartitionedFilter` case (#9070)
## 6.26.0 (2021-10-20) ## 6.26.0 (2021-10-20)
### Bug Fixes ### Bug Fixes
* Fixes a bug in directed IO mode when calling MultiGet() for blobs in the same blob file. The bug is caused by not sorting the blob read requests by file offsets. * Fixes a bug in directed IO mode when calling MultiGet() for blobs in the same blob file. The bug is caused by not sorting the blob read requests by file offsets.

View File

@ -117,9 +117,10 @@ inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
} }
} }
Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/, Slice BlockBasedFilterBlockBuilder::Finish(
Status* status) { const BlockHandle& /*tmp*/, Status* status,
// In this impl we ignore BlockHandle std::unique_ptr<const char[]>* /* filter_data */) {
// In this impl we ignore BlockHandle and filter_data
*status = Status::OK(); *status = Status::OK();
if (!start_.empty()) { if (!start_.empty()) {

View File

@ -49,7 +49,9 @@ class BlockBasedFilterBlockBuilder : public FilterBlockBuilder {
return start_.empty() && filter_offsets_.empty(); return start_.empty() && filter_offsets_.empty();
} }
virtual size_t EstimateEntriesAdded() override; virtual size_t EstimateEntriesAdded() override;
virtual Slice Finish(const BlockHandle& tmp, Status* status) override; virtual Slice Finish(
const BlockHandle& tmp, Status* status,
std::unique_ptr<const char[]>* filter_data = nullptr) override;
using FilterBlockBuilder::Finish; using FilterBlockBuilder::Finish;
private: private:

View File

@ -1542,8 +1542,17 @@ void BlockBasedTableBuilder::WriteFilterBlock(
rep_->filter_builder->EstimateEntriesAdded(); rep_->filter_builder->EstimateEntriesAdded();
Status s = Status::Incomplete(); Status s = Status::Incomplete();
while (ok() && s.IsIncomplete()) { while (ok() && s.IsIncomplete()) {
// filter_data is used to store the transferred filter data payload from
// FilterBlockBuilder and deallocate the payload by going out of scope.
// Otherwise, the payload will unnecessarily remain until
// BlockBasedTableBuilder is deallocated.
//
// See FilterBlockBuilder::Finish() for more on the difference in
// transferred filter data payload among different FilterBlockBuilder
// subtypes.
std::unique_ptr<const char[]> filter_data;
Slice filter_content = Slice filter_content =
rep_->filter_builder->Finish(filter_block_handle, &s); rep_->filter_builder->Finish(filter_block_handle, &s, &filter_data);
assert(s.ok() || s.IsIncomplete()); assert(s.ok() || s.IsIncomplete());
rep_->props.filter_size += filter_content.size(); rep_->props.filter_size += filter_content.size();
WriteRawBlock(filter_content, kNoCompression, &filter_block_handle, WriteRawBlock(filter_content, kNoCompression, &filter_block_handle,

View File

@ -73,7 +73,14 @@ class FilterBlockBuilder {
assert(dont_care_status.ok()); assert(dont_care_status.ok());
return ret; return ret;
} }
virtual Slice Finish(const BlockHandle& tmp, Status* status) = 0; // If filter_data is not nullptr, Finish() may transfer ownership of
// underlying filter data to the caller, so that it can be freed as soon as
// possible.
virtual Slice Finish(
const BlockHandle& tmp /* only used in PartitionedFilterBlock as
last_partition_block_handle */
,
Status* status, std::unique_ptr<const char[]>* filter_data = nullptr) = 0;
}; };
// A FilterBlockReader is used to parse filter from SST table. // A FilterBlockReader is used to parse filter from SST table.

View File

@ -101,14 +101,17 @@ void FullFilterBlockBuilder::Reset() {
last_prefix_recorded_ = false; last_prefix_recorded_ = false;
} }
Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/, Slice FullFilterBlockBuilder::Finish(
Status* status) { const BlockHandle& /*tmp*/, Status* status,
std::unique_ptr<const char[]>* filter_data) {
Reset(); Reset();
// In this impl we ignore BlockHandle // In this impl we ignore BlockHandle
*status = Status::OK(); *status = Status::OK();
if (any_added_) { if (any_added_) {
any_added_ = false; any_added_ = false;
return filter_bits_builder_->Finish(&filter_data_); Slice filter_content =
filter_bits_builder_->Finish(filter_data ? filter_data : &filter_data_);
return filter_content;
} }
return Slice(); return Slice();
} }

View File

@ -54,7 +54,9 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
virtual void Add(const Slice& key_without_ts) override; virtual void Add(const Slice& key_without_ts) override;
virtual bool IsEmpty() const override { return !any_added_; } virtual bool IsEmpty() const override { return !any_added_; }
virtual size_t EstimateEntriesAdded() override; virtual size_t EstimateEntriesAdded() override;
virtual Slice Finish(const BlockHandle& tmp, Status* status) override; virtual Slice Finish(
const BlockHandle& tmp, Status* status,
std::unique_ptr<const char[]>* filter_data = nullptr) override;
using FilterBlockBuilder::Finish; using FilterBlockBuilder::Finish;
protected: protected:

View File

@ -73,7 +73,6 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock(
if (!p_index_builder_->ShouldCutFilterBlock()) { if (!p_index_builder_->ShouldCutFilterBlock()) {
return; return;
} }
filter_gc.push_back(std::unique_ptr<const char[]>(nullptr));
// Add the prefix of the next key before finishing the partition without // Add the prefix of the next key before finishing the partition without
// updating last_prefix_str_. This hack, fixes a bug with format_verison=3 // updating last_prefix_str_. This hack, fixes a bug with format_verison=3
@ -88,9 +87,10 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock(
} }
total_added_in_built_ += filter_bits_builder_->EstimateEntriesAdded(); total_added_in_built_ += filter_bits_builder_->EstimateEntriesAdded();
Slice filter = filter_bits_builder_->Finish(&filter_gc.back()); std::unique_ptr<const char[]> filter_data;
Slice filter = filter_bits_builder_->Finish(&filter_data);
std::string& index_key = p_index_builder_->GetPartitionKey(); std::string& index_key = p_index_builder_->GetPartitionKey();
filters.push_back({index_key, filter}); filters.push_back({index_key, filter, std::move(filter_data)});
keys_added_to_partition_ = 0; keys_added_to_partition_ = 0;
Reset(); Reset();
} }
@ -110,10 +110,10 @@ size_t PartitionedFilterBlockBuilder::EstimateEntriesAdded() {
} }
Slice PartitionedFilterBlockBuilder::Finish( Slice PartitionedFilterBlockBuilder::Finish(
const BlockHandle& last_partition_block_handle, Status* status) { const BlockHandle& last_partition_block_handle, Status* status,
std::unique_ptr<const char[]>* filter_data) {
if (finishing_filters == true) { if (finishing_filters == true) {
// Record the handle of the last written filter block in the index // Record the handle of the last written filter block in the index
FilterEntry& last_entry = filters.front();
std::string handle_encoding; std::string handle_encoding;
last_partition_block_handle.EncodeTo(&handle_encoding); last_partition_block_handle.EncodeTo(&handle_encoding);
std::string handle_delta_encoding; std::string handle_delta_encoding;
@ -122,14 +122,13 @@ Slice PartitionedFilterBlockBuilder::Finish(
last_partition_block_handle.size() - last_encoded_handle_.size()); last_partition_block_handle.size() - last_encoded_handle_.size());
last_encoded_handle_ = last_partition_block_handle; last_encoded_handle_ = last_partition_block_handle;
const Slice handle_delta_encoding_slice(handle_delta_encoding); const Slice handle_delta_encoding_slice(handle_delta_encoding);
index_on_filter_block_builder_.Add(last_entry.key, handle_encoding, index_on_filter_block_builder_.Add(last_filter_entry_key, handle_encoding,
&handle_delta_encoding_slice); &handle_delta_encoding_slice);
if (!p_index_builder_->seperator_is_key_plus_seq()) { if (!p_index_builder_->seperator_is_key_plus_seq()) {
index_on_filter_block_builder_without_seq_.Add( index_on_filter_block_builder_without_seq_.Add(
ExtractUserKey(last_entry.key), handle_encoding, ExtractUserKey(last_filter_entry_key), handle_encoding,
&handle_delta_encoding_slice); &handle_delta_encoding_slice);
} }
filters.pop_front();
} else { } else {
MaybeCutAFilterBlock(nullptr); MaybeCutAFilterBlock(nullptr);
} }
@ -137,6 +136,7 @@ Slice PartitionedFilterBlockBuilder::Finish(
// partitions // partitions
if (UNLIKELY(filters.empty())) { if (UNLIKELY(filters.empty())) {
*status = Status::OK(); *status = Status::OK();
last_filter_data.reset();
if (finishing_filters) { if (finishing_filters) {
// Simplest to just add them all at the end // Simplest to just add them all at the end
total_added_in_built_ = 0; total_added_in_built_ = 0;
@ -154,7 +154,15 @@ Slice PartitionedFilterBlockBuilder::Finish(
// indicate we expect more calls to Finish // indicate we expect more calls to Finish
*status = Status::Incomplete(); *status = Status::Incomplete();
finishing_filters = true; finishing_filters = true;
return filters.front().filter;
last_filter_entry_key = filters.front().key;
Slice filter = filters.front().filter;
last_filter_data = std::move(filters.front().filter_data);
if (filter_data != nullptr) {
*filter_data = std::move(last_filter_data);
}
filters.pop_front();
return filter;
} }
} }

View File

@ -5,6 +5,7 @@
#pragma once #pragma once
#include <deque>
#include <list> #include <list>
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
@ -36,8 +37,9 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
void Add(const Slice& key) override; void Add(const Slice& key) override;
size_t EstimateEntriesAdded() override; size_t EstimateEntriesAdded() override;
virtual Slice Finish(const BlockHandle& last_partition_block_handle, virtual Slice Finish(
Status* status) override; const BlockHandle& last_partition_block_handle, Status* status,
std::unique_ptr<const char[]>* filter_data = nullptr) override;
private: private:
// Filter data // Filter data
@ -47,10 +49,13 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
struct FilterEntry { struct FilterEntry {
std::string key; std::string key;
Slice filter; Slice filter;
std::unique_ptr<const char[]> filter_data;
}; };
std::list<FilterEntry> filters; // list of partitioned indexes and their keys std::deque<FilterEntry> filters; // list of partitioned filters and keys used
// in building the index
std::string last_filter_entry_key;
std::unique_ptr<const char[]> last_filter_data;
std::unique_ptr<IndexBuilder> value; std::unique_ptr<IndexBuilder> value;
std::vector<std::unique_ptr<const char[]>> filter_gc;
bool finishing_filters = bool finishing_filters =
false; // true if Finish is called once but not complete yet. false; // true if Finish is called once but not complete yet.
// The policy of when cut a filter block and Finish it // The policy of when cut a filter block and Finish it

View File

@ -136,8 +136,9 @@ class PartitionedFilterBlockTest
BlockHandle bh; BlockHandle bh;
Status status; Status status;
Slice slice; Slice slice;
std::unique_ptr<const char[]> filter_data;
do { do {
slice = builder->Finish(bh, &status); slice = builder->Finish(bh, &status, &filter_data);
bh = Write(slice); bh = Write(slice);
} while (status.IsIncomplete()); } while (status.IsIncomplete());