Deallocate payload of BlockBasedTableBuilder::Rep::FilterBlockBuilder earlier for Full/PartitionedFilter (#9070)
Summary: Note: This PR is the 1st part of a bigger PR stack (https://github.com/facebook/rocksdb/pull/9073). Context: Previously, the payload (i.e, filter data) within `BlockBasedTableBuilder::Rep::FilterBlockBuilder` object is not deallocated until `BlockBasedTableBuilder` is deallocated, despite it is no longer useful after its related `filter_content` being written. - Transferred the payload (i.e, the filter data) out of `BlockBasedTableBuilder::Rep::FilterBlockBuilder` object - For PartitionedFilter: - Unified `filters` and `filter_gc` lists into one `std::deque<FilterEntry> filters` by adding a new field `last_filter_entry_key` and storing the `std::unique_ptr filter_data` with the `Slice filter` in the same entry - Reset `last_filter_data` in the case where `filters` is empty, which should be as by then we would've finish using all the `Slice filter` - Deallocated the payload by going out of scope as soon as we're done with using the `filter_content` associated with the payload - This is an internal interface change at the level of `FilterBlockBuilder::Finish()`, which leads to touching the inherited interface in `BlockBasedFilterBlockBuilder`. But for that, the payload transferring is ignored. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9070 Test Plan: - The main focus is to catch segment fault error during `FilterBlockBuilder::Finish()` and `BlockBasedTableBuilder::Finish()` and interface mismatch. Relying on existing CI tests is enough as `assert(false)` was temporarily added to verify the new logic of transferring ownership indeed run Reviewed By: pdillinger Differential Revision: D31884933 Pulled By: hx235 fbshipit-source-id: f73ecfbea13788d4fc058013ace27230110b52f4
This commit is contained in:
parent
a64c8ca7a8
commit
1ababeb76a
@ -25,6 +25,9 @@
|
|||||||
* Add API `FSDirectory::FsyncWithDirOptions()`, which provides extra information like directory fsync reason in `DirFsyncOptions`. File system like btrfs is using that to skip directory fsync for creating a new file, or when renaming a file, fsync the target file instead of the directory, which improves the `DB::Open()` speed by ~20%.
|
* Add API `FSDirectory::FsyncWithDirOptions()`, which provides extra information like directory fsync reason in `DirFsyncOptions`. File system like btrfs is using that to skip directory fsync for creating a new file, or when renaming a file, fsync the target file instead of the directory, which improves the `DB::Open()` speed by ~20%.
|
||||||
* `DB::Open()` is not going be blocked by obsolete file purge if `DBOptions::avoid_unnecessary_blocking_io` is set to true.
|
* `DB::Open()` is not going be blocked by obsolete file purge if `DBOptions::avoid_unnecessary_blocking_io` is set to true.
|
||||||
|
|
||||||
|
### Performance Improvements
|
||||||
|
* Released some memory related to filter construction earlier in `BlockBasedTableBuilder` for `FullFilter` and `PartitionedFilter` case (#9070)
|
||||||
|
|
||||||
## 6.26.0 (2021-10-20)
|
## 6.26.0 (2021-10-20)
|
||||||
### Bug Fixes
|
### Bug Fixes
|
||||||
* Fixes a bug in directed IO mode when calling MultiGet() for blobs in the same blob file. The bug is caused by not sorting the blob read requests by file offsets.
|
* Fixes a bug in directed IO mode when calling MultiGet() for blobs in the same blob file. The bug is caused by not sorting the blob read requests by file offsets.
|
||||||
|
@ -117,9 +117,10 @@ inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
|
Slice BlockBasedFilterBlockBuilder::Finish(
|
||||||
Status* status) {
|
const BlockHandle& /*tmp*/, Status* status,
|
||||||
// In this impl we ignore BlockHandle
|
std::unique_ptr<const char[]>* /* filter_data */) {
|
||||||
|
// In this impl we ignore BlockHandle and filter_data
|
||||||
*status = Status::OK();
|
*status = Status::OK();
|
||||||
|
|
||||||
if (!start_.empty()) {
|
if (!start_.empty()) {
|
||||||
|
@ -49,7 +49,9 @@ class BlockBasedFilterBlockBuilder : public FilterBlockBuilder {
|
|||||||
return start_.empty() && filter_offsets_.empty();
|
return start_.empty() && filter_offsets_.empty();
|
||||||
}
|
}
|
||||||
virtual size_t EstimateEntriesAdded() override;
|
virtual size_t EstimateEntriesAdded() override;
|
||||||
virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
|
virtual Slice Finish(
|
||||||
|
const BlockHandle& tmp, Status* status,
|
||||||
|
std::unique_ptr<const char[]>* filter_data = nullptr) override;
|
||||||
using FilterBlockBuilder::Finish;
|
using FilterBlockBuilder::Finish;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -1542,8 +1542,17 @@ void BlockBasedTableBuilder::WriteFilterBlock(
|
|||||||
rep_->filter_builder->EstimateEntriesAdded();
|
rep_->filter_builder->EstimateEntriesAdded();
|
||||||
Status s = Status::Incomplete();
|
Status s = Status::Incomplete();
|
||||||
while (ok() && s.IsIncomplete()) {
|
while (ok() && s.IsIncomplete()) {
|
||||||
|
// filter_data is used to store the transferred filter data payload from
|
||||||
|
// FilterBlockBuilder and deallocate the payload by going out of scope.
|
||||||
|
// Otherwise, the payload will unnecessarily remain until
|
||||||
|
// BlockBasedTableBuilder is deallocated.
|
||||||
|
//
|
||||||
|
// See FilterBlockBuilder::Finish() for more on the difference in
|
||||||
|
// transferred filter data payload among different FilterBlockBuilder
|
||||||
|
// subtypes.
|
||||||
|
std::unique_ptr<const char[]> filter_data;
|
||||||
Slice filter_content =
|
Slice filter_content =
|
||||||
rep_->filter_builder->Finish(filter_block_handle, &s);
|
rep_->filter_builder->Finish(filter_block_handle, &s, &filter_data);
|
||||||
assert(s.ok() || s.IsIncomplete());
|
assert(s.ok() || s.IsIncomplete());
|
||||||
rep_->props.filter_size += filter_content.size();
|
rep_->props.filter_size += filter_content.size();
|
||||||
WriteRawBlock(filter_content, kNoCompression, &filter_block_handle,
|
WriteRawBlock(filter_content, kNoCompression, &filter_block_handle,
|
||||||
|
@ -73,7 +73,14 @@ class FilterBlockBuilder {
|
|||||||
assert(dont_care_status.ok());
|
assert(dont_care_status.ok());
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
virtual Slice Finish(const BlockHandle& tmp, Status* status) = 0;
|
// If filter_data is not nullptr, Finish() may transfer ownership of
|
||||||
|
// underlying filter data to the caller, so that it can be freed as soon as
|
||||||
|
// possible.
|
||||||
|
virtual Slice Finish(
|
||||||
|
const BlockHandle& tmp /* only used in PartitionedFilterBlock as
|
||||||
|
last_partition_block_handle */
|
||||||
|
,
|
||||||
|
Status* status, std::unique_ptr<const char[]>* filter_data = nullptr) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
// A FilterBlockReader is used to parse filter from SST table.
|
// A FilterBlockReader is used to parse filter from SST table.
|
||||||
|
@ -101,14 +101,17 @@ void FullFilterBlockBuilder::Reset() {
|
|||||||
last_prefix_recorded_ = false;
|
last_prefix_recorded_ = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
|
Slice FullFilterBlockBuilder::Finish(
|
||||||
Status* status) {
|
const BlockHandle& /*tmp*/, Status* status,
|
||||||
|
std::unique_ptr<const char[]>* filter_data) {
|
||||||
Reset();
|
Reset();
|
||||||
// In this impl we ignore BlockHandle
|
// In this impl we ignore BlockHandle
|
||||||
*status = Status::OK();
|
*status = Status::OK();
|
||||||
if (any_added_) {
|
if (any_added_) {
|
||||||
any_added_ = false;
|
any_added_ = false;
|
||||||
return filter_bits_builder_->Finish(&filter_data_);
|
Slice filter_content =
|
||||||
|
filter_bits_builder_->Finish(filter_data ? filter_data : &filter_data_);
|
||||||
|
return filter_content;
|
||||||
}
|
}
|
||||||
return Slice();
|
return Slice();
|
||||||
}
|
}
|
||||||
|
@ -54,7 +54,9 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
|
|||||||
virtual void Add(const Slice& key_without_ts) override;
|
virtual void Add(const Slice& key_without_ts) override;
|
||||||
virtual bool IsEmpty() const override { return !any_added_; }
|
virtual bool IsEmpty() const override { return !any_added_; }
|
||||||
virtual size_t EstimateEntriesAdded() override;
|
virtual size_t EstimateEntriesAdded() override;
|
||||||
virtual Slice Finish(const BlockHandle& tmp, Status* status) override;
|
virtual Slice Finish(
|
||||||
|
const BlockHandle& tmp, Status* status,
|
||||||
|
std::unique_ptr<const char[]>* filter_data = nullptr) override;
|
||||||
using FilterBlockBuilder::Finish;
|
using FilterBlockBuilder::Finish;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -73,7 +73,6 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock(
|
|||||||
if (!p_index_builder_->ShouldCutFilterBlock()) {
|
if (!p_index_builder_->ShouldCutFilterBlock()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
filter_gc.push_back(std::unique_ptr<const char[]>(nullptr));
|
|
||||||
|
|
||||||
// Add the prefix of the next key before finishing the partition without
|
// Add the prefix of the next key before finishing the partition without
|
||||||
// updating last_prefix_str_. This hack, fixes a bug with format_verison=3
|
// updating last_prefix_str_. This hack, fixes a bug with format_verison=3
|
||||||
@ -88,9 +87,10 @@ void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock(
|
|||||||
}
|
}
|
||||||
|
|
||||||
total_added_in_built_ += filter_bits_builder_->EstimateEntriesAdded();
|
total_added_in_built_ += filter_bits_builder_->EstimateEntriesAdded();
|
||||||
Slice filter = filter_bits_builder_->Finish(&filter_gc.back());
|
std::unique_ptr<const char[]> filter_data;
|
||||||
|
Slice filter = filter_bits_builder_->Finish(&filter_data);
|
||||||
std::string& index_key = p_index_builder_->GetPartitionKey();
|
std::string& index_key = p_index_builder_->GetPartitionKey();
|
||||||
filters.push_back({index_key, filter});
|
filters.push_back({index_key, filter, std::move(filter_data)});
|
||||||
keys_added_to_partition_ = 0;
|
keys_added_to_partition_ = 0;
|
||||||
Reset();
|
Reset();
|
||||||
}
|
}
|
||||||
@ -110,10 +110,10 @@ size_t PartitionedFilterBlockBuilder::EstimateEntriesAdded() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Slice PartitionedFilterBlockBuilder::Finish(
|
Slice PartitionedFilterBlockBuilder::Finish(
|
||||||
const BlockHandle& last_partition_block_handle, Status* status) {
|
const BlockHandle& last_partition_block_handle, Status* status,
|
||||||
|
std::unique_ptr<const char[]>* filter_data) {
|
||||||
if (finishing_filters == true) {
|
if (finishing_filters == true) {
|
||||||
// Record the handle of the last written filter block in the index
|
// Record the handle of the last written filter block in the index
|
||||||
FilterEntry& last_entry = filters.front();
|
|
||||||
std::string handle_encoding;
|
std::string handle_encoding;
|
||||||
last_partition_block_handle.EncodeTo(&handle_encoding);
|
last_partition_block_handle.EncodeTo(&handle_encoding);
|
||||||
std::string handle_delta_encoding;
|
std::string handle_delta_encoding;
|
||||||
@ -122,14 +122,13 @@ Slice PartitionedFilterBlockBuilder::Finish(
|
|||||||
last_partition_block_handle.size() - last_encoded_handle_.size());
|
last_partition_block_handle.size() - last_encoded_handle_.size());
|
||||||
last_encoded_handle_ = last_partition_block_handle;
|
last_encoded_handle_ = last_partition_block_handle;
|
||||||
const Slice handle_delta_encoding_slice(handle_delta_encoding);
|
const Slice handle_delta_encoding_slice(handle_delta_encoding);
|
||||||
index_on_filter_block_builder_.Add(last_entry.key, handle_encoding,
|
index_on_filter_block_builder_.Add(last_filter_entry_key, handle_encoding,
|
||||||
&handle_delta_encoding_slice);
|
&handle_delta_encoding_slice);
|
||||||
if (!p_index_builder_->seperator_is_key_plus_seq()) {
|
if (!p_index_builder_->seperator_is_key_plus_seq()) {
|
||||||
index_on_filter_block_builder_without_seq_.Add(
|
index_on_filter_block_builder_without_seq_.Add(
|
||||||
ExtractUserKey(last_entry.key), handle_encoding,
|
ExtractUserKey(last_filter_entry_key), handle_encoding,
|
||||||
&handle_delta_encoding_slice);
|
&handle_delta_encoding_slice);
|
||||||
}
|
}
|
||||||
filters.pop_front();
|
|
||||||
} else {
|
} else {
|
||||||
MaybeCutAFilterBlock(nullptr);
|
MaybeCutAFilterBlock(nullptr);
|
||||||
}
|
}
|
||||||
@ -137,6 +136,7 @@ Slice PartitionedFilterBlockBuilder::Finish(
|
|||||||
// partitions
|
// partitions
|
||||||
if (UNLIKELY(filters.empty())) {
|
if (UNLIKELY(filters.empty())) {
|
||||||
*status = Status::OK();
|
*status = Status::OK();
|
||||||
|
last_filter_data.reset();
|
||||||
if (finishing_filters) {
|
if (finishing_filters) {
|
||||||
// Simplest to just add them all at the end
|
// Simplest to just add them all at the end
|
||||||
total_added_in_built_ = 0;
|
total_added_in_built_ = 0;
|
||||||
@ -154,7 +154,15 @@ Slice PartitionedFilterBlockBuilder::Finish(
|
|||||||
// indicate we expect more calls to Finish
|
// indicate we expect more calls to Finish
|
||||||
*status = Status::Incomplete();
|
*status = Status::Incomplete();
|
||||||
finishing_filters = true;
|
finishing_filters = true;
|
||||||
return filters.front().filter;
|
|
||||||
|
last_filter_entry_key = filters.front().key;
|
||||||
|
Slice filter = filters.front().filter;
|
||||||
|
last_filter_data = std::move(filters.front().filter_data);
|
||||||
|
if (filter_data != nullptr) {
|
||||||
|
*filter_data = std::move(last_filter_data);
|
||||||
|
}
|
||||||
|
filters.pop_front();
|
||||||
|
return filter;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <deque>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
@ -36,8 +37,9 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
|
|||||||
void Add(const Slice& key) override;
|
void Add(const Slice& key) override;
|
||||||
size_t EstimateEntriesAdded() override;
|
size_t EstimateEntriesAdded() override;
|
||||||
|
|
||||||
virtual Slice Finish(const BlockHandle& last_partition_block_handle,
|
virtual Slice Finish(
|
||||||
Status* status) override;
|
const BlockHandle& last_partition_block_handle, Status* status,
|
||||||
|
std::unique_ptr<const char[]>* filter_data = nullptr) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Filter data
|
// Filter data
|
||||||
@ -47,10 +49,13 @@ class PartitionedFilterBlockBuilder : public FullFilterBlockBuilder {
|
|||||||
struct FilterEntry {
|
struct FilterEntry {
|
||||||
std::string key;
|
std::string key;
|
||||||
Slice filter;
|
Slice filter;
|
||||||
|
std::unique_ptr<const char[]> filter_data;
|
||||||
};
|
};
|
||||||
std::list<FilterEntry> filters; // list of partitioned indexes and their keys
|
std::deque<FilterEntry> filters; // list of partitioned filters and keys used
|
||||||
|
// in building the index
|
||||||
|
std::string last_filter_entry_key;
|
||||||
|
std::unique_ptr<const char[]> last_filter_data;
|
||||||
std::unique_ptr<IndexBuilder> value;
|
std::unique_ptr<IndexBuilder> value;
|
||||||
std::vector<std::unique_ptr<const char[]>> filter_gc;
|
|
||||||
bool finishing_filters =
|
bool finishing_filters =
|
||||||
false; // true if Finish is called once but not complete yet.
|
false; // true if Finish is called once but not complete yet.
|
||||||
// The policy of when cut a filter block and Finish it
|
// The policy of when cut a filter block and Finish it
|
||||||
|
@ -136,8 +136,9 @@ class PartitionedFilterBlockTest
|
|||||||
BlockHandle bh;
|
BlockHandle bh;
|
||||||
Status status;
|
Status status;
|
||||||
Slice slice;
|
Slice slice;
|
||||||
|
std::unique_ptr<const char[]> filter_data;
|
||||||
do {
|
do {
|
||||||
slice = builder->Finish(bh, &status);
|
slice = builder->Finish(bh, &status, &filter_data);
|
||||||
bh = Write(slice);
|
bh = Write(slice);
|
||||||
} while (status.IsIncomplete());
|
} while (status.IsIncomplete());
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user