rocksdb/table/partitioned_filter_block.cc
Maysam Yabandeh 89833577a8 Delete filter before closing the table
Summary:
Some filters, such as the partitioned filter, hold pointers to the table for which they were created. Therefore, if they are stored in the block cache, they should be forcibly erased from the block cache before closing the table, which deletes the object. Otherwise the destructor will be called later, when the cache lazily evicts the object, and since the parent table no longer exists this could result in undefined behavior.

Update: there will still be cases where the filter is not removed from the cache, since the table has not kept a pointer to the cache handle and so cannot forcibly release it later. To get around such cases, we make sure that the filter destructor does not access the table pointer.
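
A minimal sketch of the rule above, under the assumption of a simplified reader class (PinnedFilterReader is illustrative, not the actual class; rocksdb::Cache and Cache::Handle are the real block-cache types): the reader captures the raw Cache pointer and the handles it pins, and its destructor releases those handles without ever touching the parent table.

#include <vector>

#include "rocksdb/cache.h"

// Sketch: release pinned block-cache handles in the destructor without
// dereferencing the parent table, which may already have been closed.
class PinnedFilterReader {
 public:
  explicit PinnedFilterReader(rocksdb::Cache* block_cache)
      : block_cache_(block_cache) {}

  ~PinnedFilterReader() {
    // Only the raw Cache pointer captured at construction is used here; the
    // destructor may run during lazy cache eviction after the table that
    // owned this reader is gone.
    if (block_cache_ != nullptr) {
      for (rocksdb::Cache::Handle* h : handles_) {
        block_cache_->Release(h);
      }
    }
  }

 private:
  rocksdb::Cache* block_cache_;
  std::vector<rocksdb::Cache::Handle*> handles_;  // filled as blocks are pinned
};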
Closes https://github.com/facebook/rocksdb/pull/2207

Differential Revision: D4941591

Pulled By: maysamyabandeh

fbshipit-source-id: 56fbab2a11cf447e1aa67caa30b58d7bd7ce5bbd
2017-05-01 19:19:37 -07:00

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
// This source code is also licensed under the GPLv2 license found in the
// COPYING file in the root directory of this source tree.
#include "table/partitioned_filter_block.h"

#include <utility>

#include "port/port.h"
#include "rocksdb/filter_policy.h"
#include "table/block.h"
#include "table/block_based_table_reader.h"
#include "util/coding.h"

namespace rocksdb {

PartitionedFilterBlockBuilder::PartitionedFilterBlockBuilder(
    const SliceTransform* prefix_extractor, bool whole_key_filtering,
    FilterBitsBuilder* filter_bits_builder, int index_block_restart_interval,
    PartitionedIndexBuilder* const p_index_builder)
    : FullFilterBlockBuilder(prefix_extractor, whole_key_filtering,
                             filter_bits_builder),
      index_on_filter_block_builder_(index_block_restart_interval),
      p_index_builder_(p_index_builder) {}

PartitionedFilterBlockBuilder::~PartitionedFilterBlockBuilder() {}

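// Finish the in-progress filter partition whenever the partitioned index
// builder signals that a block should be cut; the resulting filter and its
// partition key are queued in `filters` until Finish() emits them.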
void PartitionedFilterBlockBuilder::MaybeCutAFilterBlock() {
  if (!p_index_builder_->ShouldCutFilterBlock()) {
    return;
  }
  filter_gc.push_back(std::unique_ptr<const char[]>(nullptr));
  Slice filter = filter_bits_builder_->Finish(&filter_gc.back());
  std::string& index_key = p_index_builder_->GetPartitionKey();
  filters.push_back({index_key, filter});
}

void PartitionedFilterBlockBuilder::AddKey(const Slice& key) {
  MaybeCutAFilterBlock();
  filter_bits_builder_->AddKey(key);
}

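// Finish() is called repeatedly by the table builder. Each call after the
// first records the handle of the previously written filter partition in the
// index-on-filter block. While partitions remain, the next one is returned
// with *status set to Status::Incomplete(); once all partitions have been
// written, the final call returns the index-on-filter block itself with
// Status::OK().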
Slice PartitionedFilterBlockBuilder::Finish(
    const BlockHandle& last_partition_block_handle, Status* status) {
  if (finishing_filters == true) {
    // Record the handle of the last written filter block in the index
    FilterEntry& last_entry = filters.front();
    std::string handle_encoding;
    last_partition_block_handle.EncodeTo(&handle_encoding);
    index_on_filter_block_builder_.Add(last_entry.key, handle_encoding);
    filters.pop_front();
  } else {
    MaybeCutAFilterBlock();
  }
  // If there is no filter partition left, then return the index on filter
  // partitions
  if (UNLIKELY(filters.empty())) {
    *status = Status::OK();
    if (finishing_filters) {
      return index_on_filter_block_builder_.Finish();
    } else {
      // This is the rare case where no key was added to the filter
      return Slice();
    }
  } else {
    // Return the next filter partition in line and set Incomplete() status to
    // indicate we expect more calls to Finish
    *status = Status::Incomplete();
    finishing_filters = true;
    return filters.front().filter;
  }
}

PartitionedFilterBlockReader::PartitionedFilterBlockReader(
    const SliceTransform* prefix_extractor, bool _whole_key_filtering,
    BlockContents&& contents, FilterBitsReader* filter_bits_reader,
    Statistics* stats, const Comparator& comparator,
    const BlockBasedTable* table)
    : FilterBlockReader(contents.data.size(), stats, _whole_key_filtering),
      prefix_extractor_(prefix_extractor),
      comparator_(comparator),
      table_(table),
      block_cache_(table_->rep_->table_options.block_cache.get()) {
  idx_on_fltr_blk_.reset(new Block(std::move(contents),
                                   kDisableGlobalSequenceNumber,
                                   0 /* read_amp_bytes_per_bit */, stats));
}

PartitionedFilterBlockReader::~PartitionedFilterBlockReader() {
  // The destructor might be called via cache eviction after the table is
  // deleted. We should avoid using the table_ pointer in the destructor then.
  if (block_cache_) {
    ReadLock rl(&mu_);
    for (auto it = handle_list_.begin(); it != handle_list_.end(); ++it) {
      block_cache_->Release(*it);
    }
  }
}

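// Locate the filter partition that may cover the internal key, query it for
// the user key, and then either release the partition back to the block cache
// or delete it if it is not tracked by the cache. Partitions pinned in
// filter_cache_ are returned with cached == true and are released only in the
// destructor.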
bool PartitionedFilterBlockReader::KeyMayMatch(
    const Slice& key, uint64_t block_offset, const bool no_io,
    const Slice* const const_ikey_ptr) {
  assert(const_ikey_ptr != nullptr);
  assert(block_offset == kNotValid);
  if (!whole_key_filtering_) {
    return true;
  }
  if (UNLIKELY(idx_on_fltr_blk_->size() == 0)) {
    return true;
  }
  // This is the user key vs. the full key in the partition index. We assume
  // that user key <= full key
  auto filter_handle = GetFilterPartitionHandle(*const_ikey_ptr);
  if (UNLIKELY(filter_handle.size() == 0)) {  // key is out of range
    return false;
  }
  bool cached = false;
  auto filter_partition = GetFilterPartition(&filter_handle, no_io, &cached);
  if (UNLIKELY(!filter_partition.value)) {
    return true;
  }
  auto res = filter_partition.value->KeyMayMatch(key, block_offset, no_io);
  if (cached) {
    return res;
  }
  if (LIKELY(filter_partition.IsSet())) {
    filter_partition.Release(block_cache_);
  } else {
    delete filter_partition.value;
  }
  return res;
}

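// Same flow as KeyMayMatch(), but queries the filter partition for the given
// prefix instead of the whole key.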
bool PartitionedFilterBlockReader::PrefixMayMatch(
    const Slice& prefix, uint64_t block_offset, const bool no_io,
    const Slice* const const_ikey_ptr) {
  assert(const_ikey_ptr != nullptr);
  assert(block_offset == kNotValid);
  if (!prefix_extractor_) {
    return true;
  }
  if (UNLIKELY(idx_on_fltr_blk_->size() == 0)) {
    return true;
  }
  auto filter_handle = GetFilterPartitionHandle(*const_ikey_ptr);
  if (UNLIKELY(filter_handle.size() == 0)) {  // prefix is out of range
    return false;
  }
  bool cached = false;
  auto filter_partition = GetFilterPartition(&filter_handle, no_io, &cached);
  if (UNLIKELY(!filter_partition.value)) {
    return true;
  }
  auto res = filter_partition.value->PrefixMayMatch(prefix, kNotValid, no_io);
  if (cached) {
    return res;
  }
  if (LIKELY(filter_partition.IsSet())) {
    filter_partition.Release(block_cache_);
  } else {
    delete filter_partition.value;
  }
  return res;
}

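// Seek the index-on-filter block for the filter partition that may contain
// `entry` and return its encoded block handle; an empty slice means the key
// is past the last partition.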
Slice PartitionedFilterBlockReader::GetFilterPartitionHandle(
    const Slice& entry) {
  BlockIter iter;
  idx_on_fltr_blk_->NewIterator(&comparator_, &iter, true);
  iter.Seek(entry);
  if (UNLIKELY(!iter.Valid())) {
    return Slice();
  }
  assert(iter.Valid());
  Slice handle_value = iter.value();
  return handle_value;
}

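// Resolve an encoded handle to a filter partition. With a block cache, the
// partition is fetched via BlockBasedTable::GetFilter(); for level-0 files
// with pin_l0_filter_and_index_blocks_in_cache set, the cache handle is also
// pinned in filter_cache_ / handle_list_ so it can be released in the
// destructor. Without a block cache, the partition is read directly from the
// file.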
BlockBasedTable::CachableEntry<FilterBlockReader>
PartitionedFilterBlockReader::GetFilterPartition(Slice* handle_value,
                                                 const bool no_io,
                                                 bool* cached) {
  BlockHandle fltr_blk_handle;
  auto s = fltr_blk_handle.DecodeFrom(handle_value);
  assert(s.ok());
  const bool is_a_filter_partition = true;
  if (LIKELY(block_cache_ != nullptr)) {
    bool pin_cached_filters =
        GetLevel() == 0 &&
        table_->rep_->table_options.pin_l0_filter_and_index_blocks_in_cache;
    if (pin_cached_filters) {
      ReadLock rl(&mu_);
      auto iter = filter_cache_.find(fltr_blk_handle.offset());
      if (iter != filter_cache_.end()) {
        RecordTick(statistics(), BLOCK_CACHE_FILTER_HIT);
        *cached = true;
        return {iter->second, nullptr};
      }
    }
    auto filter =
        table_->GetFilter(fltr_blk_handle, is_a_filter_partition, no_io);
    if (pin_cached_filters && filter.IsSet()) {
      WriteLock wl(&mu_);
      std::pair<uint64_t, FilterBlockReader*> pair(fltr_blk_handle.offset(),
                                                   filter.value);
      auto succ = filter_cache_.insert(pair).second;
      if (succ) {
        handle_list_.push_back(filter.cache_handle);
      }  // Otherwise it is already inserted by a concurrent thread
      *cached = true;
    }
    return filter;
  } else {
    auto filter = table_->ReadFilter(fltr_blk_handle, is_a_filter_partition);
    return {filter, nullptr};
  }
}

size_t PartitionedFilterBlockReader::ApproximateMemoryUsage() const {
  return idx_on_fltr_blk_->size();
}

}  // namespace rocksdb