Delete filter before closing the table

Summary:
Some filters such as partitioned filter have pointers to the table for which they are created. Therefore is they are stored in the block cache, the should be forcibly erased from block cache before closing the  table, which would result into deleting the object. Otherwise the destructor will be called later when the cache is lazily erasing the object, which having the parent table no longer existent it could result into undefined behavior.

Update: there will be still cases the filter is not removed from the cache since the table has not kept a pointer to the cache handle to be able to forcibly release it later. We make sure that the filter destructor does not access the table pointer to get around such cases.
Closes https://github.com/facebook/rocksdb/pull/2207

Differential Revision: D4941591

Pulled By: maysamyabandeh

fbshipit-source-id: 56fbab2a11cf447e1aa67caa30b58d7bd7ce5bbd
This commit is contained in:
Maysam Yabandeh 2017-05-01 18:17:01 -07:00 committed by Facebook Github Bot
parent 47a09b0a88
commit 89833577a8
6 changed files with 44 additions and 30 deletions

View File

@ -101,6 +101,11 @@ Status BlockBasedTableFactory::SanitizeOptions(
"Unsupported BlockBasedTable format_version. Please check "
"include/rocksdb/table.h for more info");
}
if (table_options_.block_cache &&
table_options_.no_block_cache) {
return Status::InvalidArgument("Block cache is initialized"
", but it is disabled");
}
return Status::OK();
}

View File

@ -2063,21 +2063,17 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file) {
}
void BlockBasedTable::Close() {
rep_->filter_entry.Release(rep_->table_options.block_cache.get());
rep_->index_entry.Release(rep_->table_options.block_cache.get());
rep_->range_del_entry.Release(rep_->table_options.block_cache.get());
// cleanup index and filter blocks to avoid accessing dangling pointer
if (!rep_->table_options.no_block_cache) {
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
// Get the filter block key
auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
rep_->footer.metaindex_handle(), cache_key);
rep_->table_options.block_cache.get()->Erase(key);
// Get the index block key
key = GetCacheKeyFromOffset(rep_->cache_key_prefix,
rep_->cache_key_prefix_size,
rep_->dummy_index_reader_offset, cache_key);
rep_->table_options.block_cache.get()->Erase(key);
const bool force_erase = true;
auto block_cache = rep_->table_options.block_cache.get();
if (block_cache) {
// The filter_entry and inde_entry's lifetime ends with table's and is
// forced to be released.
// Note that if the xxx_entry is not set then the cached entry might remian
// in the cache for some more time. The destrcutor hence must take int
// account that the table object migth no longer be available.
rep_->filter_entry.Release(block_cache, force_erase);
rep_->index_entry.Release(block_cache, force_erase);
rep_->range_del_entry.Release(block_cache);
}
}

View File

@ -370,9 +370,10 @@ struct BlockBasedTable::CachableEntry {
CachableEntry(TValue* _value, Cache::Handle* _cache_handle)
: value(_value), cache_handle(_cache_handle) {}
CachableEntry() : CachableEntry(nullptr, nullptr) {}
void Release(Cache* cache) {
void Release(Cache* cache, bool force_erase = false) {
if (cache_handle) {
cache->Release(cache_handle);
bool erased = cache->Release(cache_handle, force_erase);
assert(!force_erase || erased);
value = nullptr;
cache_handle = nullptr;
}

View File

@ -82,16 +82,21 @@ PartitionedFilterBlockReader::PartitionedFilterBlockReader(
: FilterBlockReader(contents.data.size(), stats, _whole_key_filtering),
prefix_extractor_(prefix_extractor),
comparator_(comparator),
table_(table) {
table_(table),
block_cache_(table_->rep_->table_options.block_cache.get()){
idx_on_fltr_blk_.reset(new Block(std::move(contents),
kDisableGlobalSequenceNumber,
0 /* read_amp_bytes_per_bit */, stats));
}
PartitionedFilterBlockReader::~PartitionedFilterBlockReader() {
ReadLock rl(&mu_);
for (auto it = handle_list_.begin(); it != handle_list_.end(); ++it) {
table_->rep_->table_options.block_cache.get()->Release(*it);
// The destructor migh be called via cache evict after the table is deleted.
// We should avoid using table_ pointer in destructor then.
if (block_cache_) {
ReadLock rl(&mu_);
for (auto it = handle_list_.begin(); it != handle_list_.end(); ++it) {
block_cache_->Release(*it);
}
}
}
@ -122,7 +127,7 @@ bool PartitionedFilterBlockReader::KeyMayMatch(
return res;
}
if (LIKELY(filter_partition.IsSet())) {
filter_partition.Release(table_->rep_->table_options.block_cache.get());
filter_partition.Release(block_cache_);
} else {
delete filter_partition.value;
}
@ -154,7 +159,7 @@ bool PartitionedFilterBlockReader::PrefixMayMatch(
return res;
}
if (LIKELY(filter_partition.IsSet())) {
filter_partition.Release(table_->rep_->table_options.block_cache.get());
filter_partition.Release(block_cache_);
} else {
delete filter_partition.value;
}
@ -182,8 +187,7 @@ PartitionedFilterBlockReader::GetFilterPartition(Slice* handle_value,
auto s = fltr_blk_handle.DecodeFrom(handle_value);
assert(s.ok());
const bool is_a_filter_partition = true;
auto block_cache = table_->rep_->table_options.block_cache.get();
if (LIKELY(block_cache != nullptr)) {
if (LIKELY(block_cache_ != nullptr)) {
bool pin_cached_filters =
GetLevel() == 0 &&
table_->rep_->table_options.pin_l0_filter_and_index_blocks_in_cache;

View File

@ -85,6 +85,7 @@ class PartitionedFilterBlockReader : public FilterBlockReader {
std::unique_ptr<Block> idx_on_fltr_blk_;
const Comparator& comparator_;
const BlockBasedTable* table_;
Cache* block_cache_;
std::unordered_map<uint64_t, FilterBlockReader*> filter_cache_;
autovector<Cache::Handle*> handle_list_;
port::RWMutex mu_;

View File

@ -9,7 +9,6 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <inttypes.h>
#include <stdio.h>
#include <algorithm>
@ -1850,7 +1849,10 @@ TEST_F(BlockBasedTableTest, FilterBlockInBlockCache) {
// -- Table construction
Options options;
options.create_if_missing = true;
options.statistics = CreateDBStatistics();
// Keep a ref to statistic to prevent it from being destructed before
// block cache gets cleaned up upon next table_factory.reset
auto statistics = CreateDBStatistics();
options.statistics = statistics;
// Enable the cache for index/filter blocks
BlockBasedTableOptions table_options;
@ -1930,15 +1932,17 @@ TEST_F(BlockBasedTableTest, FilterBlockInBlockCache) {
}
// release the iterator so that the block cache can reset correctly.
iter.reset();
c.ResetTableReader();
// -- PART 2: Open with very small block cache
// In this test, no block will ever get hit since the block cache is
// too small to fit even one entry.
table_options.block_cache = NewLRUCache(1, 4);
options.statistics = CreateDBStatistics();
options.table_factory.reset(new BlockBasedTableFactory(table_options));
// Keep a ref to statistic to prevent it from being destructed before
// block cache gets cleaned up upon next table_factory.reset
statistics = CreateDBStatistics();
options.statistics = statistics;
const ImmutableCFOptions ioptions2(options);
c.Reopen(ioptions2);
{
@ -1993,7 +1997,10 @@ TEST_F(BlockBasedTableTest, FilterBlockInBlockCache) {
// Open table with filter policy
table_options.filter_policy.reset(NewBloomFilterPolicy(1));
options.table_factory.reset(new BlockBasedTableFactory(table_options));
options.statistics = CreateDBStatistics();
// Keep a ref to statistic to prevent it from being destructed before
// block cache gets cleaned up upon next table_factory.reset
statistics = CreateDBStatistics();
options.statistics = statistics;
ImmutableCFOptions ioptions4(options);
ASSERT_OK(c3.Reopen(ioptions4));
reader = dynamic_cast<BlockBasedTable*>(c3.GetTableReader());