Object lifetime in cache
Summary: Any cached object that is not pure raw data, and thus depends on the table being live, must be destructed before the table closes. There was a bug where this was not done for the filter object. This patch fixes the bug and adds a unit test to prevent such bugs in the future.

Closes https://github.com/facebook/rocksdb/pull/2246
Differential Revision: D5001318
Pulled By: maysamyabandeh
fbshipit-source-id: 6d8772e58765485868094b92964da82ef9730b6d
Parent: fdaefa0309
Commit: 40af2381ec
include/rocksdb/cache.h

@@ -187,6 +187,10 @@ class Cache {
   virtual std::string GetPrintableOptions() const { return ""; }
 
+  // Mark the last inserted object as being a raw data block. This will be used
+  // in tests. The default implementation does nothing.
+  virtual void TEST_mark_as_data_block(const Slice& key, size_t charge) {}
+
  private:
   // No copying allowed
   Cache(const Cache&);
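Note: the hook is deliberately a no-op in the base class, so production Cache
implementations are unaffected; only a test cache overrides it to account for
raw data blocks separately (the MockCache added to table_test.cc below does
exactly that). A minimal, self-contained sketch of the idiom (CacheIface and
TestCache are illustrative names, not RocksDB types):

    #include <cstddef>
    #include <iostream>
    #include <string>

    // The base class ships a no-op virtual test hook, so production code
    // pays nothing, while a test subclass overrides it to observe behavior.
    struct CacheIface {
      virtual ~CacheIface() = default;
      virtual void Insert(const std::string& key, size_t charge) = 0;
      // No-op by default; only test caches override it.
      virtual void TEST_mark_as_data_block(const std::string& /*key*/,
                                           size_t /*charge*/) {}
    };

    struct TestCache : CacheIface {
      size_t marked_bytes = 0;
      void Insert(const std::string&, size_t) override {}
      void TEST_mark_as_data_block(const std::string&,
                                   size_t charge) override {
        marked_bytes += charge;  // account raw data blocks separately
      }
    };

    int main() {
      TestCache c;
      c.Insert("block1", 4096);
      c.TEST_mark_as_data_block("block1", 4096);
      std::cout << c.marked_bytes << "\n";  // prints 4096
    }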
table/block_based_table_reader.cc

@@ -913,6 +913,8 @@ Status BlockBasedTable::GetDataBlockFromCache(
     s = block_cache->Insert(
         block_cache_key, block->value, block->value->usable_size(),
         &DeleteCachedEntry<Block>, &(block->cache_handle));
+    block_cache->TEST_mark_as_data_block(block_cache_key,
+                                         block->value->usable_size());
     if (s.ok()) {
       RecordTick(statistics, BLOCK_CACHE_ADD);
       if (is_index) {
@@ -994,6 +996,8 @@ Status BlockBasedTable::PutDataBlockToCache(
     s = block_cache->Insert(
         block_cache_key, block->value, block->value->usable_size(),
         &DeleteCachedEntry<Block>, &(block->cache_handle), priority);
+    block_cache->TEST_mark_as_data_block(block_cache_key,
+                                         block->value->usable_size());
     if (s.ok()) {
       assert(block->cache_handle != nullptr);
       RecordTick(statistics, BLOCK_CACHE_ADD);
@@ -2090,7 +2094,7 @@ void BlockBasedTable::Close() {
   char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
   // Get the filter block key
   auto key = GetCacheKey(rep_->cache_key_prefix, rep_->cache_key_prefix_size,
-                         rep_->footer.metaindex_handle(), cache_key);
+                         rep_->filter_handle, cache_key);
   rep_->table_options.block_cache.get()->Erase(key);
   // Get the index block key
   key = GetCacheKeyFromOffset(rep_->cache_key_prefix,
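Note: this one-line change is the core bug fix. Close() was deriving the
filter's cache key from footer.metaindex_handle() instead of the filter
block's own handle, so the Erase() call removed a key under which the filter
was never stored, and the filter object could outlive the table in the cache.
A per-block key is essentially the table's cache-key prefix plus the encoded
block offset, so keying by the wrong handle produces a different key. A
simplified sketch of that derivation (an approximation of
BlockBasedTable::GetCacheKey; the real code uses a varint64 encoding):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <string>

    // Per-block cache keys are "table prefix + block offset"; erasing with
    // the metaindex offset therefore silently misses the filter's entry.
    std::string MakeCacheKey(const char* prefix, size_t prefix_size,
                             uint64_t block_offset) {
      std::string key(prefix, prefix_size);
      char buf[sizeof(block_offset)];
      std::memcpy(buf, &block_offset, sizeof(buf));  // real code: varint64
      key.append(buf, sizeof(buf));
      return key;
    }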
table/block_based_table_reader.h

@@ -376,9 +376,9 @@ struct BlockBasedTable::CachableEntry {
   CachableEntry(TValue* _value, Cache::Handle* _cache_handle)
       : value(_value), cache_handle(_cache_handle) {}
   CachableEntry() : CachableEntry(nullptr, nullptr) {}
-  void Release(Cache* cache) {
+  void Release(Cache* cache, bool force_erase = false) {
     if (cache_handle) {
-      cache->Release(cache_handle);
+      cache->Release(cache_handle, force_erase);
       value = nullptr;
       cache_handle = nullptr;
     }
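Note: the new force_erase flag is forwarded to Cache::Release(handle,
force_erase), which drops the caller's reference and, when force_erase is
true, also erases the entry if no other reference remains. Table close uses
this so that cached objects tied to the table do not linger until eviction.
A short sketch of the difference on the public Cache API (illustrative;
error handling elided):

    #include <cassert>
    #include <memory>
    #include "rocksdb/cache.h"

    namespace {
    void NoopDeleter(const rocksdb::Slice& /*key*/, void* /*value*/) {}
    }  // namespace

    void ReleaseVsForceErase() {
      std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(1 << 20);
      rocksdb::Cache::Handle* h = nullptr;
      cache->Insert("k", nullptr, /*charge=*/1, &NoopDeleter, &h);
      // A plain Release(h) would drop our reference but may keep the entry
      // cached for future hits; with force_erase=true the entry is erased
      // as soon as the last reference is gone.
      cache->Release(h, /*force_erase=*/true);
      assert(cache->Lookup("k") == nullptr);  // nothing left behind
    }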
table/partitioned_filter_block.cc

@@ -89,9 +89,19 @@ PartitionedFilterBlockReader::PartitionedFilterBlockReader(
 }
 
 PartitionedFilterBlockReader::~PartitionedFilterBlockReader() {
-  ReadLock rl(&mu_);
-  for (auto it = handle_list_.begin(); it != handle_list_.end(); ++it) {
-    table_->rep_->table_options.block_cache.get()->Release(*it);
+  {
+    ReadLock rl(&mu_);
+    for (auto it = handle_list_.begin(); it != handle_list_.end(); ++it) {
+      table_->rep_->table_options.block_cache.get()->Release(*it);
+    }
+  }
+  char cache_key[BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length];
+  for (auto it = filter_block_set_.begin(); it != filter_block_set_.end();
+       ++it) {
+    auto key = BlockBasedTable::GetCacheKey(table_->rep_->cache_key_prefix,
+                                            table_->rep_->cache_key_prefix_size,
+                                            *it, cache_key);
+    table_->rep_->table_options.block_cache.get()->Erase(key);
   }
 }
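Note: the destructor now makes two passes. It first releases the pinned
handles (the ReadLock is scoped so it is dropped before the cache is touched
again), then erases every filter partition entry by recomputing its cache key
from the recorded block handles, so no filter object survives the table. The
pattern in miniature (OwnerScopedEntries is an illustrative name, not a
RocksDB type):

    #include <memory>
    #include <set>
    #include <string>
    #include <utility>
    #include "rocksdb/cache.h"

    // Record the keys this owner has inserted into a shared cache and erase
    // them in the destructor, so cached objects cannot outlive their owner.
    class OwnerScopedEntries {
     public:
      explicit OwnerScopedEntries(std::shared_ptr<rocksdb::Cache> cache)
          : cache_(std::move(cache)) {}
      void Remember(const std::string& key) { keys_.insert(key); }
      ~OwnerScopedEntries() {
        for (const auto& k : keys_) {
          cache_->Erase(k);
        }
      }

     private:
      std::shared_ptr<rocksdb::Cache> cache_;
      std::set<std::string> keys_;
    };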
@@ -106,8 +116,6 @@ bool PartitionedFilterBlockReader::KeyMayMatch(
   if (UNLIKELY(idx_on_fltr_blk_->size() == 0)) {
     return true;
   }
-  // This is the user key vs. the full key in the partition index. We assume
-  // that user key <= full key
   auto filter_handle = GetFilterPartitionHandle(*const_ikey_ptr);
   if (UNLIKELY(filter_handle.size() == 0)) {  // key is out of range
     return false;
@@ -198,15 +206,18 @@ PartitionedFilterBlockReader::GetFilterPartition(Slice* handle_value,
   }
   auto filter =
       table_->GetFilter(fltr_blk_handle, is_a_filter_partition, no_io);
-  if (pin_cached_filters && filter.IsSet()) {
-    WriteLock wl(&mu_);
-    std::pair<uint64_t, FilterBlockReader*> pair(fltr_blk_handle.offset(),
-                                                 filter.value);
-    auto succ = filter_cache_.insert(pair).second;
-    if (succ) {
-      handle_list_.push_back(filter.cache_handle);
-    }  // Otherwise it is already inserted by a concurrent thread
-    *cached = true;
+  if (filter.IsSet()) {
+    filter_block_set_.insert(fltr_blk_handle);
+    if (pin_cached_filters) {
+      WriteLock wl(&mu_);
+      std::pair<uint64_t, FilterBlockReader*> pair(fltr_blk_handle.offset(),
+                                                   filter.value);
+      auto succ = filter_cache_.insert(pair).second;
+      if (succ) {
+        handle_list_.push_back(filter.cache_handle);
+      }  // Otherwise it is already inserted by a concurrent thread
+      *cached = true;
+    }
   }
   return filter;
 } else {
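Note: filter_block_set_ is now populated whenever a filter partition is read,
not only when pin_cached_filters is set, so the destructor above can erase
every partition's cache entry even in the unpinned configuration. The pinning
bookkeeping (filter_cache_ and handle_list_) is unchanged and still happens
only under pin_cached_filters.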
table/partitioned_filter_block.h

@@ -87,6 +87,12 @@ class PartitionedFilterBlockReader : public FilterBlockReader {
   const BlockBasedTable* table_;
   std::unordered_map<uint64_t, FilterBlockReader*> filter_cache_;
   autovector<Cache::Handle*> handle_list_;
+  struct BlockHandleCmp {
+    bool operator()(const BlockHandle& lhs, const BlockHandle& rhs) const {
+      return lhs.offset() < rhs.offset();
+    }
+  };
+  std::set<BlockHandle, BlockHandleCmp> filter_block_set_;
   port::RWMutex mu_;
 };
table/table_test.cc

@@ -9,7 +9,6 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 
-#include <inttypes.h>
 #include <stdio.h>
 
 #include <algorithm>
@@ -19,6 +18,7 @@
 #include <string>
 #include <vector>
 
+#include "cache/lru_cache.h"
 #include "db/dbformat.h"
 #include "db/memtable.h"
 #include "db/write_batch_internal.h"
@@ -2127,6 +2127,137 @@ TEST_F(BlockBasedTableTest, BlockReadCountTest) {
   }
 }
 
+// A wrapper around LRUCache that also keeps track of data blocks (in contrast
+// with the objects) in the cache. The class is very simple and can be used only
+// for trivial tests.
+class MockCache : public LRUCache {
+ public:
+  MockCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
+            double high_pri_pool_ratio)
+      : LRUCache(capacity, num_shard_bits, strict_capacity_limit,
+                 high_pri_pool_ratio) {}
+  virtual Status Insert(const Slice& key, void* value, size_t charge,
+                        void (*deleter)(const Slice& key, void* value),
+                        Handle** handle = nullptr,
+                        Priority priority = Priority::LOW) override {
+    // Replace the deleter with our own so that we keep track of data blocks
+    // erased from the cache
+    deleters_[key.ToString()] = deleter;
+    return ShardedCache::Insert(key, value, charge, &MockDeleter, handle,
+                                priority);
+  }
+  // This is called by the application right after inserting a data block
+  virtual void TEST_mark_as_data_block(const Slice& key,
+                                       size_t charge) override {
+    marked_data_in_cache_[key.ToString()] = charge;
+    marked_size_ += charge;
+  }
+  using DeleterFunc = void (*)(const Slice& key, void* value);
+  static std::map<std::string, DeleterFunc> deleters_;
+  static std::map<std::string, size_t> marked_data_in_cache_;
+  static size_t marked_size_;
+  static void MockDeleter(const Slice& key, void* value) {
+    // If the item was marked as a data block, decrease its usage from the
+    // total data block usage of the cache
+    if (marked_data_in_cache_.find(key.ToString()) !=
+        marked_data_in_cache_.end()) {
+      marked_size_ -= marked_data_in_cache_[key.ToString()];
+    }
+    // Then call the original deleter
+    assert(deleters_.find(key.ToString()) != deleters_.end());
+    auto deleter = deleters_[key.ToString()];
+    deleter(key, value);
+  }
+};
+
+size_t MockCache::marked_size_ = 0;
+std::map<std::string, MockCache::DeleterFunc> MockCache::deleters_;
+std::map<std::string, size_t> MockCache::marked_data_in_cache_;
+
+// Block cache can contain raw data blocks as well as general objects. If an
+// object depends on the table to be live, it then must be destructed before
+// the table is closed. This test makes sure that the only items remaining in
+// the cache after the table is closed are raw data blocks.
+TEST_F(BlockBasedTableTest, NoObjectInCacheAfterTableClose) {
+  for (auto index_type :
+       {BlockBasedTableOptions::IndexType::kBinarySearch,
+        BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch}) {
+    for (bool block_based_filter : {true, false}) {
+      for (bool partition_filter : {true, false}) {
+        if (partition_filter &&
+            (block_based_filter ||
+             index_type !=
+                 BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch)) {
+          continue;
+        }
+        for (bool index_and_filter_in_cache : {true, false}) {
+          for (bool pin_l0 : {true, false}) {
+            if (pin_l0 && !index_and_filter_in_cache) {
+              continue;
+            }
+            // Create a table
+            Options opt;
+            unique_ptr<InternalKeyComparator> ikc;
+            ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
+            opt.compression = kNoCompression;
+            BlockBasedTableOptions table_options;
+            table_options.block_size = 1024;
+            table_options.index_type =
+                BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+            table_options.pin_l0_filter_and_index_blocks_in_cache = pin_l0;
+            table_options.partition_filters = partition_filter;
+            table_options.cache_index_and_filter_blocks =
+                index_and_filter_in_cache;
+            // big enough so we don't ever lose cached values.
+            table_options.block_cache = std::shared_ptr<rocksdb::Cache>(
+                new MockCache(16 * 1024 * 1024, 4, false, 0.0));
+            table_options.filter_policy.reset(
+                rocksdb::NewBloomFilterPolicy(10, block_based_filter));
+            opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+            TableConstructor c(BytewiseComparator());
+            std::string user_key = "k01";
+            std::string key =
+                InternalKey(user_key, 0, kTypeValue).Encode().ToString();
+            c.Add(key, "hello");
+            std::vector<std::string> keys;
+            stl_wrappers::KVMap kvmap;
+            const ImmutableCFOptions ioptions(opt);
+            c.Finish(opt, ioptions, table_options, *ikc, &keys, &kvmap);
+
+            // Doing a read to make index/filter loaded into the cache
+            auto table_reader =
+                dynamic_cast<BlockBasedTable*>(c.GetTableReader());
+            PinnableSlice value;
+            GetContext get_context(opt.comparator, nullptr, nullptr, nullptr,
+                                   GetContext::kNotFound, user_key, &value,
+                                   nullptr, nullptr, nullptr, nullptr);
+            InternalKey ikey(user_key, 0, kTypeValue);
+            auto s = table_reader->Get(ReadOptions(), key, &get_context);
+            ASSERT_EQ(get_context.State(), GetContext::kFound);
+            ASSERT_STREQ(value.data(), "hello");
+
+            // Close the table
+            c.ResetTableReader();
+
+            auto usage = table_options.block_cache->GetUsage();
+            auto pinned_usage = table_options.block_cache->GetPinnedUsage();
+            // The only usage must be for marked data blocks
+            ASSERT_EQ(usage, MockCache::marked_size_);
+            // There must be some pinned data since PinnableSlice has not
+            // released them yet
+            ASSERT_GT(pinned_usage, 0);
+            // Release pinnable slice resources
+            value.Reset();
+            pinned_usage = table_options.block_cache->GetPinnedUsage();
+            ASSERT_EQ(pinned_usage, 0);
+          }
+        }
+      }
+    }
+  }
+}
+
 TEST_F(BlockBasedTableTest, BlockCacheLeak) {
   // Check that when we reopen a table we don't lose access to blocks already
   // in the cache. This test checks whether the Table actually makes use of the
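Note on the MockCache technique: wrapping every inserted deleter with
MockDeleter lets the test observe exactly which entries leave the cache,
while TEST_mark_as_data_block tags raw data blocks at insertion time. Because
deleters_, marked_data_in_cache_, and marked_size_ are static, the
bookkeeping outlives any single cache instance; the key assertion,
ASSERT_EQ(usage, MockCache::marked_size_), then states that after the table
is closed everything still resident is a marked raw data block. Any
table-dependent object left behind would make usage exceed the marked total.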