Charging block cache more accurately (#4073)
Summary: Currently the block cache is charged only for the size of the raw data block, excluding the overhead of the C++ objects that contain it. This patch improves the accuracy of the charge by including the C++ object overhead.

Closes https://github.com/facebook/rocksdb/pull/4073

Differential Revision: D8686552
Pulled By: maysamyabandeh
fbshipit-source-id: 8472f7fc163c0644533bc6942e20cdd5725f520f
This commit is contained in:
parent b3efb1cbe0
commit 29ffbb8a50
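The pattern the patch applies is the same everywhere it touches: charge the payload as the allocator actually reserved it, plus the container object itself, falling back to sizeof where malloc_usable_size is unavailable. Below is a minimal self-contained sketch of that pattern; the CacheEntry type and the main() driver are illustrative stand-ins, not code from the patch, and like the patch it assumes operator new forwards to malloc (true for common glibc/jemalloc builds).

#include <cstddef>
#include <cstdio>
#include <cstdlib>

#ifdef ROCKSDB_MALLOC_USABLE_SIZE
#include <malloc.h>  // malloc_usable_size (glibc; FreeBSD has <malloc_np.h>)
#endif

// Illustrative stand-in for a block-like object that owns a heap buffer.
struct CacheEntry {
  char* data = nullptr;
  size_t size = 0;  // requested payload size

  size_t ApproximateMemoryUsage() const {
    size_t usage = 0;
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
    // Ask the allocator what it actually reserved, for the buffer and for
    // this object itself; both can exceed the requested sizes.
    if (data != nullptr) {
      usage += malloc_usable_size(data);
    }
    usage += malloc_usable_size((void*)this);
#else
    // Portable fallback: requested payload plus the object footprint.
    usage += size;
    usage += sizeof(*this);
#endif
    return usage;
  }
};

int main() {
  CacheEntry* e = new CacheEntry;
  e->size = 4096;
  e->data = static_cast<char*>(std::malloc(e->size));
  // Strictly more than the bare 4096 bytes that would be charged before.
  std::printf("charge = %zu bytes\n", e->ApproximateMemoryUsage());
  std::free(e->data);
  delete e;
  return 0;
}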
table/block.cc
@@ -486,9 +486,17 @@ void Block::SetBlockPrefixIndex(BlockPrefixIndex* prefix_index) {
 
 size_t Block::ApproximateMemoryUsage() const {
   size_t usage = usable_size();
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+  usage += malloc_usable_size((void*)this);
+#else
+  usage += sizeof(*this);
+#endif // ROCKSDB_MALLOC_USABLE_SIZE
   if (prefix_index_) {
     usage += prefix_index_->ApproximateMemoryUsage();
   }
+  if (read_amp_bitmap_) {
+    usage += read_amp_bitmap_->ApproximateMemoryUsage();
+  }
   return usage;
 }
table/block.h
@@ -104,6 +104,13 @@ class BlockReadAmpBitmap {
 
   uint32_t GetBytesPerBit() { return 1 << bytes_per_bit_pow_; }
 
+  size_t ApproximateMemoryUsage() const {
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+    return malloc_usable_size((void*)this);
+#endif // ROCKSDB_MALLOC_USABLE_SIZE
+    return sizeof(*this);
+  }
+
  private:
   // Get the current value of bit at `bit_idx` and set it to 1
   inline bool GetAndSet(uint32_t bit_idx) {
@@ -142,14 +149,8 @@ class Block {
   size_t size() const { return size_; }
   const char* data() const { return data_; }
   bool cachable() const { return contents_.cachable; }
-  size_t usable_size() const {
-#ifdef ROCKSDB_MALLOC_USABLE_SIZE
-    if (contents_.allocation.get() != nullptr) {
-      return malloc_usable_size(contents_.allocation.get());
-    }
-#endif // ROCKSDB_MALLOC_USABLE_SIZE
-    return size_;
-  }
+  // The additional memory space taken by the block data.
+  size_t usable_size() const { return contents_.usable_size(); }
   uint32_t NumRestarts() const;
   CompressionType compression_type() const {
     return contents_.compression_type;
table/block_based_table_builder.cc
@@ -659,7 +659,7 @@ Status BlockBasedTableBuilder::InsertBlockInCache(const Slice& block_contents,
         (end - r->compressed_cache_key_prefix));
 
     // Insert into compressed block cache.
-    block_cache_compressed->Insert(key, block, block->usable_size(),
+    block_cache_compressed->Insert(key, block, block->ApproximateMemoryUsage(),
                                    &DeleteCachedBlock);
 
     // Invalidate OS cache.
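For context, the third argument of Cache::Insert is the charge the cache accounts against its capacity, so the call sites in this patch simply switch that argument to the more accurate estimate. Below is a hedged usage sketch against the public rocksdb::Cache API of this era; the std::string payload and the DeleteValue deleter are illustrative, not part of the patch.

#include <cstddef>
#include <memory>
#include <string>

#include "rocksdb/cache.h"

namespace {
// Deleter the cache invokes when the entry is evicted or erased.
void DeleteValue(const rocksdb::Slice& /*key*/, void* value) {
  delete static_cast<std::string*>(value);
}
}  // namespace

int main() {
  // 8 MB LRU cache with 4 shard bits (16 shards).
  std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(8 << 20, 4);

  auto* value = new std::string(4096, 'x');
  // Charge the payload plus the container object, in the spirit of
  // Block::ApproximateMemoryUsage() in this patch.
  size_t charge = value->capacity() + sizeof(*value);

  rocksdb::Cache::Handle* handle = nullptr;
  rocksdb::Status s =
      cache->Insert("key1", value, charge, &DeleteValue, &handle);
  if (s.ok() && handle != nullptr) {
    cache->Release(handle);  // drop our reference; the entry stays cached
  }
  return s.ok() ? 0 : 1;
}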
table/block_based_table_reader.cc
@@ -346,7 +346,14 @@ class PartitionIndexReader : public IndexReader, public Cleanable {
 
   virtual size_t ApproximateMemoryUsage() const override {
     assert(index_block_);
-    return index_block_->ApproximateMemoryUsage();
+    size_t usage = index_block_->ApproximateMemoryUsage();
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+    usage += malloc_usable_size((void*)this);
+#else
+    usage += sizeof(*this);
+#endif // ROCKSDB_MALLOC_USABLE_SIZE
+    // TODO(myabandeh): more accurate estimate of partition_map_ mem usage
+    return usage;
   }
 
  private:
@@ -415,7 +422,13 @@ class BinarySearchIndexReader : public IndexReader {
 
   virtual size_t ApproximateMemoryUsage() const override {
     assert(index_block_);
-    return index_block_->ApproximateMemoryUsage();
+    size_t usage = index_block_->ApproximateMemoryUsage();
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+    usage += malloc_usable_size((void*)this);
+#else
+    usage += sizeof(*this);
+#endif // ROCKSDB_MALLOC_USABLE_SIZE
+    return usage;
   }
 
  private:
@@ -532,8 +545,14 @@ class HashIndexReader : public IndexReader {
 
   virtual size_t ApproximateMemoryUsage() const override {
     assert(index_block_);
-    return index_block_->ApproximateMemoryUsage() +
-           prefixes_contents_.data.size();
+    size_t usage = index_block_->ApproximateMemoryUsage();
+    usage += prefixes_contents_.usable_size();
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+    usage += malloc_usable_size((void*)this);
+#else
+    usage += sizeof(*this);
+#endif // ROCKSDB_MALLOC_USABLE_SIZE
+    return usage;
   }
 
  private:
@@ -1154,40 +1173,34 @@ Status BlockBasedTable::GetDataBlockFromCache(
     assert(block->value->compression_type() == kNoCompression);
     if (block_cache != nullptr && block->value->cachable() &&
         read_options.fill_cache) {
-      s = block_cache->Insert(
-          block_cache_key, block->value, block->value->usable_size(),
-          &DeleteCachedEntry<Block>, &(block->cache_handle));
-      block_cache->TEST_mark_as_data_block(block_cache_key,
-                                           block->value->usable_size());
+      size_t charge = block->value->ApproximateMemoryUsage();
+      s = block_cache->Insert(block_cache_key, block->value, charge,
+                              &DeleteCachedEntry<Block>,
+                              &(block->cache_handle));
+      block_cache->TEST_mark_as_data_block(block_cache_key, charge);
       if (s.ok()) {
         if (get_context != nullptr) {
           get_context->RecordCounters(BLOCK_CACHE_ADD, 1);
-          get_context->RecordCounters(BLOCK_CACHE_BYTES_WRITE,
-                                      block->value->usable_size());
+          get_context->RecordCounters(BLOCK_CACHE_BYTES_WRITE, charge);
         } else {
           RecordTick(statistics, BLOCK_CACHE_ADD);
-          RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE,
-                     block->value->usable_size());
+          RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, charge);
         }
         if (is_index) {
           if (get_context != nullptr) {
             get_context->RecordCounters(BLOCK_CACHE_INDEX_ADD, 1);
-            get_context->RecordCounters(BLOCK_CACHE_INDEX_BYTES_INSERT,
-                                        block->value->usable_size());
+            get_context->RecordCounters(BLOCK_CACHE_INDEX_BYTES_INSERT, charge);
           } else {
             RecordTick(statistics, BLOCK_CACHE_INDEX_ADD);
-            RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT,
-                       block->value->usable_size());
+            RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT, charge);
           }
         } else {
           if (get_context != nullptr) {
             get_context->RecordCounters(BLOCK_CACHE_DATA_ADD, 1);
-            get_context->RecordCounters(BLOCK_CACHE_DATA_BYTES_INSERT,
-                                        block->value->usable_size());
+            get_context->RecordCounters(BLOCK_CACHE_DATA_BYTES_INSERT, charge);
           } else {
             RecordTick(statistics, BLOCK_CACHE_DATA_ADD);
-            RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT,
-                       block->value->usable_size());
+            RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT, charge);
           }
         }
     } else {
@@ -1243,7 +1256,7 @@ Status BlockBasedTable::PutDataBlockToCache(
   if (block_cache_compressed != nullptr && raw_block != nullptr &&
       raw_block->cachable()) {
     s = block_cache_compressed->Insert(compressed_block_cache_key, raw_block,
-                                       raw_block->usable_size(),
+                                       raw_block->ApproximateMemoryUsage(),
                                        &DeleteCachedEntry<Block>);
     if (s.ok()) {
       // Avoid the following code to delete this cached block.
@@ -1258,41 +1271,35 @@ Status BlockBasedTable::PutDataBlockToCache(
   // insert into uncompressed block cache
   assert((block->value->compression_type() == kNoCompression));
   if (block_cache != nullptr && block->value->cachable()) {
-    s = block_cache->Insert(
-        block_cache_key, block->value, block->value->usable_size(),
-        &DeleteCachedEntry<Block>, &(block->cache_handle), priority);
-    block_cache->TEST_mark_as_data_block(block_cache_key,
-                                         block->value->usable_size());
+    size_t charge = block->value->ApproximateMemoryUsage();
+    s = block_cache->Insert(block_cache_key, block->value, charge,
+                            &DeleteCachedEntry<Block>, &(block->cache_handle),
+                            priority);
+    block_cache->TEST_mark_as_data_block(block_cache_key, charge);
     if (s.ok()) {
       assert(block->cache_handle != nullptr);
       if (get_context != nullptr) {
         get_context->RecordCounters(BLOCK_CACHE_ADD, 1);
-        get_context->RecordCounters(BLOCK_CACHE_BYTES_WRITE,
-                                    block->value->usable_size());
+        get_context->RecordCounters(BLOCK_CACHE_BYTES_WRITE, charge);
       } else {
         RecordTick(statistics, BLOCK_CACHE_ADD);
-        RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE,
-                   block->value->usable_size());
+        RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, charge);
       }
       if (is_index) {
         if (get_context != nullptr) {
           get_context->RecordCounters(BLOCK_CACHE_INDEX_ADD, 1);
-          get_context->RecordCounters(BLOCK_CACHE_INDEX_BYTES_INSERT,
-                                      block->value->usable_size());
+          get_context->RecordCounters(BLOCK_CACHE_INDEX_BYTES_INSERT, charge);
         } else {
           RecordTick(statistics, BLOCK_CACHE_INDEX_ADD);
-          RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT,
-                     block->value->usable_size());
+          RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT, charge);
         }
       } else {
        if (get_context != nullptr) {
           get_context->RecordCounters(BLOCK_CACHE_DATA_ADD, 1);
-          get_context->RecordCounters(BLOCK_CACHE_DATA_BYTES_INSERT,
-                                      block->value->usable_size());
+          get_context->RecordCounters(BLOCK_CACHE_DATA_BYTES_INSERT, charge);
        } else {
           RecordTick(statistics, BLOCK_CACHE_DATA_ADD);
-          RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT,
-                     block->value->usable_size());
+          RecordTick(statistics, BLOCK_CACHE_DATA_BYTES_INSERT, charge);
        }
      }
      assert(reinterpret_cast<Block*>(
@@ -1429,24 +1436,23 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
       filter = ReadFilter(prefetch_buffer, filter_blk_handle,
                           is_a_filter_partition, prefix_extractor);
       if (filter != nullptr) {
+        size_t usage = filter->ApproximateMemoryUsage();
         Status s = block_cache->Insert(
-            key, filter, filter->size(), &DeleteCachedFilterEntry, &cache_handle,
+            key, filter, usage, &DeleteCachedFilterEntry, &cache_handle,
             rep_->table_options.cache_index_and_filter_blocks_with_high_priority
                 ? Cache::Priority::HIGH
                 : Cache::Priority::LOW);
         if (s.ok()) {
           if (get_context != nullptr) {
             get_context->RecordCounters(BLOCK_CACHE_ADD, 1);
-            get_context->RecordCounters(BLOCK_CACHE_BYTES_WRITE, filter->size());
+            get_context->RecordCounters(BLOCK_CACHE_BYTES_WRITE, usage);
             get_context->RecordCounters(BLOCK_CACHE_FILTER_ADD, 1);
-            get_context->RecordCounters(BLOCK_CACHE_FILTER_BYTES_INSERT,
-                                        filter->size());
+            get_context->RecordCounters(BLOCK_CACHE_FILTER_BYTES_INSERT, usage);
           } else {
             RecordTick(statistics, BLOCK_CACHE_ADD);
-            RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, filter->size());
+            RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, usage);
             RecordTick(statistics, BLOCK_CACHE_FILTER_ADD);
-            RecordTick(statistics, BLOCK_CACHE_FILTER_BYTES_INSERT,
-                       filter->size());
+            RecordTick(statistics, BLOCK_CACHE_FILTER_BYTES_INSERT, usage);
           }
         } else {
           RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES);
@@ -1512,27 +1518,27 @@ InternalIterator* BlockBasedTable::NewIndexIterator(
     TEST_SYNC_POINT("BlockBasedTable::NewIndexIterator::thread1:1");
     TEST_SYNC_POINT("BlockBasedTable::NewIndexIterator::thread2:3");
     TEST_SYNC_POINT("BlockBasedTable::NewIndexIterator::thread1:4");
+    size_t charge = 0;
     if (s.ok()) {
       assert(index_reader != nullptr);
+      charge = index_reader->ApproximateMemoryUsage();
       s = block_cache->Insert(
-          key, index_reader, index_reader->usable_size(),
-          &DeleteCachedIndexEntry, &cache_handle,
+          key, index_reader, charge, &DeleteCachedIndexEntry, &cache_handle,
           rep_->table_options.cache_index_and_filter_blocks_with_high_priority
               ? Cache::Priority::HIGH
               : Cache::Priority::LOW);
     }
 
     if (s.ok()) {
-      size_t usable_size = index_reader->usable_size();
       if (get_context != nullptr) {
         get_context->RecordCounters(BLOCK_CACHE_ADD, 1);
-        get_context->RecordCounters(BLOCK_CACHE_BYTES_WRITE, usable_size);
+        get_context->RecordCounters(BLOCK_CACHE_BYTES_WRITE, charge);
       } else {
         RecordTick(statistics, BLOCK_CACHE_ADD);
-        RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, usable_size);
+        RecordTick(statistics, BLOCK_CACHE_BYTES_WRITE, charge);
       }
       RecordTick(statistics, BLOCK_CACHE_INDEX_ADD);
-      RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT, usable_size);
+      RecordTick(statistics, BLOCK_CACHE_INDEX_BYTES_INSERT, charge);
     } else {
       if (index_reader != nullptr) {
         delete index_reader;
@@ -1661,8 +1667,9 @@ BlockIter* BlockBasedTable::NewDataBlockIterator(
             static_cast<int>(kExtraCacheKeyPrefix + kMaxVarint64Length));
         Slice unique_key =
             Slice(cache_key, static_cast<size_t>(end - cache_key));
-        s = block_cache->Insert(unique_key, nullptr, block.value->usable_size(),
-                                nullptr, &cache_handle);
+        s = block_cache->Insert(unique_key, nullptr,
+                                block.value->ApproximateMemoryUsage(), nullptr,
+                                &cache_handle);
         if (s.ok()) {
           if (cache_handle != nullptr) {
             iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache,
@@ -2998,7 +3005,7 @@ void DeleteCachedFilterEntry(const Slice& /*key*/, void* value) {
   FilterBlockReader* filter = reinterpret_cast<FilterBlockReader*>(value);
   if (filter->statistics() != nullptr) {
     RecordTick(filter->statistics(), BLOCK_CACHE_FILTER_BYTES_EVICT,
-               filter->size());
+               filter->ApproximateMemoryUsage());
   }
   delete filter;
 }
@@ -3007,7 +3014,7 @@ void DeleteCachedIndexEntry(const Slice& /*key*/, void* value) {
   IndexReader* index_reader = reinterpret_cast<IndexReader*>(value);
   if (index_reader->statistics() != nullptr) {
     RecordTick(index_reader->statistics(), BLOCK_CACHE_INDEX_BYTES_EVICT,
-               index_reader->usable_size());
+               index_reader->ApproximateMemoryUsage());
   }
   delete index_reader;
 }
table/format.h
@@ -10,6 +10,13 @@
 #pragma once
 #include <stdint.h>
 #include <string>
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+#ifdef OS_FREEBSD
+#include <malloc_np.h>
+#else
+#include <malloc.h>
+#endif
+#endif
 #include "rocksdb/options.h"
 #include "rocksdb/slice.h"
 #include "rocksdb/status.h"
@@ -199,6 +206,19 @@ struct BlockContents {
         compression_type(_compression_type),
         allocation(std::move(_data)) {}
 
+  // The additional memory space taken by the block data.
+  size_t usable_size() const {
+    if (allocation.get() != nullptr) {
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+      return malloc_usable_size(allocation.get());
+#else
+      return sizeof(*allocation.get());
+#endif // ROCKSDB_MALLOC_USABLE_SIZE
+    } else {
+      return 0;  // no extra memory is occupied by the data
+    }
+  }
+
   BlockContents(BlockContents&& other) ROCKSDB_NOEXCEPT {
     *this = std::move(other);
   }
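One subtlety in the hunk above: when allocation is empty, the block's bytes live in memory owned elsewhere (for example a memory-mapped SST file), so usable_size() adds nothing for them. Below is a small illustration of the two cases, assuming glibc's malloc_usable_size; the OwnedOrBorrowed type is hypothetical, not part of the patch.

#include <cstdio>
#include <cstdlib>
#include <malloc.h>  // malloc_usable_size (glibc; FreeBSD has <malloc_np.h>)

// Hypothetical holder mirroring the ownership logic of BlockContents.
struct OwnedOrBorrowed {
  char* allocation = nullptr;  // non-null only when the bytes are owned

  size_t usable_size() const {
    if (allocation != nullptr) {
      return malloc_usable_size(allocation);  // what was really reserved
    }
    return 0;  // borrowed (e.g. mmap-backed): no extra memory occupied
  }
};

int main() {
  OwnedOrBorrowed owned;
  owned.allocation = static_cast<char*>(std::malloc(100));
  OwnedOrBorrowed borrowed;  // leaves allocation null

  // The owned case reports at least 100 bytes (often rounded up by the
  // allocator); the borrowed case charges nothing extra.
  std::printf("owned: %zu, borrowed: %zu\n", owned.usable_size(),
              borrowed.usable_size());
  std::free(owned.allocation);
  return 0;
}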
table/full_filter_block.cc
@@ -5,6 +5,14 @@
 
 #include "table/full_filter_block.h"
 
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+#ifdef OS_FREEBSD
+#include <malloc_np.h>
+#else
+#include <malloc.h>
+#endif
+#endif
+
 #include "monitoring/perf_context_imp.h"
 #include "port/port.h"
 #include "rocksdb/filter_policy.h"
@@ -152,7 +160,15 @@ bool FullFilterBlockReader::MayMatch(const Slice& entry) {
 }
 
 size_t FullFilterBlockReader::ApproximateMemoryUsage() const {
-  return contents_.size();
+  size_t usage = block_contents_.usable_size();
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+  usage += malloc_usable_size((void*)this);
+  usage += malloc_usable_size(filter_bits_reader_.get());
+#else
+  usage += sizeof(*this);
+  usage += sizeof(*filter_bits_reader_.get());
+#endif // ROCKSDB_MALLOC_USABLE_SIZE
+  return usage;
 }
 
 bool FullFilterBlockReader::RangeMayExist(const Slice* iterate_upper_bound,
table/full_filter_block.h
@@ -118,7 +118,6 @@ class FullFilterBlockReader : public FilterBlockReader {
   Slice contents_;
   std::unique_ptr<FilterBitsReader> filter_bits_reader_;
   BlockContents block_contents_;
   std::unique_ptr<const char[]> filter_data_;
   bool full_length_enabled_;
   size_t prefix_extractor_full_length_;
table/partitioned_filter_block.cc
@@ -5,6 +5,13 @@
 
 #include "table/partitioned_filter_block.h"
 
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+#ifdef OS_FREEBSD
+#include <malloc_np.h>
+#else
+#include <malloc.h>
+#endif
+#endif
 #include <utility>
 
 #include "monitoring/perf_context_imp.h"
@@ -265,7 +272,14 @@ PartitionedFilterBlockReader::GetFilterPartition(
 }
 
 size_t PartitionedFilterBlockReader::ApproximateMemoryUsage() const {
-  return idx_on_fltr_blk_->size();
+  size_t usage = idx_on_fltr_blk_->usable_size();
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+  usage += malloc_usable_size((void*)this);
+#else
+  usage += sizeof(*this);
+#endif // ROCKSDB_MALLOC_USABLE_SIZE
+  return usage;
+  // TODO(myabandeh): better estimation for filter_map_ size
 }
 
 // Release the cached entry and decrement its ref count.
table/table_test.cc
@@ -1988,7 +1988,7 @@ TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) {
 
   // Enable the cache for index/filter blocks
   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
-  table_options.block_cache = NewLRUCache(1024, 4);
+  table_options.block_cache = NewLRUCache(1024, 2);
   table_options.cache_index_and_filter_blocks = true;
   options.table_factory.reset(new BlockBasedTableFactory(table_options));
   std::vector<std::string> keys;
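The test tweak above is presumably a knock-on effect of the larger charges: NewLRUCache(1024, 4) splits the 1024-byte capacity into 2^4 = 16 shards of 64 bytes each, while 2 shard bits leaves 2^2 = 4 shards of 256 bytes, giving each shard enough headroom for entries whose charge now includes object overhead.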