Charge block cache for cache internal usage (#5797)

Summary:
For our default block cache, each additional entry has extra memory overhead: an LRUHandle (currently 72 bytes) plus the cache key (two varint64s, file id and offset). This usage is not negligible. For example, with block_size=4k the overhead accounts for an extra ~2% of cache memory usage (roughly 72 bytes of handle plus a ~16-byte key against a 4096-byte block). This patch charges the cache for that extra usage, reducing untracked memory usage outside the block cache. The feature is enabled by default and can be disabled by passing kDontChargeCacheMetadata to the cache constructor.
This PR builds on https://github.com/facebook/rocksdb/issues/4258
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5797
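
A minimal sketch of opting out, using the metadata_charge_policy field that this PR adds to LRUCacheOptions (the capacity value is an arbitrary example):

  LRUCacheOptions co;
  co.capacity = 8 << 20;  // 8 MB
  // Charge only the values themselves, not the per-entry
  // LRUHandle/key metadata, i.e. the pre-patch accounting.
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);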

Test Plan:
- Existing tests are updated either to disable the feature, when a test depends too heavily on the old way of accounting usage, or to increase the cache capacity to account for the additional charge of metadata.
- The Usage tests in cache_test.cc are augmented to test the cache usage under kFullChargeCacheMetadata.

Differential Revision: D17396833

Pulled By: maysamyabandeh

fbshipit-source-id: 7684ccb9f8a40ca595e4f5efcdb03623afea0c6f
Maysam Yabandeh 2019-09-16 15:14:51 -07:00 committed by Facebook Github Bot
parent 94d62d771e
commit 638d239507
25 changed files with 289 additions and 120 deletions

HISTORY.md

@@ -11,6 +11,7 @@
* When user uses options.force_consistency_check in RocksDB, instead of crashing the process, we now pass the error back to the user without killing the process.
* Add an option `memtable_insert_hint_per_batch` to WriteOptions. If it is true, each WriteBatch will maintain its own insert hints for each memtable in concurrent write. See include/rocksdb/options.h for more details.
* The `sst_dump` command line tool `recompress` command now displays how many blocks were compressed and how many were not, in particular how many were not compressed because the compression ratio was not met (12.5% threshold for GoodCompressionRatio), as seen in the `number.block.not_compressed` counter stat since version 6.0.0.
* Block cache usage now takes into account the per-entry metadata overhead. This results in more accurate accounting of memory. A side effect of this feature is that fewer items fit into a block cache of the same size, which can raise the cache miss rate. This can be remedied by increasing the block cache size, or by passing kDontChargeCacheMetadata to its constructor to restore the old behavior.
### Public API Change
* Added max_write_buffer_size_to_maintain option to better control memory usage of immutable memtables.
* Added a lightweight API GetCurrentWalFile() to get last live WAL filename and size. Meant to be used as a helper for backup/restore tooling in a larger ecosystem such as MySQL with a MyRocks storage engine.
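
The arithmetic behind the block cache metadata entry above, as a rough sketch (the 72-byte handle size is quoted from this PR's summary; the key size is an estimate for two varint64s):

  // Approximate per-entry metadata overhead relative to a 4 KB block:
  size_t handle_bytes = 72;    // sizeof(LRUHandle), per the PR summary
  size_t key_bytes = 2 * 10;   // two varint64s, at most 10 bytes each
  double extra = double(handle_bytes + key_bytes) / 4096;  // ~2.2%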

cache/cache_test.cc

@@ -86,14 +86,22 @@ class CacheTest : public testing::TestWithParam<std::string> {
return nullptr;
}
std::shared_ptr<Cache> NewCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit) {
std::shared_ptr<Cache> NewCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy charge_policy = kDontChargeCacheMetadata) {
auto type = GetParam();
if (type == kLRU) {
return NewLRUCache(capacity, num_shard_bits, strict_capacity_limit, 0.0);
LRUCacheOptions co;
co.capacity = capacity;
co.num_shard_bits = num_shard_bits;
co.strict_capacity_limit = strict_capacity_limit;
co.high_pri_pool_ratio = 0;
co.metadata_charge_policy = charge_policy;
return NewLRUCache(co);
}
if (type == kClock) {
return NewClockCache(capacity, num_shard_bits, strict_capacity_limit);
return NewClockCache(capacity, num_shard_bits, strict_capacity_limit,
charge_policy);
}
return nullptr;
}
@@ -143,10 +151,15 @@ class CacheTest : public testing::TestWithParam<std::string> {
};
CacheTest* CacheTest::current_;
class LRUCacheTest : public CacheTest {};
TEST_P(CacheTest, UsageTest) {
// cache is std::shared_ptr and will be automatically cleaned up.
const uint64_t kCapacity = 100000;
auto cache = NewCache(kCapacity, 8, false);
auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata);
auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata);
ASSERT_EQ(0, cache->GetUsage());
ASSERT_EQ(0, precise_cache->GetUsage());
size_t usage = 0;
char value[10] = "abcdef";
@@ -155,31 +168,45 @@ TEST_P(CacheTest, UsageTest) {
std::string key(i, 'a');
auto kv_size = key.size() + 5;
cache->Insert(key, reinterpret_cast<void*>(value), kv_size, dumbDeleter);
precise_cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
dumbDeleter);
usage += kv_size;
ASSERT_EQ(usage, cache->GetUsage());
ASSERT_LT(usage, precise_cache->GetUsage());
}
cache->EraseUnRefEntries();
precise_cache->EraseUnRefEntries();
ASSERT_EQ(0, cache->GetUsage());
ASSERT_EQ(0, precise_cache->GetUsage());
// make sure the cache will be overloaded
for (uint64_t i = 1; i < kCapacity; ++i) {
auto key = ToString(i);
cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter);
precise_cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter);
}
// the usage should be close to the capacity
ASSERT_GT(kCapacity, cache->GetUsage());
ASSERT_GT(kCapacity, precise_cache->GetUsage());
ASSERT_LT(kCapacity * 0.95, cache->GetUsage());
ASSERT_LT(kCapacity * 0.95, precise_cache->GetUsage());
}
TEST_P(CacheTest, PinnedUsageTest) {
// cache is std::shared_ptr and will be automatically cleaned up.
const uint64_t kCapacity = 100000;
auto cache = NewCache(kCapacity, 8, false);
const uint64_t kCapacity = 200000;
auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata);
auto precise_cache = NewCache(kCapacity, 8, false, kFullChargeCacheMetadata);
size_t pinned_usage = 0;
char value[10] = "abcdef";
std::forward_list<Cache::Handle*> unreleased_handles;
std::forward_list<Cache::Handle*> unreleased_handles_in_precise_cache;
// Add entries. Unpin some of them after insertion. Then, pin some of them
// again. Check GetPinnedUsage().
@@ -187,40 +214,72 @@ TEST_P(CacheTest, PinnedUsageTest) {
std::string key(i, 'a');
auto kv_size = key.size() + 5;
Cache::Handle* handle;
Cache::Handle* handle_in_precise_cache;
cache->Insert(key, reinterpret_cast<void*>(value), kv_size, dumbDeleter,
&handle);
assert(handle);
precise_cache->Insert(key, reinterpret_cast<void*>(value), kv_size,
dumbDeleter, &handle_in_precise_cache);
assert(handle_in_precise_cache);
pinned_usage += kv_size;
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage());
if (i % 2 == 0) {
cache->Release(handle);
precise_cache->Release(handle_in_precise_cache);
pinned_usage -= kv_size;
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage());
} else {
unreleased_handles.push_front(handle);
unreleased_handles_in_precise_cache.push_front(handle_in_precise_cache);
}
if (i % 3 == 0) {
unreleased_handles.push_front(cache->Lookup(key));
auto x = precise_cache->Lookup(key);
assert(x);
unreleased_handles_in_precise_cache.push_front(x);
// If i % 2 == 0, then the entry was unpinned before Lookup, so pinned
// usage increased
if (i % 2 == 0) {
pinned_usage += kv_size;
}
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_LT(pinned_usage, precise_cache->GetPinnedUsage());
}
}
auto precise_cache_pinned_usage = precise_cache->GetPinnedUsage();
ASSERT_LT(pinned_usage, precise_cache_pinned_usage);
// check that overloading the cache does not change the pinned usage
for (uint64_t i = 1; i < 2 * kCapacity; ++i) {
auto key = ToString(i);
cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter);
precise_cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
dumbDeleter);
}
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage());
cache->EraseUnRefEntries();
precise_cache->EraseUnRefEntries();
ASSERT_EQ(pinned_usage, cache->GetPinnedUsage());
ASSERT_EQ(precise_cache_pinned_usage, precise_cache->GetPinnedUsage());
// release handles for pinned entries to prevent memory leaks
for (auto handle : unreleased_handles) {
cache->Release(handle);
}
for (auto handle : unreleased_handles_in_precise_cache) {
precise_cache->Release(handle);
}
ASSERT_EQ(0, cache->GetPinnedUsage());
ASSERT_EQ(0, precise_cache->GetPinnedUsage());
cache->EraseUnRefEntries();
precise_cache->EraseUnRefEntries();
ASSERT_EQ(0, cache->GetUsage());
ASSERT_EQ(0, precise_cache->GetUsage());
}
TEST_P(CacheTest, HitAndMiss) {
@@ -550,10 +609,10 @@ TEST_P(CacheTest, SetCapacity) {
}
}
TEST_P(CacheTest, SetStrictCapacityLimit) {
TEST_P(LRUCacheTest, SetStrictCapacityLimit) {
// test1: set the flag to false. Insert more keys than capacity. See if they
// all go through.
std::shared_ptr<Cache> cache = NewLRUCache(5, 0, false);
std::shared_ptr<Cache> cache = NewCache(5, 0, false);
std::vector<Cache::Handle*> handles(10);
Status s;
for (size_t i = 0; i < 10; i++) {
@@ -579,7 +638,7 @@ TEST_P(CacheTest, SetStrictCapacityLimit) {
}
// test3: init with flag being true.
std::shared_ptr<Cache> cache2 = NewLRUCache(5, 0, true);
std::shared_ptr<Cache> cache2 = NewCache(5, 0, true);
for (size_t i = 0; i < 5; i++) {
std::string key = ToString(i + 1);
s = cache2->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
@@ -697,13 +756,14 @@ TEST_P(CacheTest, GetCharge) {
}
#ifdef SUPPORT_CLOCK_CACHE
std::shared_ptr<Cache> (*new_clock_cache_func)(size_t, int,
bool) = NewClockCache;
std::shared_ptr<Cache> (*new_clock_cache_func)(
size_t, int, bool, CacheMetadataChargePolicy) = NewClockCache;
INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
testing::Values(kLRU, kClock));
#else
INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest, testing::Values(kLRU));
#endif // SUPPORT_CLOCK_CACHE
INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest, testing::Values(kLRU));
} // namespace rocksdb

cache/clock_cache.cc

@@ -13,8 +13,9 @@
namespace rocksdb {
std::shared_ptr<Cache> NewClockCache(size_t /*capacity*/, int /*num_shard_bits*/,
bool /*strict_capacity_limit*/) {
std::shared_ptr<Cache> NewClockCache(
size_t /*capacity*/, int /*num_shard_bits*/, bool /*strict_capacity_limit*/,
CacheMetadataChargePolicy /*metadata_charge_policy*/) {
// Clock cache not supported.
return nullptr;
}
@@ -35,6 +36,7 @@ std::shared_ptr<Cache> NewClockCache(size_t /*capacity*/, int /*num_shard_bits*/
#include "tbb/concurrent_hash_map.h"
#include "cache/sharded_cache.h"
#include "port/malloc.h"
#include "port/port.h"
#include "util/autovector.h"
#include "util/mutexlock.h"
@@ -202,6 +204,27 @@ struct CacheHandle {
deleter = a.deleter;
return *this;
}
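// Compute the total charge recorded against the cache for this entry: the
// caller-supplied charge plus, under kFullChargeCacheMetadata, the memory
// used by the handle itself and its separately allocated key.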
inline static size_t CalcTotalCharge(
Slice key, size_t charge,
CacheMetadataChargePolicy metadata_charge_policy) {
size_t meta_charge = 0;
if (metadata_charge_policy == kFullChargeCacheMetadata) {
meta_charge += sizeof(CacheHandle);
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
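// The key was copied into its own heap buffer; charge the allocator's
// reported usable size for it.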
meta_charge +=
malloc_usable_size(static_cast<void*>(const_cast<char*>(key.data())));
#else
meta_charge += key.size();
#endif
}
return charge + meta_charge;
}
inline size_t CalcTotalCharge(
CacheMetadataChargePolicy metadata_charge_policy) {
return CalcTotalCharge(key, charge, metadata_charge_policy);
}
};
// Key of hash map. We store hash value with the key for convenience.
@@ -404,11 +427,12 @@ void ClockCacheShard::RecycleHandle(CacheHandle* handle,
assert(!InCache(handle->flags) && CountRefs(handle->flags) == 0);
context->to_delete_key.push_back(handle->key.data());
context->to_delete_value.emplace_back(*handle);
size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_);
handle->key.clear();
handle->value = nullptr;
handle->deleter = nullptr;
recycle_.push_back(handle);
usage_.fetch_sub(handle->charge, std::memory_order_relaxed);
usage_.fetch_sub(total_charge, std::memory_order_relaxed);
}
void ClockCacheShard::Cleanup(const CleanupContext& context) {
@@ -434,7 +458,8 @@ bool ClockCacheShard::Ref(Cache::Handle* h) {
std::memory_order_relaxed)) {
if (CountRefs(flags) == 0) {
// No reference count before the operation.
pinned_usage_.fetch_add(handle->charge, std::memory_order_relaxed);
size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_);
pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed);
}
return true;
}
@@ -454,7 +479,8 @@ bool ClockCacheShard::Unref(CacheHandle* handle, bool set_usage,
assert(CountRefs(flags) > 0);
if (CountRefs(flags) == 1) {
// this is the last reference.
pinned_usage_.fetch_sub(handle->charge, std::memory_order_relaxed);
size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_);
pinned_usage_.fetch_sub(total_charge, std::memory_order_relaxed);
// Cleanup if it is the last reference.
if (!InCache(flags)) {
MutexLock l(&mutex_);
@@ -539,8 +565,10 @@ CacheHandle* ClockCacheShard::Insert(
const Slice& key, uint32_t hash, void* value, size_t charge,
void (*deleter)(const Slice& key, void* value), bool hold_reference,
CleanupContext* context) {
size_t total_charge =
CacheHandle::CalcTotalCharge(key, charge, metadata_charge_policy_);
MutexLock l(&mutex_);
bool success = EvictFromCache(charge, context);
bool success = EvictFromCache(total_charge, context);
bool strict = strict_capacity_limit_.load(std::memory_order_relaxed);
if (!success && (strict || !hold_reference)) {
context->to_delete_key.push_back(key.data());
@@ -575,9 +603,9 @@ CacheHandle* ClockCacheShard::Insert(
}
table_.insert(HashTable::value_type(CacheKey(key, hash), handle));
if (hold_reference) {
pinned_usage_.fetch_add(charge, std::memory_order_relaxed);
pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed);
}
usage_.fetch_add(charge, std::memory_order_relaxed);
usage_.fetch_add(total_charge, std::memory_order_relaxed);
return handle;
}
@@ -674,10 +702,14 @@ void ClockCacheShard::EraseUnRefEntries() {
class ClockCache final : public ShardedCache {
public:
ClockCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit)
ClockCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
int num_shards = 1 << num_shard_bits;
shards_ = new ClockCacheShard[num_shards];
for (int i = 0; i < num_shards; i++) {
shards_[i].set_metadata_charge_policy(metadata_charge_policy);
}
SetCapacity(capacity);
SetStrictCapacityLimit(strict_capacity_limit);
}
@@ -714,13 +746,14 @@ class ClockCache final : public ShardedCache {
} // end anonymous namespace
std::shared_ptr<Cache> NewClockCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit) {
std::shared_ptr<Cache> NewClockCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) {
if (num_shard_bits < 0) {
num_shard_bits = GetDefaultCacheShardBits(capacity);
}
return std::make_shared<ClockCache>(capacity, num_shard_bits,
strict_capacity_limit);
return std::make_shared<ClockCache>(
capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy);
}
} // namespace rocksdb

cache/lru_cache.cc

@@ -97,7 +97,8 @@ void LRUHandleTable::Resize() {
LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
double high_pri_pool_ratio,
bool use_adaptive_mutex)
bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy)
: capacity_(0),
high_pri_pool_usage_(0),
strict_capacity_limit_(strict_capacity_limit),
@@ -106,6 +107,7 @@ LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
usage_(0),
lru_usage_(0),
mutex_(use_adaptive_mutex) {
set_metadata_charge_policy(metadata_charge_policy);
// Make empty circular linked list
lru_.next = &lru_;
lru_.prev = &lru_;
@@ -124,7 +126,9 @@ void LRUCacheShard::EraseUnRefEntries() {
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
old->SetInCache(false);
usage_ -= old->charge;
size_t total_charge = old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
last_reference_list.push_back(old);
}
}
@@ -180,16 +184,19 @@ void LRUCacheShard::LRU_Remove(LRUHandle* e) {
e->next->prev = e->prev;
e->prev->next = e->next;
e->prev = e->next = nullptr;
lru_usage_ -= e->charge;
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(lru_usage_ >= total_charge);
lru_usage_ -= total_charge;
if (e->InHighPriPool()) {
assert(high_pri_pool_usage_ >= e->charge);
high_pri_pool_usage_ -= e->charge;
assert(high_pri_pool_usage_ >= total_charge);
high_pri_pool_usage_ -= total_charge;
}
}
void LRUCacheShard::LRU_Insert(LRUHandle* e) {
assert(e->next == nullptr);
assert(e->prev == nullptr);
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
if (high_pri_pool_ratio_ > 0 && (e->IsHighPri() || e->HasHit())) {
// Insert "e" to head of LRU list.
e->next = &lru_;
@@ -197,7 +204,7 @@ void LRUCacheShard::LRU_Insert(LRUHandle* e) {
e->prev->next = e;
e->next->prev = e;
e->SetInHighPriPool(true);
high_pri_pool_usage_ += e->charge;
high_pri_pool_usage_ += total_charge;
MaintainPoolSize();
} else {
// Insert "e" to the head of low-pri pool. Note that when
@@ -209,7 +216,7 @@ void LRUCacheShard::LRU_Insert(LRUHandle* e) {
e->SetInHighPriPool(false);
lru_low_pri_ = e;
}
lru_usage_ += e->charge;
lru_usage_ += total_charge;
}
void LRUCacheShard::MaintainPoolSize() {
@@ -218,6 +225,7 @@ void LRUCacheShard::MaintainPoolSize() {
lru_low_pri_ = lru_low_pri_->next;
assert(lru_low_pri_ != &lru_);
lru_low_pri_->SetInHighPriPool(false);
assert(high_pri_pool_usage_ >= lru_low_pri_->charge);
high_pri_pool_usage_ -= lru_low_pri_->charge;
}
}
@@ -231,7 +239,9 @@ void LRUCacheShard::EvictFromLRU(size_t charge,
LRU_Remove(old);
table_.Remove(old->key(), old->hash);
old->SetInCache(false);
usage_ -= old->charge;
size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= old_total_charge);
usage_ -= old_total_charge;
deleted->push_back(old);
}
}
@@ -311,7 +321,9 @@ bool LRUCacheShard::Release(Cache::Handle* handle, bool force_erase) {
}
}
if (last_reference) {
usage_ -= e->charge;
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
}
}
@@ -345,15 +357,16 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
e->SetInCache(true);
e->SetPriority(priority);
memcpy(e->key_data, key.data(), key.size());
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
{
MutexLock l(&mutex_);
// Free the space following strict LRU policy until enough space
// is freed or the lru list is empty
EvictFromLRU(charge, &last_reference_list);
EvictFromLRU(total_charge, &last_reference_list);
if ((usage_ + charge) > capacity_ &&
if ((usage_ + total_charge) > capacity_ &&
(strict_capacity_limit_ || handle == nullptr)) {
if (handle == nullptr) {
// Don't insert the entry but still return ok, as if the entry inserted
@@ -369,14 +382,17 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
// Insert into the cache. Note that the cache might get larger than its
// capacity if not enough space was freed up.
LRUHandle* old = table_.Insert(e);
usage_ += e->charge;
usage_ += total_charge;
if (old != nullptr) {
assert(old->InCache());
old->SetInCache(false);
if (!old->HasRefs()) {
// old is on LRU because it's in cache and its reference count is 0
LRU_Remove(old);
usage_ -= old->charge;
size_t old_total_charge =
old->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= old_total_charge);
usage_ -= old_total_charge;
last_reference_list.push_back(old);
}
}
@@ -409,7 +425,9 @@ void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
if (!e->HasRefs()) {
// The entry is in LRU since it's in hash and has no external references
LRU_Remove(e);
usage_ -= e->charge;
size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
assert(usage_ >= total_charge);
usage_ -= total_charge;
last_reference = true;
}
}
@@ -447,7 +465,8 @@ std::string LRUCacheShard::GetPrintableOptions() const {
LRUCache::LRUCache(size_t capacity, int num_shard_bits,
bool strict_capacity_limit, double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> allocator,
bool use_adaptive_mutex)
bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit,
std::move(allocator)) {
num_shards_ = 1 << num_shard_bits;
@@ -457,7 +476,7 @@ LRUCache::LRUCache(size_t capacity, int num_shard_bits,
for (int i = 0; i < num_shards_; i++) {
new (&shards_[i])
LRUCacheShard(per_shard, strict_capacity_limit, high_pri_pool_ratio,
use_adaptive_mutex);
use_adaptive_mutex, metadata_charge_policy);
}
}
@@ -526,15 +545,15 @@ std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits,
cache_opts.strict_capacity_limit,
cache_opts.high_pri_pool_ratio,
cache_opts.memory_allocator,
cache_opts.use_adaptive_mutex);
cache_opts.memory_allocator, cache_opts.use_adaptive_mutex,
cache_opts.metadata_charge_policy);
}
std::shared_ptr<Cache> NewLRUCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator,
bool use_adaptive_mutex) {
std::shared_ptr<MemoryAllocator> memory_allocator, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy) {
if (num_shard_bits >= 20) {
return nullptr; // the cache cannot be sharded into too many fine pieces
}
@@ -545,10 +564,9 @@ std::shared_ptr<Cache> NewLRUCache(
if (num_shard_bits < 0) {
num_shard_bits = GetDefaultCacheShardBits(capacity);
}
return std::make_shared<LRUCache>(capacity, num_shard_bits,
strict_capacity_limit, high_pri_pool_ratio,
std::move(memory_allocator),
use_adaptive_mutex);
return std::make_shared<LRUCache>(
capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio,
std::move(memory_allocator), use_adaptive_mutex, metadata_charge_policy);
}
} // namespace rocksdb

cache/lru_cache.h

@@ -12,6 +12,7 @@
#include "cache/sharded_cache.h"
#include "port/malloc.h"
#include "port/port.h"
#include "util/autovector.h"
@@ -128,6 +129,22 @@ struct LRUHandle {
}
delete[] reinterpret_cast<char*>(this);
}
// Calculate the total charge, including the memory usage of metadata
inline size_t CalcTotalCharge(
CacheMetadataChargePolicy metadata_charge_policy) {
assert(key_length);
size_t meta_charge = 0;
if (metadata_charge_policy == kFullChargeCacheMetadata) {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
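// The handle and its inlined key share a single char[] allocation (see
// sizeof(LRUHandle) - 1 + key_length below), so malloc_usable_size(this)
// covers both.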
meta_charge += malloc_usable_size(static_cast<void*>(this));
#else
// This is the size that is used when a new handle is created
meta_charge += sizeof(LRUHandle) - 1 + key_length;
#endif
}
return charge + meta_charge;
}
};
// We provide our own simple hash table since it removes a whole bunch
@@ -176,7 +193,8 @@ class LRUHandleTable {
class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
public:
LRUCacheShard(size_t capacity, bool strict_capacity_limit,
double high_pri_pool_ratio, bool use_adaptive_mutex);
double high_pri_pool_ratio, bool use_adaptive_mutex,
CacheMetadataChargePolicy metadata_charge_policy);
virtual ~LRUCacheShard() override = default;
// Separate from constructor so caller can easily make an array of LRUCache
@@ -297,7 +315,9 @@ class LRUCache
LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
double high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
bool use_adaptive_mutex = kDefaultToAdaptiveMutex);
bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata);
virtual ~LRUCache();
virtual const char* Name() const override { return "LRUCache"; }
virtual CacheShard* GetShard(int shard) override;

cache/lru_cache_test.cc

@@ -31,7 +31,8 @@ class LRUCacheTest : public testing::Test {
cache_ = reinterpret_cast<LRUCacheShard*>(
port::cacheline_aligned_alloc(sizeof(LRUCacheShard)));
new (cache_) LRUCacheShard(capacity, false /*strict_capacity_limit*/,
high_pri_pool_ratio, use_adaptive_mutex);
high_pri_pool_ratio, use_adaptive_mutex,
kDontChargeCacheMetadata);
}
void Insert(const std::string& key,

cache/sharded_cache.h

@@ -40,6 +40,13 @@ class CacheShard {
bool thread_safe) = 0;
virtual void EraseUnRefEntries() = 0;
virtual std::string GetPrintableOptions() const { return ""; }
void set_metadata_charge_policy(
CacheMetadataChargePolicy metadata_charge_policy) {
metadata_charge_policy_ = metadata_charge_policy;
}
protected:
CacheMetadataChargePolicy metadata_charge_policy_ = kDontChargeCacheMetadata;
};
// Generic cache interface which shards cache by hash of keys. 2^num_shard_bits

db/db_block_cache_test.cc

@@ -380,8 +380,13 @@ TEST_F(DBBlockCacheTest, IndexAndFilterBlocksStats) {
options.statistics = rocksdb::CreateDBStatistics();
BlockBasedTableOptions table_options;
table_options.cache_index_and_filter_blocks = true;
LRUCacheOptions co;
// 500 bytes are enough to hold the first two blocks
std::shared_ptr<Cache> cache = NewLRUCache(500, 0, false);
co.capacity = 500;
co.num_shard_bits = 0;
co.strict_capacity_limit = false;
co.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<Cache> cache = NewLRUCache(co);
table_options.block_cache = cache;
table_options.filter_policy.reset(NewBloomFilterPolicy(20, true));
options.table_factory.reset(new BlockBasedTableFactory(table_options));

db/db_impl/db_impl.cc

@@ -240,8 +240,11 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
const int table_cache_size = (mutable_db_options_.max_open_files == -1)
? TableCache::kInfiniteCapacity
: mutable_db_options_.max_open_files - 10;
table_cache_ = NewLRUCache(table_cache_size,
immutable_db_options_.table_cache_numshardbits);
LRUCacheOptions co;
co.capacity = table_cache_size;
co.num_shard_bits = immutable_db_options_.table_cache_numshardbits;
co.metadata_charge_policy = kDontChargeCacheMetadata;
table_cache_ = NewLRUCache(co);
versions_.reset(new VersionSet(dbname_, &immutable_db_options_, env_options_,
table_cache_.get(), write_buffer_manager_,

db/db_iterator_test.cc

@@ -1070,7 +1070,8 @@ TEST_P(DBIteratorTest, IndexWithFirstKey) {
BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
table_options.flush_block_policy_factory =
std::make_shared<FlushBlockEveryKeyPolicyFactory>();
table_options.block_cache = NewLRUCache(1000); // fits all blocks
table_options.block_cache =
NewLRUCache(8000); // fits all blocks and their cache metadata overhead
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
DestroyAndReopen(options);

db/db_properties_test.cc

@@ -1631,7 +1631,11 @@ TEST_F(DBPropertiesTest, BlockCacheProperties) {
// Test with empty block cache.
constexpr size_t kCapacity = 100;
auto block_cache = NewLRUCache(kCapacity, 0 /*num_shard_bits*/);
LRUCacheOptions co;
co.capacity = kCapacity;
co.num_shard_bits = 0;
co.metadata_charge_policy = kDontChargeCacheMetadata;
auto block_cache = NewLRUCache(co);
table_options.block_cache = block_cache;
table_options.no_block_cache = false;
options.table_factory.reset(NewBlockBasedTableFactory(table_options));

db/db_test2.cc

@@ -3780,7 +3780,7 @@ TEST_F(DBTest2, CloseWithUnreleasedSnapshot) {
TEST_F(DBTest2, RowCacheSnapshot) {
Options options = CurrentOptions();
options.statistics = rocksdb::CreateDBStatistics();
options.row_cache = NewLRUCache(8192);
options.row_cache = NewLRUCache(8 * 8192);
DestroyAndReopen(options);
ASSERT_OK(Put("foo", "bar1"));

env/env_test.cc

@@ -11,13 +11,6 @@
#include <sys/ioctl.h>
#endif
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
#ifdef OS_FREEBSD
#include <malloc_np.h>
#else
#include <malloc.h>
#endif
#endif
#include <sys/types.h>
#include <iostream>
@@ -39,6 +32,7 @@
#include "env/env_chroot.h"
#include "logging/log_buffer.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "test_util/sync_point.h"

include/rocksdb/cache.h

@@ -36,6 +36,13 @@ class Cache;
extern const bool kDefaultToAdaptiveMutex;
enum CacheMetadataChargePolicy {
kDontChargeCacheMetadata,
kFullChargeCacheMetadata
};
const CacheMetadataChargePolicy kDefaultCacheMetadataChargePolicy =
kFullChargeCacheMetadata;
struct LRUCacheOptions {
// Capacity of the cache.
size_t capacity = 0;
@@ -76,17 +83,23 @@ struct LRUCacheOptions {
// -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX, false otherwise.
bool use_adaptive_mutex = kDefaultToAdaptiveMutex;
CacheMetadataChargePolicy metadata_charge_policy =
kDefaultCacheMetadataChargePolicy;
LRUCacheOptions() {}
LRUCacheOptions(size_t _capacity, int _num_shard_bits,
bool _strict_capacity_limit, double _high_pri_pool_ratio,
std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
bool _use_adaptive_mutex = kDefaultToAdaptiveMutex)
bool _use_adaptive_mutex = kDefaultToAdaptiveMutex,
CacheMetadataChargePolicy _metadata_charge_policy =
kDefaultCacheMetadataChargePolicy)
: capacity(_capacity),
num_shard_bits(_num_shard_bits),
strict_capacity_limit(_strict_capacity_limit),
high_pri_pool_ratio(_high_pri_pool_ratio),
memory_allocator(std::move(_memory_allocator)),
use_adaptive_mutex(_use_adaptive_mutex) {}
use_adaptive_mutex(_use_adaptive_mutex),
metadata_charge_policy(_metadata_charge_policy) {}
};
// Create a new cache with a fixed size capacity. The cache is sharded
@@ -101,7 +114,9 @@ extern std::shared_ptr<Cache> NewLRUCache(
size_t capacity, int num_shard_bits = -1,
bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5,
std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
bool use_adaptive_mutex = kDefaultToAdaptiveMutex);
bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
CacheMetadataChargePolicy metadata_charge_policy =
kDefaultCacheMetadataChargePolicy);
extern std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts);
@@ -110,10 +125,11 @@ extern std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts);
// more detail.
//
// Return nullptr if it is not supported.
extern std::shared_ptr<Cache> NewClockCache(size_t capacity,
int num_shard_bits = -1,
bool strict_capacity_limit = false);
extern std::shared_ptr<Cache> NewClockCache(
size_t capacity, int num_shard_bits = -1,
bool strict_capacity_limit = false,
CacheMetadataChargePolicy metadata_charge_policy =
kDefaultCacheMetadataChargePolicy);
class Cache {
public:
// Depending on implementation, cache entries with high priority could be less

memory/arena.cc

@@ -8,18 +8,12 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "memory/arena.h"
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
#ifdef OS_FREEBSD
#include <malloc_np.h>
#else
#include <malloc.h>
#endif
#endif
#ifndef OS_WIN
#include <sys/mman.h>
#endif
#include <algorithm>
#include "logging/logging.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "test_util/sync_point.h"

memtable/write_buffer_manager_test.cc

@@ -51,8 +51,12 @@ TEST_F(WriteBufferManagerTest, ShouldFlush) {
}
TEST_F(WriteBufferManagerTest, CacheCost) {
LRUCacheOptions co;
// 1GB cache
std::shared_ptr<Cache> cache = NewLRUCache(1024 * 1024 * 1024, 4);
co.capacity = 1024 * 1024 * 1024;
co.num_shard_bits = 4;
co.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<Cache> cache = NewLRUCache(co);
// A write buffer manager of size 50MB
std::unique_ptr<WriteBufferManager> wbf(
new WriteBufferManager(50 * 1024 * 1024, cache));

port/malloc.h (new file)

@@ -0,0 +1,17 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
#ifdef OS_FREEBSD
#include <malloc_np.h>
#else
#include <malloc.h>
#endif // OS_FREEBSD
#endif // ROCKSDB_MALLOC_USABLE_SIZE
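
With this header in place, a file that needs malloc_usable_size() includes "port/malloc.h" once instead of repeating the platform #ifdef block, as the removals in env_test.cc, memory/arena.cc, and the table/block_based files elsewhere in this diff show.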

table/block_based/block.h

@@ -12,16 +12,10 @@
#include <stdint.h>
#include <string>
#include <vector>
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
#ifdef OS_FREEBSD
#include <malloc_np.h>
#else
#include <malloc.h>
#endif
#endif
#include "db/dbformat.h"
#include "db/pinned_iterators_manager.h"
#include "port/malloc.h"
#include "rocksdb/iterator.h"
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"

table/block_based/full_filter_block.cc

@@ -6,15 +6,8 @@
#include <array>
#include "table/block_based/full_filter_block.h"
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
#ifdef OS_FREEBSD
#include <malloc_np.h>
#else
#include <malloc.h>
#endif
#endif
#include "monitoring/perf_context_imp.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_reader.h"

table/block_based/partitioned_filter_block.cc

@@ -5,16 +5,10 @@
#include "table/block_based/partitioned_filter_block.h"
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
#ifdef OS_FREEBSD
#include <malloc_np.h>
#else
#include <malloc.h>
#endif
#endif
#include <utility>
#include "monitoring/perf_context_imp.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/block.h"

table/format.h

@@ -10,13 +10,6 @@
#pragma once
#include <stdint.h>
#include <string>
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
#ifdef OS_FREEBSD
#include <malloc_np.h>
#else
#include <malloc.h>
#endif
#endif
#include "file/file_prefetch_buffer.h"
#include "file/random_access_file_reader.h"
@@ -27,6 +20,7 @@
#include "memory/memory_allocator.h"
#include "options/cf_options.h"
#include "port/malloc.h"
#include "port/port.h" // noexcept
#include "table/persistent_cache_options.h"
#include "util/crc32c.h"

table/table_test.cc

@@ -2599,7 +2599,11 @@ TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) {
// Enable the cache for index/filter blocks
BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
table_options.block_cache = NewLRUCache(2048, 2);
LRUCacheOptions co;
co.capacity = 2048;
co.num_shard_bits = 2;
co.metadata_charge_policy = kDontChargeCacheMetadata;
table_options.block_cache = NewLRUCache(co);
table_options.cache_index_and_filter_blocks = true;
options.table_factory.reset(new BlockBasedTableFactory(table_options));
std::vector<std::string> keys;

utilities/simulator_cache/cache_simulator_test.cc

@@ -313,10 +313,13 @@ TEST_F(CacheSimulatorTest, HybridRowBlockCacheSimulatorGetTest) {
get.sst_fd_number = 0;
get.get_from_user_specified_snapshot = Boolean::kFalse;
std::shared_ptr<Cache> sim_cache =
NewLRUCache(/*capacity=*/16, /*num_shard_bits=*/1,
/*strict_capacity_limit=*/false,
/*high_pri_pool_ratio=*/0);
LRUCacheOptions co;
co.capacity = 16;
co.num_shard_bits = 1;
co.strict_capacity_limit = false;
co.high_pri_pool_ratio = 0;
co.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<Cache> sim_cache = NewLRUCache(co);
std::unique_ptr<HybridRowBlockCacheSimulator> cache_simulator(
new HybridRowBlockCacheSimulator(
nullptr, sim_cache, /*insert_blocks_row_kvpair_misses=*/true));

utilities/simulator_cache/sim_cache.cc

@@ -331,8 +331,11 @@ class SimCacheImpl : public SimCache {
// For instrumentation purpose, use NewSimCache instead
std::shared_ptr<SimCache> NewSimCache(std::shared_ptr<Cache> cache,
size_t sim_capacity, int num_shard_bits) {
return NewSimCache(NewLRUCache(sim_capacity, num_shard_bits), cache,
num_shard_bits);
LRUCacheOptions co;
co.capacity = sim_capacity;
co.num_shard_bits = num_shard_bits;
co.metadata_charge_policy = kDontChargeCacheMetadata;
return NewSimCache(NewLRUCache(co), cache, num_shard_bits);
}
std::shared_ptr<SimCache> NewSimCache(std::shared_ptr<Cache> sim_cache,

utilities/simulator_cache/sim_cache_test.cc

@@ -77,8 +77,12 @@ TEST_F(SimCacheTest, SimCache) {
auto table_options = GetTableOptions();
auto options = GetOptions(table_options);
InitTable(options);
std::shared_ptr<SimCache> simCache =
NewSimCache(NewLRUCache(0, 0, false), 20000, 0);
LRUCacheOptions co;
co.capacity = 0;
co.num_shard_bits = 0;
co.strict_capacity_limit = false;
co.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SimCache> simCache = NewSimCache(NewLRUCache(co), 20000, 0);
table_options.block_cache = simCache;
options.table_factory.reset(new BlockBasedTableFactory(table_options));
Reopen(options);
@@ -142,8 +146,10 @@ TEST_F(SimCacheTest, SimCacheLogging) {
auto table_options = GetTableOptions();
auto options = GetOptions(table_options);
options.disable_auto_compactions = true;
std::shared_ptr<SimCache> sim_cache =
NewSimCache(NewLRUCache(1024 * 1024), 20000, 0);
LRUCacheOptions co;
co.capacity = 1024 * 1024;
co.metadata_charge_policy = kDontChargeCacheMetadata;
std::shared_ptr<SimCache> sim_cache = NewSimCache(NewLRUCache(co), 20000, 0);
table_options.block_cache = sim_cache;
options.table_factory.reset(new BlockBasedTableFactory(table_options));
Reopen(options);