rocksdb/memory/arena.cc
Maysam Yabandeh 638d239507 Charge block cache for cache internal usage (#5797)
Summary:
For our default block cache, each additional entry has extra memory overhead: an LRUHandle (currently 72 bytes) plus the cache key (two varint64s, file id and offset). This usage is not negligible; for example, with block_size=4k the overhead accounts for an extra 2% of memory usage in the cache. This patch charges the cache for that extra usage, reducing untracked memory usage outside the block cache. The feature is enabled by default and can be disabled by passing kDontChargeCacheMetadata to the cache constructor (see the sketch below).
This PR builds up on https://github.com/facebook/rocksdb/issues/4258
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5797
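
For illustration, a minimal sketch of opting out, assuming the post-patch public API in include/rocksdb/cache.h (the enum values are the ones named in this summary and test plan; the helper name is hypothetical):

    #include "rocksdb/cache.h"

    std::shared_ptr<rocksdb::Cache> MakeCacheWithoutMetadataCharge() {
      rocksdb::LRUCacheOptions opts;
      opts.capacity = 64 << 20;  // 64 MiB
      // Opt out of the new accounting; kFullChargeCacheMetadata is the default.
      opts.metadata_charge_policy = rocksdb::kDontChargeCacheMetadata;
      return rocksdb::NewLRUCache(opts);
    }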

Test Plan:
- Existing tests are updated to either disable the feature, when a test depends too heavily on the old way of accounting the usage, or increase the cache capacity to cover the additional metadata charge.
- The Usage tests in cache_test.cc are augmented to test the cache usage under kFullChargeCacheMetadata.

Differential Revision: D17396833

Pulled By: maysamyabandeh

fbshipit-source-id: 7684ccb9f8a40ca595e4f5efcdb03623afea0c6f
2019-09-16 15:26:21 -07:00


// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "memory/arena.h"
#ifndef OS_WIN
#include <sys/mman.h>
#endif
#include <algorithm>
#include "logging/logging.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "test_util/sync_point.h"
namespace rocksdb {
// MSVC complains that it is already defined since it is static in the header.
#ifndef _MSC_VER
const size_t Arena::kInlineSize;
#endif
const size_t Arena::kMinBlockSize = 4096;
const size_t Arena::kMaxBlockSize = 2u << 30;
static const int kAlignUnit = alignof(max_align_t);
size_t OptimizeBlockSize(size_t block_size) {
  // Make sure block_size is in the optimal range
  block_size = std::max(Arena::kMinBlockSize, block_size);
  block_size = std::min(Arena::kMaxBlockSize, block_size);
  // Make sure block_size is a multiple of kAlignUnit
  if (block_size % kAlignUnit != 0) {
    block_size = (1 + block_size / kAlignUnit) * kAlignUnit;
  }
  return block_size;
}
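// Worked example (illustrative; kAlignUnit is alignof(max_align_t), often 16
// on x86-64): OptimizeBlockSize(5000) keeps 5000, which is already within
// [kMinBlockSize, kMaxBlockSize], and rounds it up to 5008, the next multiple
// of 16. OptimizeBlockSize(100) is clamped up to kMinBlockSize (4096), which
// is already aligned.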
Arena::Arena(size_t block_size, AllocTracker* tracker, size_t huge_page_size)
    : kBlockSize(OptimizeBlockSize(block_size)), tracker_(tracker) {
  assert(kBlockSize >= kMinBlockSize && kBlockSize <= kMaxBlockSize &&
         kBlockSize % kAlignUnit == 0);
  TEST_SYNC_POINT_CALLBACK("Arena::Arena:0", const_cast<size_t*>(&kBlockSize));
  alloc_bytes_remaining_ = sizeof(inline_block_);
  blocks_memory_ += alloc_bytes_remaining_;
  aligned_alloc_ptr_ = inline_block_;
  unaligned_alloc_ptr_ = inline_block_ + alloc_bytes_remaining_;
#ifdef MAP_HUGETLB
  hugetlb_size_ = huge_page_size;
  if (hugetlb_size_ && kBlockSize > hugetlb_size_) {
    hugetlb_size_ = ((kBlockSize - 1U) / hugetlb_size_ + 1U) * hugetlb_size_;
  }
#else
  (void)huge_page_size;
#endif
  if (tracker_ != nullptr) {
    tracker_->Allocate(kInlineSize);
  }
}
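// Note that a freshly constructed Arena serves its first allocations out of
// inline_block_ (kInlineSize bytes embedded in the Arena object itself), so a
// short-lived Arena that stays under that budget never touches the heap.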
Arena::~Arena() {
  if (tracker_ != nullptr) {
    assert(tracker_->is_freed());
    tracker_->FreeMem();
  }
  for (const auto& block : blocks_) {
    delete[] block;
  }
#ifdef MAP_HUGETLB
  for (const auto& mmap_info : huge_blocks_) {
    if (mmap_info.addr_ == nullptr) {
      continue;
    }
    auto ret = munmap(mmap_info.addr_, mmap_info.length_);
    if (ret != 0) {
      // TODO(sdong): Better handling
    }
  }
#endif
}
char* Arena::AllocateFallback(size_t bytes, bool aligned) {
  if (bytes > kBlockSize / 4) {
    ++irregular_block_num;
    // Object is more than a quarter of our block size. Allocate it separately
    // to avoid wasting too much space in leftover bytes.
    return AllocateNewBlock(bytes);
  }
  // We waste the remaining space in the current block.
  size_t size = 0;
  char* block_head = nullptr;
#ifdef MAP_HUGETLB
  if (hugetlb_size_) {
    size = hugetlb_size_;
    block_head = AllocateFromHugePage(size);
  }
#endif
  if (!block_head) {
    size = kBlockSize;
    block_head = AllocateNewBlock(size);
  }
  alloc_bytes_remaining_ = size - bytes;
  if (aligned) {
    aligned_alloc_ptr_ = block_head + bytes;
    unaligned_alloc_ptr_ = block_head + size;
    return block_head;
  } else {
    aligned_alloc_ptr_ = block_head;
    unaligned_alloc_ptr_ = block_head + size - bytes;
    return unaligned_alloc_ptr_;
  }
}
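// Illustration of the quarter-block heuristic above (assuming
// kBlockSize == 4096 and no huge pages): a 2000-byte request exceeds
// kBlockSize / 4 == 1024, so it gets a dedicated 2000-byte block and the
// current block keeps serving small requests; a 500-byte request instead
// abandons whatever is left in the current block and starts a fresh 4096-byte
// block, leaving 3596 bytes for subsequent allocations.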
char* Arena::AllocateFromHugePage(size_t bytes) {
#ifdef MAP_HUGETLB
  if (hugetlb_size_ == 0) {
    return nullptr;
  }
  // Reserve space in `huge_blocks_` before calling `mmap`.
  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
  // own memory and do fewer reallocations.
  //
  // - If `emplace_back` throws, no memory leaks because we haven't called
  //   `mmap` yet.
  // - If `mmap` fails, no memory leaks either: we return nullptr, and the
  //   placeholder entry (a nullptr addr_) is skipped by the destructor.
  huge_blocks_.emplace_back(nullptr /* addr */, 0 /* length */);
  void* addr = mmap(nullptr, bytes, (PROT_READ | PROT_WRITE),
                    (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), -1, 0);
  if (addr == MAP_FAILED) {
    return nullptr;
  }
  huge_blocks_.back() = MmapInfo(addr, bytes);
  blocks_memory_ += bytes;
  if (tracker_ != nullptr) {
    tracker_->Allocate(bytes);
  }
  return reinterpret_cast<char*>(addr);
#else
  (void)bytes;
  return nullptr;
#endif
}
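// Note: MAP_HUGETLB mappings only succeed when the kernel has huge pages
// reserved (e.g. via /proc/sys/vm/nr_hugepages); otherwise mmap returns
// MAP_FAILED and callers fall back to the regular new[]-based block path.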
char* Arena::AllocateAligned(size_t bytes, size_t huge_page_size,
                             Logger* logger) {
  assert((kAlignUnit & (kAlignUnit - 1)) ==
         0);  // kAlignUnit must be a power of 2
#ifdef MAP_HUGETLB
  if (huge_page_size > 0 && bytes > 0) {
    // Allocate from huge pages to reduce TLB misses.
    assert(logger != nullptr);  // logger needs to be passed in
    size_t reserved_size =
        ((bytes - 1U) / huge_page_size + 1U) * huge_page_size;
    assert(reserved_size >= bytes);
    char* addr = AllocateFromHugePage(reserved_size);
    if (addr == nullptr) {
      ROCKS_LOG_WARN(logger,
                     "AllocateAligned fail to allocate huge TLB pages: %s",
                     strerror(errno));
      // fall back to regular block allocation below
    } else {
      return addr;
    }
  }
#else
  (void)huge_page_size;
  (void)logger;
#endif
  size_t current_mod =
      reinterpret_cast<uintptr_t>(aligned_alloc_ptr_) & (kAlignUnit - 1);
  size_t slop = (current_mod == 0 ? 0 : kAlignUnit - current_mod);
  size_t needed = bytes + slop;
  char* result;
  if (needed <= alloc_bytes_remaining_) {
    result = aligned_alloc_ptr_ + slop;
    aligned_alloc_ptr_ += needed;
    alloc_bytes_remaining_ -= needed;
  } else {
    // AllocateFallback always returns aligned memory
    result = AllocateFallback(bytes, true /* aligned */);
  }
  assert((reinterpret_cast<uintptr_t>(result) & (kAlignUnit - 1)) == 0);
  return result;
}
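// Example of the alignment math above (assuming kAlignUnit == 16): if
// aligned_alloc_ptr_ ends in 0x...8, then current_mod == 8 and slop == 8, so
// a 100-byte request consumes needed == 108 bytes and returns the pointer
// advanced by 8, which is 16-byte aligned.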
char* Arena::AllocateNewBlock(size_t block_bytes) {
  // Reserve space in `blocks_` before allocating memory via new.
  // Use `emplace_back()` instead of `reserve()` to let std::vector manage its
  // own memory and do fewer reallocations.
  //
  // - If `emplace_back` throws, no memory leaks because we haven't called
  //   `new` yet.
  // - If `new` throws, no memory leaks because the vector will be cleaned up
  //   via RAII.
  blocks_.emplace_back(nullptr);
  char* block = new char[block_bytes];
  size_t allocated_size;
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
  allocated_size = malloc_usable_size(block);
#ifndef NDEBUG
  // It's hard to predict what malloc_usable_size() returns.
  // A callback can allow users to change the charged size.
  std::pair<size_t*, size_t*> pair(&allocated_size, &block_bytes);
  TEST_SYNC_POINT_CALLBACK("Arena::AllocateNewBlock:0", &pair);
#endif  // NDEBUG
#else
  allocated_size = block_bytes;
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
  blocks_memory_ += allocated_size;
  if (tracker_ != nullptr) {
    tracker_->Allocate(allocated_size);
  }
  blocks_.back() = block;
  return block;
}
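// Note: with ROCKSDB_MALLOC_USABLE_SIZE, blocks_memory_ records what the
// allocator actually handed out rather than what was requested; allocators
// such as jemalloc round requests up to their size classes, so
// malloc_usable_size(block) can exceed block_bytes.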
} // namespace rocksdb