rocksdb/util/arena.h
Mike Kolupaev e85f2c64cb Prevent empty memtables from using a lot of memory
Summary:
This fixes OOMs that we (logdevice) are currently having in production.

SkipListRep constructor does a couple small allocations from ConcurrentArena (see InlineSkipList constructor). ConcurrentArena would sometimes allocate an entire block for that, which is a few megabytes (we use Options::arena_block_size = 4 MB). So an empty memtable can take take 4 MB of memory. We have ~40k column families (spread across 15 DB instances), so 4 MB per empty memtable easily OOMs a machine for us.

This PR makes ConcurrentArena always allocate from Arena's inline block when possible. So as long as InlineSkipList's initial allocations are below 2 KB there would be no blocks allocated for empty memtables.
Closes https://github.com/facebook/rocksdb/pull/2569

Differential Revision: D5404029

Pulled By: al13n321

fbshipit-source-id: 568ec22a3fd1a485c06123f6b2dfc5e9ef67cd23
2017-07-28 15:58:43 -07:00

142 lines
5.1 KiB
C++

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// Arena is an implementation of Allocator class. For a request of small size,
// it allocates a block with pre-defined block size. For a request of big
// size, it uses malloc to directly get the requested size.
#pragma once
#ifndef OS_WIN
#include <sys/mman.h>
#endif
#include <cstddef>
#include <cerrno>
#include <vector>
#include <assert.h>
#include <stdint.h>
#include "util/allocator.h"
#include "util/mutexlock.h"
namespace rocksdb {
class Arena : public Allocator {
public:
// No copying allowed
Arena(const Arena&) = delete;
void operator=(const Arena&) = delete;
static const size_t kInlineSize = 2048;
static const size_t kMinBlockSize;
static const size_t kMaxBlockSize;
// huge_page_size: if 0, don't use huge page TLB. If > 0 (should set to the
// supported hugepage size of the system), block allocation will try huge
// page TLB first. If allocation fails, will fall back to normal case.
explicit Arena(size_t block_size = kMinBlockSize,
AllocTracker* tracker = nullptr, size_t huge_page_size = 0);
~Arena();
char* Allocate(size_t bytes) override;
// huge_page_size: if >0, will try to allocate from huage page TLB.
// The argument will be the size of the page size for huge page TLB. Bytes
// will be rounded up to multiple of the page size to allocate through mmap
// anonymous option with huge page on. The extra space allocated will be
// wasted. If allocation fails, will fall back to normal case. To enable it,
// need to reserve huge pages for it to be allocated, like:
// sysctl -w vm.nr_hugepages=20
// See linux doc Documentation/vm/hugetlbpage.txt for details.
// huge page allocation can fail. In this case it will fail back to
// normal cases. The messages will be logged to logger. So when calling with
// huge_page_tlb_size > 0, we highly recommend a logger is passed in.
// Otherwise, the error message will be printed out to stderr directly.
char* AllocateAligned(size_t bytes, size_t huge_page_size = 0,
Logger* logger = nullptr) override;
// Returns an estimate of the total memory usage of data allocated
// by the arena (exclude the space allocated but not yet used for future
// allocations).
size_t ApproximateMemoryUsage() const {
return blocks_memory_ + blocks_.capacity() * sizeof(char*) -
alloc_bytes_remaining_;
}
size_t MemoryAllocatedBytes() const { return blocks_memory_; }
size_t AllocatedAndUnused() const { return alloc_bytes_remaining_; }
// If an allocation is too big, we'll allocate an irregular block with the
// same size of that allocation.
size_t IrregularBlockNum() const { return irregular_block_num; }
size_t BlockSize() const override { return kBlockSize; }
bool IsInInlineBlock() const {
return blocks_.empty();
}
private:
char inline_block_[kInlineSize] __attribute__((__aligned__(sizeof(void*))));
// Number of bytes allocated in one block
const size_t kBlockSize;
// Array of new[] allocated memory blocks
typedef std::vector<char*> Blocks;
Blocks blocks_;
struct MmapInfo {
void* addr_;
size_t length_;
MmapInfo(void* addr, size_t length) : addr_(addr), length_(length) {}
};
std::vector<MmapInfo> huge_blocks_;
size_t irregular_block_num = 0;
// Stats for current active block.
// For each block, we allocate aligned memory chucks from one end and
// allocate unaligned memory chucks from the other end. Otherwise the
// memory waste for alignment will be higher if we allocate both types of
// memory from one direction.
char* unaligned_alloc_ptr_ = nullptr;
char* aligned_alloc_ptr_ = nullptr;
// How many bytes left in currently active block?
size_t alloc_bytes_remaining_ = 0;
#ifdef MAP_HUGETLB
size_t hugetlb_size_ = 0;
#endif // MAP_HUGETLB
char* AllocateFromHugePage(size_t bytes);
char* AllocateFallback(size_t bytes, bool aligned);
char* AllocateNewBlock(size_t block_bytes);
// Bytes of memory in blocks allocated so far
size_t blocks_memory_ = 0;
AllocTracker* tracker_;
};
inline char* Arena::Allocate(size_t bytes) {
// The semantics of what to return are a bit messy if we allow
// 0-byte allocations, so we disallow them here (we don't need
// them for our internal use).
assert(bytes > 0);
if (bytes <= alloc_bytes_remaining_) {
unaligned_alloc_ptr_ -= bytes;
alloc_bytes_remaining_ -= bytes;
return unaligned_alloc_ptr_;
}
return AllocateFallback(bytes, false /* unaligned */);
}
// check and adjust the block_size so that the return value is
// 1. in the range of [kMinBlockSize, kMaxBlockSize].
// 2. the multiple of align unit.
extern size_t OptimizeBlockSize(size_t block_size);
} // namespace rocksdb