// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #pragma once #include <atomic> #include <memory> #include <utility> #include "port/likely.h" #include "util/allocator.h" #include "util/arena.h" #include "util/core_local.h" #include "util/mutexlock.h" #include "util/thread_local.h" // Only generate field unused warning for padding array, or build under // GCC 4.8.1 will fail. #ifdef __clang__ #define ROCKSDB_FIELD_UNUSED __attribute__((__unused__)) #else #define ROCKSDB_FIELD_UNUSED #endif // __clang__ namespace rocksdb { class Logger; // ConcurrentArena wraps an Arena. It makes it thread safe using a fast // inlined spinlock, and adds small per-core allocation caches to avoid // contention for small allocations. To avoid any memory waste from the // per-core shards, they are kept small, they are lazily instantiated // only if ConcurrentArena actually notices concurrent use, and they // adjust their size so that there is no fragmentation waste when the // shard blocks are allocated from the underlying main arena. class ConcurrentArena : public Allocator { public: // block_size and huge_page_size are the same as for Arena (and are // in fact just passed to the constructor of arena_. The core-local // shards compute their shard_block_size as a fraction of block_size // that varies according to the hardware concurrency level. explicit ConcurrentArena(size_t block_size = Arena::kMinBlockSize, AllocTracker* tracker = nullptr, size_t huge_page_size = 0); char* Allocate(size_t bytes) override { return AllocateImpl(bytes, false /*force_arena*/, [=]() { return arena_.Allocate(bytes); }); } char* AllocateAligned(size_t bytes, size_t huge_page_size = 0, Logger* logger = nullptr) override { size_t rounded_up = ((bytes - 1) | (sizeof(void*) - 1)) + 1; assert(rounded_up >= bytes && rounded_up < bytes + sizeof(void*) && (rounded_up % sizeof(void*)) == 0); return AllocateImpl(rounded_up, huge_page_size != 0 /*force_arena*/, [=]() { return arena_.AllocateAligned(rounded_up, huge_page_size, logger); }); } size_t ApproximateMemoryUsage() const { std::unique_lock<SpinMutex> lock(arena_mutex_, std::defer_lock); lock.lock(); return arena_.ApproximateMemoryUsage() - ShardAllocatedAndUnused(); } size_t MemoryAllocatedBytes() const { return memory_allocated_bytes_.load(std::memory_order_relaxed); } size_t AllocatedAndUnused() const { return arena_allocated_and_unused_.load(std::memory_order_relaxed) + ShardAllocatedAndUnused(); } size_t IrregularBlockNum() const { return irregular_block_num_.load(std::memory_order_relaxed); } size_t BlockSize() const override { return arena_.BlockSize(); } private: struct Shard { char padding[40] ROCKSDB_FIELD_UNUSED; mutable SpinMutex mutex; char* free_begin_; std::atomic<size_t> allocated_and_unused_; Shard() : allocated_and_unused_(0) {} }; #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL static __thread size_t tls_cpuid; #else enum ZeroFirstEnum : size_t { tls_cpuid = 0 }; #endif char padding0[56] ROCKSDB_FIELD_UNUSED; size_t shard_block_size_; CoreLocalArray<Shard> shards_; Arena arena_; mutable SpinMutex arena_mutex_; std::atomic<size_t> arena_allocated_and_unused_; std::atomic<size_t> memory_allocated_bytes_; std::atomic<size_t> irregular_block_num_; char padding1[56] ROCKSDB_FIELD_UNUSED; Shard* Repick(); size_t ShardAllocatedAndUnused() const { size_t total = 0; for (size_t i = 0; i < shards_.Size(); ++i) { total += shards_.AccessAtCore(i)->allocated_and_unused_.load( std::memory_order_relaxed); } return total; } template <typename Func> char* AllocateImpl(size_t bytes, bool force_arena, const Func& func) { size_t cpu; // Go directly to the arena if the allocation is too large, or if // we've never needed to Repick() and the arena mutex is available // with no waiting. This keeps the fragmentation penalty of // concurrency zero unless it might actually confer an advantage. std::unique_lock<SpinMutex> arena_lock(arena_mutex_, std::defer_lock); if (bytes > shard_block_size_ / 4 || force_arena || ((cpu = tls_cpuid) == 0 && !shards_.AccessAtCore(0)->allocated_and_unused_.load( std::memory_order_relaxed) && arena_lock.try_lock())) { if (!arena_lock.owns_lock()) { arena_lock.lock(); } auto rv = func(); Fixup(); return rv; } // pick a shard from which to allocate Shard* s = shards_.AccessAtCore(cpu & (shards_.Size() - 1)); if (!s->mutex.try_lock()) { s = Repick(); s->mutex.lock(); } std::unique_lock<SpinMutex> lock(s->mutex, std::adopt_lock); size_t avail = s->allocated_and_unused_.load(std::memory_order_relaxed); if (avail < bytes) { // reload std::lock_guard<SpinMutex> reload_lock(arena_mutex_); // If the arena's current block is within a factor of 2 of the right // size, we adjust our request to avoid arena waste. auto exact = arena_allocated_and_unused_.load(std::memory_order_relaxed); assert(exact == arena_.AllocatedAndUnused()); if (exact >= bytes && arena_.IsInInlineBlock()) { // If we haven't exhausted arena's inline block yet, allocate from arena // directly. This ensures that we'll do the first few small allocations // without allocating any blocks. // In particular this prevents empty memtables from using // disproportionately large amount of memory: a memtable allocates on // the order of 1 KB of memory when created; we wouldn't want to // allocate a full arena block (typically a few megabytes) for that, // especially if there are thousands of empty memtables. auto rv = func(); Fixup(); return rv; } avail = exact >= shard_block_size_ / 2 && exact < shard_block_size_ * 2 ? exact : shard_block_size_; s->free_begin_ = arena_.AllocateAligned(avail); Fixup(); } s->allocated_and_unused_.store(avail - bytes, std::memory_order_relaxed); char* rv; if ((bytes % sizeof(void*)) == 0) { // aligned allocation from the beginning rv = s->free_begin_; s->free_begin_ += bytes; } else { // unaligned from the end rv = s->free_begin_ + avail - bytes; } return rv; } void Fixup() { arena_allocated_and_unused_.store(arena_.AllocatedAndUnused(), std::memory_order_relaxed); memory_allocated_bytes_.store(arena_.MemoryAllocatedBytes(), std::memory_order_relaxed); irregular_block_num_.store(arena_.IrregularBlockNum(), std::memory_order_relaxed); } ConcurrentArena(const ConcurrentArena&) = delete; ConcurrentArena& operator=(const ConcurrentArena&) = delete; }; } // namespace rocksdb