49623f9c8e
Summary: **Context:** Through heap profiling, we discovered that `BlockBasedTableReader` objects can accumulate and lead to high memory usage (e.g., `max_open_file = -1`). This memory is currently not tracked, not constrained, and not cache-evictable. As a first step to improve this, similar to https://github.com/facebook/rocksdb/pull/8428, this PR tracks an estimate of each `BlockBasedTableReader` object's memory in the block cache and fails future creation if the memory usage exceeds the available space of the cache at the time of creation. **Summary:** - Approximate big memory users' (`BlockBasedTable::Rep` and `TableProperties`) memory usage in addition to the existing estimated ones (filter block/index block/un-compression dictionary) - Charge all of these memory usages to the block cache on `BlockBasedTable::Open()` and release them on `~BlockBasedTable()`, as there is no memory usage fluctuation of concern in between - Refactor CacheReservationManager (and its call sites) to add concurrent support for BlockBasedTable used in this PR. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9748 Test Plan: - New unit tests - db bench: `OpenDb` : **-0.52% in ms** - Setup `./db_bench -benchmarks=fillseq -db=/dev/shm/testdb -disable_auto_compactions=1 -write_buffer_size=1048576` - Repeated run with pre-change w/o feature and post-change with feature, benchmark `OpenDb`: `./db_bench -benchmarks=readrandom -use_existing_db=1 -db=/dev/shm/testdb -reserve_table_reader_memory=true (remove this when running w/o feature) -file_opening_threads=3 -open_files=-1 -report_open_timing=true| egrep 'OpenDb:'` #-run | (feature-off) avg milliseconds | std milliseconds | (feature-on) avg milliseconds | std milliseconds | change (%) -- | -- | -- | -- | -- | -- 10 | 11.4018 | 5.95173 | 9.47788 | 1.57538 | -16.87382694 20 | 9.23746 | 0.841053 | 9.32377 | 1.14074 | 0.9343477536 40 | 9.0876 | 0.671129 | 9.35053 | 1.11713 | 2.893283155 80 | 9.72514 | 2.28459 | 9.52013 | 1.0894 | -2.108041632 160 | 9.74677 | 0.991234 | 9.84743 | 1.73396 | 1.032752389 320 | 10.7297 | 5.11555 | 10.547 | 1.97692 | **-1.70275031** 640 | 11.7092 | 2.36565 | 11.7869 | 2.69377 | **0.6635807741** - db bench on write with cost to cache in WriteBufferManager (just in case this PR's CRM refactoring accidentally slows down anything in WBM) : `fillseq` : **+0.54% in micros/op** `./db_bench -benchmarks=fillseq -db=/dev/shm/testdb -disable_auto_compactions=1 -cost_write_buffer_to_cache=true -write_buffer_size=10000000000 | egrep 'fillseq'` #-run | (pre-PR) avg micros/op | std micros/op | (post-PR) avg micros/op | std micros/op | change (%) -- | -- | -- | -- | -- | -- 10 | 6.15 | 0.260187 | 6.289 | 0.371192 | 2.260162602 20 | 7.28025 | 0.465402 | 7.37255 | 0.451256 | 1.267813605 40 | 7.06312 | 0.490654 | 7.13803 | 0.478676 | **1.060579461** 80 | 7.14035 | 0.972831 | 7.14196 | 0.92971 | **0.02254791432** - filter bench: `bloom filter`: **-0.78% in ms/key** - ` ./filter_bench -impl=2 -quick -reserve_table_builder_memory=true | grep 'Build avg'` #-run 
| (pre-PR) avg ns/key | std ns/key | (post-PR) ns/key | std ns/key | change (%) -- | -- | -- | -- | -- | -- 10 | 26.4369 | 0.442182 | 26.3273 | 0.422919 | **-0.4145720565** 20 | 26.4451 | 0.592787 | 26.1419 | 0.62451 | **-1.1465262** - Crash test `python3 tools/db_crashtest.py blackbox --reserve_table_reader_memory=1 --cache_size=1` killed as normal Reviewed By: ajkr Differential Revision: D35136549 Pulled By: hx235 fbshipit-source-id: 146978858d0f900f43f4eb09bfd3e83195e3be28
289 lines
12 KiB
C++
289 lines
12 KiB
C++
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
//
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#pragma once
|
|
|
|
#include <atomic>
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <vector>
|
|
|
|
#include "cache/cache_entry_roles.h"
|
|
#include "cache/cache_key.h"
|
|
#include "rocksdb/cache.h"
|
|
#include "rocksdb/slice.h"
|
|
#include "rocksdb/status.h"
|
|
#include "util/coding.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
// CacheReservationManager is an interface for reserving cache space for the
|
|
// memory used
|
|
class CacheReservationManager {
|
|
public:
|
|
// CacheReservationHandle is for managing the lifetime of a cache reservation
|
|
// for an incremental amount of memory used (i.e, incremental_memory_used)
|
|
class CacheReservationHandle {
|
|
public:
|
|
virtual ~CacheReservationHandle() {}
|
|
};
|
|
virtual ~CacheReservationManager() {}
|
|
virtual Status UpdateCacheReservation(std::size_t new_memory_used) = 0;
|
|
virtual Status MakeCacheReservation(
|
|
std::size_t incremental_memory_used,
|
|
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
|
|
*handle) = 0;
|
|
virtual std::size_t GetTotalReservedCacheSize() = 0;
|
|
virtual std::size_t GetTotalMemoryUsed() = 0;
|
|
};
|
|
|
|
// CacheReservationManagerImpl implements interface CacheReservationManager
// for reserving cache space for the memory used by inserting/releasing dummy
// entries in the cache.
//
// This class is NOT thread-safe, except that GetTotalReservedCacheSize()
// can be called without external synchronization.
template <CacheEntryRole R>
class CacheReservationManagerImpl
    : public CacheReservationManager,
      public std::enable_shared_from_this<CacheReservationManagerImpl<R>> {
 public:
  // RAII handle for one incremental reservation; releases
  // incremental_memory_used_ bytes of reservation on destruction.
  class CacheReservationHandle
      : public CacheReservationManager::CacheReservationHandle {
   public:
    CacheReservationHandle(
        std::size_t incremental_memory_used,
        std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr);
    ~CacheReservationHandle() override;

   private:
    std::size_t incremental_memory_used_;
    // Keeps the manager alive for as long as the handle exists.
    std::shared_ptr<CacheReservationManagerImpl> cache_res_mgr_;
  };

  // Construct a CacheReservationManagerImpl
  // @param cache The cache where dummy entries are inserted and released for
  //              reserving cache space
  // @param delayed_decrease If set true, then dummy entries won't be released
  //                         immediately when memory usage decreases.
  //                         Instead, it will be released when the memory usage
  //                         decreases to 3/4 of what we have reserved so far.
  //                         This is for saving some future dummy entry
  //                         insertion when memory usage increases are likely to
  //                         happen in the near future.
  //
  // REQUIRED: cache is not nullptr
  explicit CacheReservationManagerImpl(std::shared_ptr<Cache> cache,
                                       bool delayed_decrease = false);

  // no copy constructor, copy assignment, move constructor, move assignment
  CacheReservationManagerImpl(const CacheReservationManagerImpl &) = delete;
  CacheReservationManagerImpl &operator=(const CacheReservationManagerImpl &) =
      delete;
  CacheReservationManagerImpl(CacheReservationManagerImpl &&) = delete;
  CacheReservationManagerImpl &operator=(CacheReservationManagerImpl &&) =
      delete;

  ~CacheReservationManagerImpl() override;

  // One of the two ways of reserving/releasing cache space,
  // see MakeCacheReservation() for the other.
  //
  // Use ONLY one of these two ways to prevent unexpected behavior.
  //
  // Insert and release dummy entries in the cache to
  // match the size of total dummy entries with the least multiple of
  // kSizeDummyEntry greater than or equal to new_mem_used
  //
  // Insert dummy entries if new_memory_used > cache_allocated_size_;
  //
  // Release dummy entries if new_memory_used < cache_allocated_size_
  // (and new_memory_used < cache_allocated_size_ * 3/4
  // when delayed_decrease is set true);
  //
  // Keep dummy entries the same if (1) new_memory_used == cache_allocated_size_
  // or (2) new_memory_used is in the interval of
  // [cache_allocated_size_ * 3/4, cache_allocated_size) when delayed_decrease
  // is set true.
  //
  // @param new_memory_used The number of bytes used by new memory
  //        The most recent new_memory_used passed in will be returned
  //        in GetTotalMemoryUsed() even when the call returns non-ok status.
  //
  //        Since the class is NOT thread-safe, external synchronization on the
  //        order of calling UpdateCacheReservation() is needed if you want
  //        GetTotalMemoryUsed() indeed returns the latest memory used.
  //
  // @return On inserting dummy entries, it returns Status::OK() if all dummy
  //         entry insertions succeed.
  //         Otherwise, it returns the first non-ok status;
  //         On releasing dummy entries, it always returns Status::OK().
  //         On keeping dummy entries the same, it always returns Status::OK().
  Status UpdateCacheReservation(std::size_t new_memory_used) override;

  // One of the two ways of reserving cache space and releasing is done through
  // destruction of CacheReservationHandle.
  // See UpdateCacheReservation() for the other way.
  //
  // Use ONLY one of these two ways to prevent unexpected behavior.
  //
  // Insert dummy entries in the cache for the incremental memory usage
  // to match the size of total dummy entries with the least multiple of
  // kSizeDummyEntry greater than or equal to the total memory used.
  //
  // A CacheReservationHandle is returned as an output parameter.
  // The reserved dummy entries are automatically released on the destruction of
  // this handle, which achieves better RAII per cache reservation.
  //
  // WARNING: Deallocate all the handles of the CacheReservationManager object
  //          before deallocating the object to prevent unexpected behavior.
  //
  // @param incremental_memory_used The number of bytes increased in memory
  //        usage.
  //
  //        Calling GetTotalMemoryUsed() afterward will return the total memory
  //        increased by this number, even when calling MakeCacheReservation()
  //        returns non-ok status.
  //
  //        Since the class is NOT thread-safe, external synchronization in
  //        calling MakeCacheReservation() is needed if you want
  //        GetTotalMemoryUsed() indeed returns the latest memory used.
  //
  // @param handle A pointer to std::unique_ptr<CacheReservationHandle> that
  //        manages the lifetime of the cache reservation represented by the
  //        handle.
  //
  // @return It returns Status::OK() if all dummy
  //         entry insertions succeed.
  //         Otherwise, it returns the first non-ok status;
  //
  // REQUIRES: handle != nullptr
  Status MakeCacheReservation(
      std::size_t incremental_memory_used,
      std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
      override;

  // Return the size of the cache (which is a multiple of kSizeDummyEntry)
  // successfully reserved by calling UpdateCacheReservation().
  //
  // When UpdateCacheReservation() returns non-ok status,
  // calling GetTotalReservedCacheSize() after that might return a slightly
  // smaller number than the actual reserved cache size because
  // the returned number will always be a multiple of kSizeDummyEntry
  // and cache full might happen in the middle of inserting a dummy entry.
  std::size_t GetTotalReservedCacheSize() override;

  // Return the latest total memory used indicated by the most recent call of
  // UpdateCacheReservation(std::size_t new_memory_used);
  std::size_t GetTotalMemoryUsed() override;

  static constexpr std::size_t GetDummyEntrySize() { return kSizeDummyEntry; }

  // For testing only - it is to help ensure the NoopDeleterForRole<R>
  // accessed from CacheReservationManagerImpl and the one accessed from the
  // test are from the same translation units
  static Cache::DeleterFn TEST_GetNoopDeleterForRole();

 private:
  // Granularity of each dummy entry used for the reservation.
  static constexpr std::size_t kSizeDummyEntry = 256 * 1024;

  Slice GetNextCacheKey();

  Status ReleaseCacheReservation(std::size_t incremental_memory_used);
  Status IncreaseCacheReservation(std::size_t new_mem_used);
  Status DecreaseCacheReservation(std::size_t new_mem_used);

  // The cache where dummy entries are inserted/released.
  std::shared_ptr<Cache> cache_;
  bool delayed_decrease_;
  // Atomic so GetTotalReservedCacheSize() can be read without external
  // synchronization (see class comment).
  std::atomic<std::size_t> cache_allocated_size_;
  std::size_t memory_used_;
  // Handles of the dummy entries currently held in cache_.
  std::vector<Cache::Handle *> dummy_handles_;
  CacheKey cache_key_;
};
|
|
|
|
class ConcurrentCacheReservationManager
|
|
: public CacheReservationManager,
|
|
public std::enable_shared_from_this<ConcurrentCacheReservationManager> {
|
|
public:
|
|
class CacheReservationHandle
|
|
: public CacheReservationManager::CacheReservationHandle {
|
|
public:
|
|
CacheReservationHandle(
|
|
std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr,
|
|
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
|
|
cache_res_handle) {
|
|
assert(cache_res_mgr && cache_res_handle);
|
|
cache_res_mgr_ = cache_res_mgr;
|
|
cache_res_handle_ = std::move(cache_res_handle);
|
|
}
|
|
|
|
~CacheReservationHandle() override {
|
|
std::lock_guard<std::mutex> lock(cache_res_mgr_->cache_res_mgr_mu_);
|
|
cache_res_handle_.reset();
|
|
}
|
|
|
|
private:
|
|
std::shared_ptr<ConcurrentCacheReservationManager> cache_res_mgr_;
|
|
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
|
|
cache_res_handle_;
|
|
};
|
|
|
|
explicit ConcurrentCacheReservationManager(
|
|
std::shared_ptr<CacheReservationManager> cache_res_mgr) {
|
|
cache_res_mgr_ = std::move(cache_res_mgr);
|
|
}
|
|
ConcurrentCacheReservationManager(const ConcurrentCacheReservationManager &) =
|
|
delete;
|
|
ConcurrentCacheReservationManager &operator=(
|
|
const ConcurrentCacheReservationManager &) = delete;
|
|
ConcurrentCacheReservationManager(ConcurrentCacheReservationManager &&) =
|
|
delete;
|
|
ConcurrentCacheReservationManager &operator=(
|
|
ConcurrentCacheReservationManager &&) = delete;
|
|
|
|
~ConcurrentCacheReservationManager() override {}
|
|
|
|
inline Status UpdateCacheReservation(std::size_t new_memory_used) override {
|
|
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
|
|
return cache_res_mgr_->UpdateCacheReservation(new_memory_used);
|
|
}
|
|
inline Status MakeCacheReservation(
|
|
std::size_t incremental_memory_used,
|
|
std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
|
|
override {
|
|
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
|
|
wrapped_handle;
|
|
Status s;
|
|
{
|
|
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
|
|
s = cache_res_mgr_->MakeCacheReservation(incremental_memory_used,
|
|
&wrapped_handle);
|
|
}
|
|
(*handle).reset(
|
|
new ConcurrentCacheReservationManager::CacheReservationHandle(
|
|
std::enable_shared_from_this<
|
|
ConcurrentCacheReservationManager>::shared_from_this(),
|
|
std::move(wrapped_handle)));
|
|
return s;
|
|
}
|
|
inline std::size_t GetTotalReservedCacheSize() override {
|
|
return cache_res_mgr_->GetTotalReservedCacheSize();
|
|
}
|
|
inline std::size_t GetTotalMemoryUsed() override {
|
|
std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
|
|
return cache_res_mgr_->GetTotalMemoryUsed();
|
|
}
|
|
|
|
private:
|
|
std::mutex cache_res_mgr_mu_;
|
|
std::shared_ptr<CacheReservationManager> cache_res_mgr_;
|
|
};
|
|
} // namespace ROCKSDB_NAMESPACE
|