netty5/buffer/src/main/java/io/net5/buffer/PooledByteBufAllocator.java

746 lines
28 KiB
Java
Raw Normal View History

/*
* Copyright 2012 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
2021-09-17 16:28:14 +02:00
package io.net5.buffer;
2021-09-17 16:28:14 +02:00
import static io.net5.util.internal.ObjectUtil.checkPositiveOrZero;
2021-09-17 16:28:14 +02:00
import io.net5.util.NettyRuntime;
import io.net5.util.concurrent.EventExecutor;
import io.net5.util.concurrent.FastThreadLocal;
import io.net5.util.concurrent.FastThreadLocalThread;
import io.net5.util.internal.PlatformDependent;
import io.net5.util.internal.StringUtil;
import io.net5.util.internal.SystemPropertyUtil;
import io.net5.util.internal.ThreadExecutorMap;
import io.net5.util.internal.logging.InternalLogger;
import io.net5.util.internal.logging.InternalLoggerFactory;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
public class PooledByteBufAllocator extends AbstractByteBufAllocator implements ByteBufAllocatorMetricProvider {
private static final InternalLogger logger = InternalLoggerFactory.getInstance(PooledByteBufAllocator.class);
private static final int DEFAULT_NUM_HEAP_ARENA;
private static final int DEFAULT_NUM_DIRECT_ARENA;
private static final int DEFAULT_PAGE_SIZE;
private static final int DEFAULT_MAX_ORDER; // 8192 << 11 = 16 MiB per chunk
private static final int DEFAULT_SMALL_CACHE_SIZE;
private static final int DEFAULT_NORMAL_CACHE_SIZE;
static final int DEFAULT_MAX_CACHED_BUFFER_CAPACITY;
private static final int DEFAULT_CACHE_TRIM_INTERVAL;
private static final long DEFAULT_CACHE_TRIM_INTERVAL_MILLIS;
private static final boolean DEFAULT_USE_CACHE_FOR_ALL_THREADS;
private static final int DEFAULT_DIRECT_MEMORY_CACHE_ALIGNMENT;
static final int DEFAULT_MAX_CACHED_BYTEBUFFERS_PER_CHUNK;
private static final int MIN_PAGE_SIZE = 4096;
private static final int MAX_CHUNK_SIZE = (int) (((long) Integer.MAX_VALUE + 1) / 2);
private final Runnable trimTask = this::trimCurrentThreadCache;
static {
Fix alignment handling for pooled direct buffers (#11106) Motivation: Alignment handling was broken, and basically turned into a fixed offset into each allocation address regardless of its initial value, instead of ensuring that the allocated address is either aligned or bumped to the nearest alignment offset. The brokenness of the alignment handling extended so far, that overlapping ByteBuf instances could even be created, as was seen in #11101. Modification: Instead of fixing the per-allocation pointer bump, we now ensure that 1) the minimum page size is a whole multiple of the alignment, and 2) the reference memory for each chunk is bumped to the nearest aligned address, and finally 3) ensured that the reservations are whole multiples of the alignment, thus ensuring that the next allocation automatically occurs from an aligned address. Incidentally, (3) above comes for free because the reservations are in whole pages, and in (1) we ensured that pages are sized in whole multiples of the alignment. In order to ensure that the memory for a chunk is aligned, we introduce some new PlatformDependent infrastructure. The PlatformDependent.alignDirectBuffer will produce a slice of the given buffer, and the slice will have an address that is aligned. This method is plainly available on ByteBuffer in Java 9 onwards, but for pre-9 we have to use Unsafe, which means it can fail and might not be available on all platforms. Attempts to create a PooledByteBufAllocator that uses alignment, when this is not supported, will throw an exception. Luckily, I think use of aligned allocations are rare. Result: Aligned pooled byte bufs now work correctly, and never have any overlap. Fixes #11101
2021-03-23 17:07:06 +01:00
int defaultAlignment = SystemPropertyUtil.getInt(
2021-09-17 16:28:14 +02:00
"io.net5.allocator.directMemoryCacheAlignment", 0);
int defaultPageSize = SystemPropertyUtil.getInt("io.net5.allocator.pageSize", 8192);
Throwable pageSizeFallbackCause = null;
try {
Fix alignment handling for pooled direct buffers (#11106) Motivation: Alignment handling was broken, and basically turned into a fixed offset into each allocation address regardless of its initial value, instead of ensuring that the allocated address is either aligned or bumped to the nearest alignment offset. The brokenness of the alignment handling extended so far, that overlapping ByteBuf instances could even be created, as was seen in #11101. Modification: Instead of fixing the per-allocation pointer bump, we now ensure that 1) the minimum page size is a whole multiple of the alignment, and 2) the reference memory for each chunk is bumped to the nearest aligned address, and finally 3) ensured that the reservations are whole multiples of the alignment, thus ensuring that the next allocation automatically occurs from an aligned address. Incidentally, (3) above comes for free because the reservations are in whole pages, and in (1) we ensured that pages are sized in whole multiples of the alignment. In order to ensure that the memory for a chunk is aligned, we introduce some new PlatformDependent infrastructure. The PlatformDependent.alignDirectBuffer will produce a slice of the given buffer, and the slice will have an address that is aligned. This method is plainly available on ByteBuffer in Java 9 onwards, but for pre-9 we have to use Unsafe, which means it can fail and might not be available on all platforms. Attempts to create a PooledByteBufAllocator that uses alignment, when this is not supported, will throw an exception. Luckily, I think use of aligned allocations are rare. Result: Aligned pooled byte bufs now work correctly, and never have any overlap. Fixes #11101
2021-03-23 17:07:06 +01:00
validateAndCalculatePageShifts(defaultPageSize, defaultAlignment);
} catch (Throwable t) {
pageSizeFallbackCause = t;
defaultPageSize = 8192;
Fix alignment handling for pooled direct buffers (#11106) Motivation: Alignment handling was broken, and basically turned into a fixed offset into each allocation address regardless of its initial value, instead of ensuring that the allocated address is either aligned or bumped to the nearest alignment offset. The brokenness of the alignment handling extended so far, that overlapping ByteBuf instances could even be created, as was seen in #11101. Modification: Instead of fixing the per-allocation pointer bump, we now ensure that 1) the minimum page size is a whole multiple of the alignment, and 2) the reference memory for each chunk is bumped to the nearest aligned address, and finally 3) ensured that the reservations are whole multiples of the alignment, thus ensuring that the next allocation automatically occurs from an aligned address. Incidentally, (3) above comes for free because the reservations are in whole pages, and in (1) we ensured that pages are sized in whole multiples of the alignment. In order to ensure that the memory for a chunk is aligned, we introduce some new PlatformDependent infrastructure. The PlatformDependent.alignDirectBuffer will produce a slice of the given buffer, and the slice will have an address that is aligned. This method is plainly available on ByteBuffer in Java 9 onwards, but for pre-9 we have to use Unsafe, which means it can fail and might not be available on all platforms. Attempts to create a PooledByteBufAllocator that uses alignment, when this is not supported, will throw an exception. Luckily, I think use of aligned allocations are rare. Result: Aligned pooled byte bufs now work correctly, and never have any overlap. Fixes #11101
2021-03-23 17:07:06 +01:00
defaultAlignment = 0;
}
DEFAULT_PAGE_SIZE = defaultPageSize;
Fix alignment handling for pooled direct buffers (#11106) Motivation: Alignment handling was broken, and basically turned into a fixed offset into each allocation address regardless of its initial value, instead of ensuring that the allocated address is either aligned or bumped to the nearest alignment offset. The brokenness of the alignment handling extended so far, that overlapping ByteBuf instances could even be created, as was seen in #11101. Modification: Instead of fixing the per-allocation pointer bump, we now ensure that 1) the minimum page size is a whole multiple of the alignment, and 2) the reference memory for each chunk is bumped to the nearest aligned address, and finally 3) ensured that the reservations are whole multiples of the alignment, thus ensuring that the next allocation automatically occurs from an aligned address. Incidentally, (3) above comes for free because the reservations are in whole pages, and in (1) we ensured that pages are sized in whole multiples of the alignment. In order to ensure that the memory for a chunk is aligned, we introduce some new PlatformDependent infrastructure. The PlatformDependent.alignDirectBuffer will produce a slice of the given buffer, and the slice will have an address that is aligned. This method is plainly available on ByteBuffer in Java 9 onwards, but for pre-9 we have to use Unsafe, which means it can fail and might not be available on all platforms. Attempts to create a PooledByteBufAllocator that uses alignment, when this is not supported, will throw an exception. Luckily, I think use of aligned allocations are rare. Result: Aligned pooled byte bufs now work correctly, and never have any overlap. Fixes #11101
2021-03-23 17:07:06 +01:00
DEFAULT_DIRECT_MEMORY_CACHE_ALIGNMENT = defaultAlignment;
2021-09-17 16:28:14 +02:00
int defaultMaxOrder = SystemPropertyUtil.getInt("io.net5.allocator.maxOrder", 11);
Throwable maxOrderFallbackCause = null;
try {
validateAndCalculateChunkSize(DEFAULT_PAGE_SIZE, defaultMaxOrder);
} catch (Throwable t) {
maxOrderFallbackCause = t;
defaultMaxOrder = 11;
}
DEFAULT_MAX_ORDER = defaultMaxOrder;
// Determine reasonable default for nHeapArena and nDirectArena.
// Assuming each arena has 3 chunks, the pool should not consume more than 50% of max memory.
final Runtime runtime = Runtime.getRuntime();
Enable configuring available processors Motivation: In cases when an application is running in a container or is otherwise constrained to the number of processors that it is using, the JVM invocation Runtime#availableProcessors will not return the constrained value but rather the number of processors available to the virtual machine. Netty uses this number in sizing various resources. Additionally, some applications will constrain the number of threads that they are using independenly of the number of processors available on the system. Thus, applications should have a way to globally configure the number of processors. Modifications: Rather than invoking Runtime#availableProcessors, Netty should rely on a method that enables configuration when the JVM is started or by the application. This commit exposes a new class NettyRuntime for enabling such configuraiton. This value can only be set once. Its default value is Runtime#availableProcessors so that there is no visible change to existing applications, but enables configuring either a system property or configuring during application startup (e.g., based on settings used to configure the application). Additionally, we introduce the usage of forbidden-apis to prevent future uses of Runtime#availableProcessors from creeping. Future work should enable the bundled signatures and clean up uses of deprecated and other forbidden methods. Result: Netty can be configured to not use the underlying number of processors, but rather the constrained number of processors.
2017-01-16 18:36:32 +01:00
/*
* We use 2 * available processors by default to reduce contention as we use 2 * available processors for the
* number of EventLoops in NIO and EPOLL as well. If we choose a smaller number we will run into hot spots as
* allocation and de-allocation needs to be synchronized on the PoolArena.
*
* See https://github.com/netty/netty/issues/3888.
*/
final int defaultMinNumArena = NettyRuntime.availableProcessors() * 2;
final int defaultChunkSize = DEFAULT_PAGE_SIZE << DEFAULT_MAX_ORDER;
DEFAULT_NUM_HEAP_ARENA = Math.max(0,
SystemPropertyUtil.getInt(
2021-09-17 16:28:14 +02:00
"io.net5.allocator.numHeapArenas",
(int) Math.min(
defaultMinNumArena,
runtime.maxMemory() / defaultChunkSize / 2 / 3)));
DEFAULT_NUM_DIRECT_ARENA = Math.max(0,
SystemPropertyUtil.getInt(
2021-09-17 16:28:14 +02:00
"io.net5.allocator.numDirectArenas",
(int) Math.min(
defaultMinNumArena,
PlatformDependent.maxDirectMemory() / defaultChunkSize / 2 / 3)));
// cache sizes
2021-09-17 16:28:14 +02:00
DEFAULT_SMALL_CACHE_SIZE = SystemPropertyUtil.getInt("io.net5.allocator.smallCacheSize", 256);
DEFAULT_NORMAL_CACHE_SIZE = SystemPropertyUtil.getInt("io.net5.allocator.normalCacheSize", 64);
// 32 kb is the default maximum capacity of the cached buffer. Similar to what is explained in
// 'Scalable memory allocation using jemalloc'
DEFAULT_MAX_CACHED_BUFFER_CAPACITY = SystemPropertyUtil.getInt(
2021-09-17 16:28:14 +02:00
"io.net5.allocator.maxCachedBufferCapacity", 32 * 1024);
// the number of threshold of allocations when cached entries will be freed up if not frequently used
DEFAULT_CACHE_TRIM_INTERVAL = SystemPropertyUtil.getInt(
2021-09-17 16:28:14 +02:00
"io.net5.allocator.cacheTrimInterval", 8192);
DEFAULT_CACHE_TRIM_INTERVAL_MILLIS = SystemPropertyUtil.getLong(
2021-09-17 16:28:14 +02:00
"io.net5.allocator.cacheTrimIntervalMillis", 0);
DEFAULT_USE_CACHE_FOR_ALL_THREADS = SystemPropertyUtil.getBoolean(
2021-09-17 16:28:14 +02:00
"io.net5.allocator.useCacheForAllThreads", false);
// Use 1023 by default as we use an ArrayDeque as backing storage which will then allocate an internal array
// of 1024 elements. Otherwise we would allocate 2048 and only use 1024 which is wasteful.
DEFAULT_MAX_CACHED_BYTEBUFFERS_PER_CHUNK = SystemPropertyUtil.getInt(
2021-09-17 16:28:14 +02:00
"io.net5.allocator.maxCachedByteBuffersPerChunk", 1023);
if (logger.isDebugEnabled()) {
2021-09-17 16:28:14 +02:00
logger.debug("-Dio.net5.allocator.numHeapArenas: {}", DEFAULT_NUM_HEAP_ARENA);
logger.debug("-Dio.net5.allocator.numDirectArenas: {}", DEFAULT_NUM_DIRECT_ARENA);
if (pageSizeFallbackCause == null) {
2021-09-17 16:28:14 +02:00
logger.debug("-Dio.net5.allocator.pageSize: {}", DEFAULT_PAGE_SIZE);
} else {
2021-09-17 16:28:14 +02:00
logger.debug("-Dio.net5.allocator.pageSize: {}", DEFAULT_PAGE_SIZE, pageSizeFallbackCause);
}
if (maxOrderFallbackCause == null) {
2021-09-17 16:28:14 +02:00
logger.debug("-Dio.net5.allocator.maxOrder: {}", DEFAULT_MAX_ORDER);
} else {
2021-09-17 16:28:14 +02:00
logger.debug("-Dio.net5.allocator.maxOrder: {}", DEFAULT_MAX_ORDER, maxOrderFallbackCause);
}
2021-09-17 16:28:14 +02:00
logger.debug("-Dio.net5.allocator.chunkSize: {}", DEFAULT_PAGE_SIZE << DEFAULT_MAX_ORDER);
logger.debug("-Dio.net5.allocator.smallCacheSize: {}", DEFAULT_SMALL_CACHE_SIZE);
logger.debug("-Dio.net5.allocator.normalCacheSize: {}", DEFAULT_NORMAL_CACHE_SIZE);
logger.debug("-Dio.net5.allocator.maxCachedBufferCapacity: {}", DEFAULT_MAX_CACHED_BUFFER_CAPACITY);
logger.debug("-Dio.net5.allocator.cacheTrimInterval: {}", DEFAULT_CACHE_TRIM_INTERVAL);
logger.debug("-Dio.net5.allocator.cacheTrimIntervalMillis: {}", DEFAULT_CACHE_TRIM_INTERVAL_MILLIS);
logger.debug("-Dio.net5.allocator.useCacheForAllThreads: {}", DEFAULT_USE_CACHE_FOR_ALL_THREADS);
logger.debug("-Dio.net5.allocator.maxCachedByteBuffersPerChunk: {}",
DEFAULT_MAX_CACHED_BYTEBUFFERS_PER_CHUNK);
}
}
public static final PooledByteBufAllocator DEFAULT =
new PooledByteBufAllocator(PlatformDependent.directBufferPreferred());
private final PoolArena<byte[]>[] heapArenas;
private final PoolArena<ByteBuffer>[] directArenas;
private final int smallCacheSize;
private final int normalCacheSize;
private final List<PoolArenaMetric> heapArenaMetrics;
private final List<PoolArenaMetric> directArenaMetrics;
private final PoolThreadLocalCache threadCache;
private final int chunkSize;
private final PooledByteBufAllocatorMetric metric;
public PooledByteBufAllocator() {
this(false);
}
@SuppressWarnings("deprecation")
public PooledByteBufAllocator(boolean preferDirect) {
this(preferDirect, DEFAULT_NUM_HEAP_ARENA, DEFAULT_NUM_DIRECT_ARENA, DEFAULT_PAGE_SIZE, DEFAULT_MAX_ORDER);
}
@SuppressWarnings("deprecation")
public PooledByteBufAllocator(int nHeapArena, int nDirectArena, int pageSize, int maxOrder) {
this(false, nHeapArena, nDirectArena, pageSize, maxOrder);
}
/**
* @deprecated use
* {@link PooledByteBufAllocator#PooledByteBufAllocator(boolean, int, int, int, int, int, int, boolean)}
*/
@Deprecated
public PooledByteBufAllocator(boolean preferDirect, int nHeapArena, int nDirectArena, int pageSize, int maxOrder) {
this(preferDirect, nHeapArena, nDirectArena, pageSize, maxOrder,
0, DEFAULT_SMALL_CACHE_SIZE, DEFAULT_NORMAL_CACHE_SIZE);
}
/**
* @deprecated use
* {@link PooledByteBufAllocator#PooledByteBufAllocator(boolean, int, int, int, int, int, int, boolean)}
*/
@Deprecated
public PooledByteBufAllocator(boolean preferDirect, int nHeapArena, int nDirectArena, int pageSize, int maxOrder,
int tinyCacheSize, int smallCacheSize, int normalCacheSize) {
this(preferDirect, nHeapArena, nDirectArena, pageSize, maxOrder, smallCacheSize,
normalCacheSize, DEFAULT_USE_CACHE_FOR_ALL_THREADS, DEFAULT_DIRECT_MEMORY_CACHE_ALIGNMENT);
}
/**
* @deprecated use
* {@link PooledByteBufAllocator#PooledByteBufAllocator(boolean, int, int, int, int, int, int, boolean)}
*/
@Deprecated
public PooledByteBufAllocator(boolean preferDirect, int nHeapArena,
int nDirectArena, int pageSize, int maxOrder, int tinyCacheSize,
int smallCacheSize, int normalCacheSize,
boolean useCacheForAllThreads) {
this(preferDirect, nHeapArena, nDirectArena, pageSize, maxOrder,
smallCacheSize, normalCacheSize,
useCacheForAllThreads);
}
public PooledByteBufAllocator(boolean preferDirect, int nHeapArena,
int nDirectArena, int pageSize, int maxOrder,
int smallCacheSize, int normalCacheSize,
boolean useCacheForAllThreads) {
this(preferDirect, nHeapArena, nDirectArena, pageSize, maxOrder,
smallCacheSize, normalCacheSize,
useCacheForAllThreads, DEFAULT_DIRECT_MEMORY_CACHE_ALIGNMENT);
}
/**
* @deprecated use
* {@link PooledByteBufAllocator#PooledByteBufAllocator(boolean, int, int, int, int, int, int, boolean, int)}
*/
@Deprecated
public PooledByteBufAllocator(boolean preferDirect, int nHeapArena, int nDirectArena, int pageSize, int maxOrder,
int tinyCacheSize, int smallCacheSize, int normalCacheSize,
boolean useCacheForAllThreads, int directMemoryCacheAlignment) {
this(preferDirect, nHeapArena, nDirectArena, pageSize, maxOrder,
smallCacheSize, normalCacheSize,
useCacheForAllThreads, directMemoryCacheAlignment);
}
public PooledByteBufAllocator(boolean preferDirect, int nHeapArena, int nDirectArena, int pageSize, int maxOrder,
int smallCacheSize, int normalCacheSize,
boolean useCacheForAllThreads, int directMemoryCacheAlignment) {
super(preferDirect);
threadCache = new PoolThreadLocalCache(useCacheForAllThreads);
this.smallCacheSize = smallCacheSize;
this.normalCacheSize = normalCacheSize;
Fix alignment handling for pooled direct buffers (#11106) Motivation: Alignment handling was broken, and basically turned into a fixed offset into each allocation address regardless of its initial value, instead of ensuring that the allocated address is either aligned or bumped to the nearest alignment offset. The brokenness of the alignment handling extended so far, that overlapping ByteBuf instances could even be created, as was seen in #11101. Modification: Instead of fixing the per-allocation pointer bump, we now ensure that 1) the minimum page size is a whole multiple of the alignment, and 2) the reference memory for each chunk is bumped to the nearest aligned address, and finally 3) ensured that the reservations are whole multiples of the alignment, thus ensuring that the next allocation automatically occurs from an aligned address. Incidentally, (3) above comes for free because the reservations are in whole pages, and in (1) we ensured that pages are sized in whole multiples of the alignment. In order to ensure that the memory for a chunk is aligned, we introduce some new PlatformDependent infrastructure. The PlatformDependent.alignDirectBuffer will produce a slice of the given buffer, and the slice will have an address that is aligned. This method is plainly available on ByteBuffer in Java 9 onwards, but for pre-9 we have to use Unsafe, which means it can fail and might not be available on all platforms. Attempts to create a PooledByteBufAllocator that uses alignment, when this is not supported, will throw an exception. Luckily, I think use of aligned allocations are rare. Result: Aligned pooled byte bufs now work correctly, and never have any overlap. Fixes #11101
2021-03-23 17:07:06 +01:00
if (directMemoryCacheAlignment != 0) {
if (!PlatformDependent.hasAlignDirectByteBuffer()) {
throw new UnsupportedOperationException("Buffer alignment is not supported. " +
"Either Unsafe or ByteBuffer.alignSlice() must be available.");
}
// Ensure page size is a whole multiple of the alignment, or bump it to the next whole multiple.
pageSize = (int) PlatformDependent.align(pageSize, directMemoryCacheAlignment);
}
chunkSize = validateAndCalculateChunkSize(pageSize, maxOrder);
checkPositiveOrZero(nHeapArena, "nHeapArena");
checkPositiveOrZero(nDirectArena, "nDirectArena");
checkPositiveOrZero(directMemoryCacheAlignment, "directMemoryCacheAlignment");
if (directMemoryCacheAlignment > 0 && !isDirectMemoryCacheAlignmentSupported()) {
throw new IllegalArgumentException("directMemoryCacheAlignment is not supported");
}
if ((directMemoryCacheAlignment & -directMemoryCacheAlignment) != directMemoryCacheAlignment) {
throw new IllegalArgumentException("directMemoryCacheAlignment: "
+ directMemoryCacheAlignment + " (expected: power of two)");
}
Fix alignment handling for pooled direct buffers (#11106) Motivation: Alignment handling was broken, and basically turned into a fixed offset into each allocation address regardless of its initial value, instead of ensuring that the allocated address is either aligned or bumped to the nearest alignment offset. The brokenness of the alignment handling extended so far, that overlapping ByteBuf instances could even be created, as was seen in #11101. Modification: Instead of fixing the per-allocation pointer bump, we now ensure that 1) the minimum page size is a whole multiple of the alignment, and 2) the reference memory for each chunk is bumped to the nearest aligned address, and finally 3) ensured that the reservations are whole multiples of the alignment, thus ensuring that the next allocation automatically occurs from an aligned address. Incidentally, (3) above comes for free because the reservations are in whole pages, and in (1) we ensured that pages are sized in whole multiples of the alignment. In order to ensure that the memory for a chunk is aligned, we introduce some new PlatformDependent infrastructure. The PlatformDependent.alignDirectBuffer will produce a slice of the given buffer, and the slice will have an address that is aligned. This method is plainly available on ByteBuffer in Java 9 onwards, but for pre-9 we have to use Unsafe, which means it can fail and might not be available on all platforms. Attempts to create a PooledByteBufAllocator that uses alignment, when this is not supported, will throw an exception. Luckily, I think use of aligned allocations are rare. Result: Aligned pooled byte bufs now work correctly, and never have any overlap. Fixes #11101
2021-03-23 17:07:06 +01:00
int pageShifts = validateAndCalculatePageShifts(pageSize, directMemoryCacheAlignment);
if (nHeapArena > 0) {
heapArenas = newArenaArray(nHeapArena);
List<PoolArenaMetric> metrics = new ArrayList<>(heapArenas.length);
for (int i = 0; i < heapArenas.length; i ++) {
PoolArena.HeapArena arena = new PoolArena.HeapArena(this,
pageSize, pageShifts, chunkSize,
directMemoryCacheAlignment);
heapArenas[i] = arena;
metrics.add(arena);
}
heapArenaMetrics = Collections.unmodifiableList(metrics);
} else {
heapArenas = null;
heapArenaMetrics = Collections.emptyList();
}
if (nDirectArena > 0) {
directArenas = newArenaArray(nDirectArena);
List<PoolArenaMetric> metrics = new ArrayList<>(directArenas.length);
for (int i = 0; i < directArenas.length; i ++) {
PoolArena.DirectArena arena = new PoolArena.DirectArena(
this, pageSize, pageShifts, chunkSize, directMemoryCacheAlignment);
directArenas[i] = arena;
metrics.add(arena);
}
directArenaMetrics = Collections.unmodifiableList(metrics);
} else {
directArenas = null;
directArenaMetrics = Collections.emptyList();
}
metric = new PooledByteBufAllocatorMetric(this);
}
@SuppressWarnings("unchecked")
private static <T> PoolArena<T>[] newArenaArray(int size) {
return new PoolArena[size];
}
Fix alignment handling for pooled direct buffers (#11106) Motivation: Alignment handling was broken, and basically turned into a fixed offset into each allocation address regardless of its initial value, instead of ensuring that the allocated address is either aligned or bumped to the nearest alignment offset. The brokenness of the alignment handling extended so far, that overlapping ByteBuf instances could even be created, as was seen in #11101. Modification: Instead of fixing the per-allocation pointer bump, we now ensure that 1) the minimum page size is a whole multiple of the alignment, and 2) the reference memory for each chunk is bumped to the nearest aligned address, and finally 3) ensured that the reservations are whole multiples of the alignment, thus ensuring that the next allocation automatically occurs from an aligned address. Incidentally, (3) above comes for free because the reservations are in whole pages, and in (1) we ensured that pages are sized in whole multiples of the alignment. In order to ensure that the memory for a chunk is aligned, we introduce some new PlatformDependent infrastructure. The PlatformDependent.alignDirectBuffer will produce a slice of the given buffer, and the slice will have an address that is aligned. This method is plainly available on ByteBuffer in Java 9 onwards, but for pre-9 we have to use Unsafe, which means it can fail and might not be available on all platforms. Attempts to create a PooledByteBufAllocator that uses alignment, when this is not supported, will throw an exception. Luckily, I think use of aligned allocations are rare. Result: Aligned pooled byte bufs now work correctly, and never have any overlap. Fixes #11101
2021-03-23 17:07:06 +01:00
private static int validateAndCalculatePageShifts(int pageSize, int alignment) {
if (pageSize < MIN_PAGE_SIZE) {
Fix alignment handling for pooled direct buffers (#11106) Motivation: Alignment handling was broken, and basically turned into a fixed offset into each allocation address regardless of its initial value, instead of ensuring that the allocated address is either aligned or bumped to the nearest alignment offset. The brokenness of the alignment handling extended so far, that overlapping ByteBuf instances could even be created, as was seen in #11101. Modification: Instead of fixing the per-allocation pointer bump, we now ensure that 1) the minimum page size is a whole multiple of the alignment, and 2) the reference memory for each chunk is bumped to the nearest aligned address, and finally 3) ensured that the reservations are whole multiples of the alignment, thus ensuring that the next allocation automatically occurs from an aligned address. Incidentally, (3) above comes for free because the reservations are in whole pages, and in (1) we ensured that pages are sized in whole multiples of the alignment. In order to ensure that the memory for a chunk is aligned, we introduce some new PlatformDependent infrastructure. The PlatformDependent.alignDirectBuffer will produce a slice of the given buffer, and the slice will have an address that is aligned. This method is plainly available on ByteBuffer in Java 9 onwards, but for pre-9 we have to use Unsafe, which means it can fail and might not be available on all platforms. Attempts to create a PooledByteBufAllocator that uses alignment, when this is not supported, will throw an exception. Luckily, I think use of aligned allocations are rare. Result: Aligned pooled byte bufs now work correctly, and never have any overlap. Fixes #11101
2021-03-23 17:07:06 +01:00
throw new IllegalArgumentException("pageSize: " + pageSize + " (expected: " + MIN_PAGE_SIZE + ')');
}
if ((pageSize & pageSize - 1) != 0) {
throw new IllegalArgumentException("pageSize: " + pageSize + " (expected: power of 2)");
}
Fix alignment handling for pooled direct buffers (#11106) Motivation: Alignment handling was broken, and basically turned into a fixed offset into each allocation address regardless of its initial value, instead of ensuring that the allocated address is either aligned or bumped to the nearest alignment offset. The brokenness of the alignment handling extended so far, that overlapping ByteBuf instances could even be created, as was seen in #11101. Modification: Instead of fixing the per-allocation pointer bump, we now ensure that 1) the minimum page size is a whole multiple of the alignment, and 2) the reference memory for each chunk is bumped to the nearest aligned address, and finally 3) ensured that the reservations are whole multiples of the alignment, thus ensuring that the next allocation automatically occurs from an aligned address. Incidentally, (3) above comes for free because the reservations are in whole pages, and in (1) we ensured that pages are sized in whole multiples of the alignment. In order to ensure that the memory for a chunk is aligned, we introduce some new PlatformDependent infrastructure. The PlatformDependent.alignDirectBuffer will produce a slice of the given buffer, and the slice will have an address that is aligned. This method is plainly available on ByteBuffer in Java 9 onwards, but for pre-9 we have to use Unsafe, which means it can fail and might not be available on all platforms. Attempts to create a PooledByteBufAllocator that uses alignment, when this is not supported, will throw an exception. Luckily, I think use of aligned allocations are rare. Result: Aligned pooled byte bufs now work correctly, and never have any overlap. Fixes #11101
2021-03-23 17:07:06 +01:00
if (pageSize < alignment) {
throw new IllegalArgumentException("Alignment cannot be greater than page size. " +
"Alignment: " + alignment + ", page size: " + pageSize + '.');
}
// Logarithm base 2. At this point we know that pageSize is a power of two.
return Integer.SIZE - 1 - Integer.numberOfLeadingZeros(pageSize);
}
private static int validateAndCalculateChunkSize(int pageSize, int maxOrder) {
if (maxOrder > 14) {
throw new IllegalArgumentException("maxOrder: " + maxOrder + " (expected: 0-14)");
}
// Ensure the resulting chunkSize does not overflow.
int chunkSize = pageSize;
for (int i = maxOrder; i > 0; i --) {
if (chunkSize > MAX_CHUNK_SIZE / 2) {
throw new IllegalArgumentException(String.format(
"pageSize (%d) << maxOrder (%d) must not exceed %d", pageSize, maxOrder, MAX_CHUNK_SIZE));
}
chunkSize <<= 1;
}
return chunkSize;
}
@Override
protected ByteBuf newHeapBuffer(int initialCapacity, int maxCapacity) {
PoolThreadCache cache = threadCache.get();
PoolArena<byte[]> heapArena = cache.heapArena;
final ByteBuf buf;
if (heapArena != null) {
buf = heapArena.allocate(cache, initialCapacity, maxCapacity);
} else {
buf = PlatformDependent.hasUnsafe() ?
new UnpooledUnsafeHeapByteBuf(this, initialCapacity, maxCapacity) :
new UnpooledHeapByteBuf(this, initialCapacity, maxCapacity);
}
return toLeakAwareBuffer(buf);
}
@Override
protected ByteBuf newDirectBuffer(int initialCapacity, int maxCapacity) {
PoolThreadCache cache = threadCache.get();
PoolArena<ByteBuffer> directArena = cache.directArena;
final ByteBuf buf;
if (directArena != null) {
buf = directArena.allocate(cache, initialCapacity, maxCapacity);
} else {
buf = PlatformDependent.hasUnsafe() ?
UnsafeByteBufUtil.newUnsafeDirectByteBuf(this, initialCapacity, maxCapacity) :
new UnpooledDirectByteBuf(this, initialCapacity, maxCapacity);
}
return toLeakAwareBuffer(buf);
}
/**
2021-09-17 16:28:14 +02:00
* Default number of heap arenas - System Property: io.net5.allocator.numHeapArenas - default 2 * cores
*/
public static int defaultNumHeapArena() {
return DEFAULT_NUM_HEAP_ARENA;
}
/**
2021-09-17 16:28:14 +02:00
* Default number of direct arenas - System Property: io.net5.allocator.numDirectArenas - default 2 * cores
*/
public static int defaultNumDirectArena() {
return DEFAULT_NUM_DIRECT_ARENA;
}
/**
2021-09-17 16:28:14 +02:00
* Default buffer page size - System Property: io.net5.allocator.pageSize - default 8192
*/
public static int defaultPageSize() {
return DEFAULT_PAGE_SIZE;
}
/**
2021-09-17 16:28:14 +02:00
* Default maximum order - System Property: io.net5.allocator.maxOrder - default 11
*/
public static int defaultMaxOrder() {
return DEFAULT_MAX_ORDER;
}
/**
2021-09-17 16:28:14 +02:00
* Default thread caching behavior - System Property: io.net5.allocator.useCacheForAllThreads - default true
*/
public static boolean defaultUseCacheForAllThreads() {
return DEFAULT_USE_CACHE_FOR_ALL_THREADS;
}
/**
2021-09-17 16:28:14 +02:00
* Default prefer direct - System Property: io.net5.noPreferDirect - default false
*/
public static boolean defaultPreferDirect() {
return PlatformDependent.directBufferPreferred();
}
/**
* Default tiny cache size - default 0
*
* @deprecated Tiny caches have been merged into small caches.
*/
@Deprecated
public static int defaultTinyCacheSize() {
return 0;
}
/**
2021-09-17 16:28:14 +02:00
* Default small cache size - System Property: io.net5.allocator.smallCacheSize - default 256
*/
public static int defaultSmallCacheSize() {
return DEFAULT_SMALL_CACHE_SIZE;
}
/**
2021-09-17 16:28:14 +02:00
* Default normal cache size - System Property: io.net5.allocator.normalCacheSize - default 64
*/
public static int defaultNormalCacheSize() {
return DEFAULT_NORMAL_CACHE_SIZE;
}
/**
2017-04-19 22:37:03 +02:00
* Return {@code true} if direct memory cache alignment is supported, {@code false} otherwise.
*/
public static boolean isDirectMemoryCacheAlignmentSupported() {
return PlatformDependent.hasUnsafe();
}
@Override
public boolean isDirectBufferPooled() {
return directArenas != null;
}
/**
* Returns {@code true} if the calling {@link Thread} has a {@link ThreadLocal} cache for the allocated
* buffers.
*/
@Deprecated
public boolean hasThreadLocalCache() {
Refactor FastThreadLocal to simplify TLV management Motivation: When Netty runs in a managed environment such as web application server, Netty needs to provide an explicit way to remove the thread-local variables it created to prevent class loader leaks. FastThreadLocal uses different execution paths for storing a thread-local variable depending on the type of the current thread. It increases the complexity of thread-local removal. Modifications: - Moved FastThreadLocal and FastThreadLocalThread out of the internal package so that a user can use it. - FastThreadLocal now keeps track of all thread local variables it has initialized, and calling FastThreadLocal.removeAll() will remove all thread-local variables of the caller thread. - Added FastThreadLocal.size() for diagnostics and tests - Introduce InternalThreadLocalMap which is a mixture of hard-wired thread local variable fields and extensible indexed variables - FastThreadLocal now uses InternalThreadLocalMap to implement a thread-local variable. - Added ThreadDeathWatcher.unwatch() so that PooledByteBufAllocator tells it to stop watching when its thread-local cache has been freed by FastThreadLocal.removeAll(). - Added FastThreadLocalTest to ensure that removeAll() works - Added microbenchmark for FastThreadLocal and JDK ThreadLocal - Upgraded to JMH 0.9 Result: - A user can remove all thread-local variables Netty created, as long as he or she did not exit from the current thread. (Note that there's no way to remove a thread-local variable from outside of the thread.) - FastThreadLocal exposes more useful operations such as isSet() because we always implement a thread local variable via InternalThreadLocalMap instead of falling back to JDK ThreadLocal. - FastThreadLocalBenchmark shows that this change improves the performance of FastThreadLocal even more.
2014-06-17 11:37:58 +02:00
return threadCache.isSet();
}
/**
* Free all cached buffers for the calling {@link Thread}.
*/
@Deprecated
public void freeThreadLocalCache() {
Refactor FastThreadLocal to simplify TLV management Motivation: When Netty runs in a managed environment such as web application server, Netty needs to provide an explicit way to remove the thread-local variables it created to prevent class loader leaks. FastThreadLocal uses different execution paths for storing a thread-local variable depending on the type of the current thread. It increases the complexity of thread-local removal. Modifications: - Moved FastThreadLocal and FastThreadLocalThread out of the internal package so that a user can use it. - FastThreadLocal now keeps track of all thread local variables it has initialized, and calling FastThreadLocal.removeAll() will remove all thread-local variables of the caller thread. - Added FastThreadLocal.size() for diagnostics and tests - Introduce InternalThreadLocalMap which is a mixture of hard-wired thread local variable fields and extensible indexed variables - FastThreadLocal now uses InternalThreadLocalMap to implement a thread-local variable. - Added ThreadDeathWatcher.unwatch() so that PooledByteBufAllocator tells it to stop watching when its thread-local cache has been freed by FastThreadLocal.removeAll(). - Added FastThreadLocalTest to ensure that removeAll() works - Added microbenchmark for FastThreadLocal and JDK ThreadLocal - Upgraded to JMH 0.9 Result: - A user can remove all thread-local variables Netty created, as long as he or she did not exit from the current thread. (Note that there's no way to remove a thread-local variable from outside of the thread.) - FastThreadLocal exposes more useful operations such as isSet() because we always implement a thread local variable via InternalThreadLocalMap instead of falling back to JDK ThreadLocal. - FastThreadLocalBenchmark shows that this change improves the performance of FastThreadLocal even more.
2014-06-17 11:37:58 +02:00
threadCache.remove();
}
final class PoolThreadLocalCache extends FastThreadLocal<PoolThreadCache> {
private final boolean useCacheForAllThreads;
PoolThreadLocalCache(boolean useCacheForAllThreads) {
this.useCacheForAllThreads = useCacheForAllThreads;
}
@Override
Change arena to thread cache mapping algorithm to be closer to ideal. Motivation: Circular assignment of arenas to thread caches can lead to less than optimal mappings in cases where threads are (frequently) shutdown and started. Example Scenario: There are a total of 2 arenas. The first two threads performing an allocation would lead to the following mapping: Thread 0 -> Arena 0 Thread 1 -> Arena 1 Now, assume Thread 1 is shut down and another Thread 2 is started. The current circular assignment algorithm would lead to the following mapping: Thread 0 -> Arena 0 Thread 2 -> Arena 0 Ideally, we want Thread 2 to use Arena 1 though. Presumably, this is not much of an issue for most Netty applications that do all the allocations inside the eventloop, as eventloop threads are seldomly shut down and restarted. However, applications that only use the netty-buffer package or implement their own threading model outside the eventloop might suffer from increased contention. For example, gRPC Java when using the blocking stub performs some allocations outside the eventloop and within its own thread pool that is dynamically sized depending on system load. Modifications: Implement a linear scan algorithm that assigns a new thread cache to the arena that currently backs the fewest thread caches. Result: Closer to ideal mappings between thread caches and arenas. In order to always get an ideal mapping, we would have to re-balance the mapping whenever a thread dies. However, that's difficult because of deallocation.
2016-03-14 17:25:43 +01:00
protected synchronized PoolThreadCache initialValue() {
final PoolArena<byte[]> heapArena = leastUsedArena(heapArenas);
final PoolArena<ByteBuffer> directArena = leastUsedArena(directArenas);
final Thread current = Thread.currentThread();
if (useCacheForAllThreads || current instanceof FastThreadLocalThread) {
final PoolThreadCache cache = new PoolThreadCache(
heapArena, directArena, smallCacheSize, normalCacheSize,
DEFAULT_MAX_CACHED_BUFFER_CAPACITY, DEFAULT_CACHE_TRIM_INTERVAL);
if (DEFAULT_CACHE_TRIM_INTERVAL_MILLIS > 0) {
final EventExecutor executor = ThreadExecutorMap.currentExecutor();
if (executor != null) {
executor.scheduleAtFixedRate(trimTask, DEFAULT_CACHE_TRIM_INTERVAL_MILLIS,
DEFAULT_CACHE_TRIM_INTERVAL_MILLIS, TimeUnit.MILLISECONDS);
}
}
return cache;
}
// No caching so just use 0 as sizes.
return new PoolThreadCache(heapArena, directArena, 0, 0, 0, 0);
}
Refactor FastThreadLocal to simplify TLV management Motivation: When Netty runs in a managed environment such as web application server, Netty needs to provide an explicit way to remove the thread-local variables it created to prevent class loader leaks. FastThreadLocal uses different execution paths for storing a thread-local variable depending on the type of the current thread. It increases the complexity of thread-local removal. Modifications: - Moved FastThreadLocal and FastThreadLocalThread out of the internal package so that a user can use it. - FastThreadLocal now keeps track of all thread local variables it has initialized, and calling FastThreadLocal.removeAll() will remove all thread-local variables of the caller thread. - Added FastThreadLocal.size() for diagnostics and tests - Introduce InternalThreadLocalMap which is a mixture of hard-wired thread local variable fields and extensible indexed variables - FastThreadLocal now uses InternalThreadLocalMap to implement a thread-local variable. - Added ThreadDeathWatcher.unwatch() so that PooledByteBufAllocator tells it to stop watching when its thread-local cache has been freed by FastThreadLocal.removeAll(). - Added FastThreadLocalTest to ensure that removeAll() works - Added microbenchmark for FastThreadLocal and JDK ThreadLocal - Upgraded to JMH 0.9 Result: - A user can remove all thread-local variables Netty created, as long as he or she did not exit from the current thread. (Note that there's no way to remove a thread-local variable from outside of the thread.) - FastThreadLocal exposes more useful operations such as isSet() because we always implement a thread local variable via InternalThreadLocalMap instead of falling back to JDK ThreadLocal. - FastThreadLocalBenchmark shows that this change improves the performance of FastThreadLocal even more.
2014-06-17 11:37:58 +02:00
@Override
Change arena to thread cache mapping algorithm to be closer to ideal. Motivation: Circular assignment of arenas to thread caches can lead to less than optimal mappings in cases where threads are (frequently) shutdown and started. Example Scenario: There are a total of 2 arenas. The first two threads performing an allocation would lead to the following mapping: Thread 0 -> Arena 0 Thread 1 -> Arena 1 Now, assume Thread 1 is shut down and another Thread 2 is started. The current circular assignment algorithm would lead to the following mapping: Thread 0 -> Arena 0 Thread 2 -> Arena 0 Ideally, we want Thread 2 to use Arena 1 though. Presumably, this is not much of an issue for most Netty applications that do all the allocations inside the eventloop, as eventloop threads are seldomly shut down and restarted. However, applications that only use the netty-buffer package or implement their own threading model outside the eventloop might suffer from increased contention. For example, gRPC Java when using the blocking stub performs some allocations outside the eventloop and within its own thread pool that is dynamically sized depending on system load. Modifications: Implement a linear scan algorithm that assigns a new thread cache to the arena that currently backs the fewest thread caches. Result: Closer to ideal mappings between thread caches and arenas. In order to always get an ideal mapping, we would have to re-balance the mapping whenever a thread dies. However, that's difficult because of deallocation.
2016-03-14 17:25:43 +01:00
protected void onRemoval(PoolThreadCache threadCache) {
Don't try to put back MemoryRegionCache.Entry objects into the Recycler when recycled because of a finalizer. (#8955) Motivation: In MemoryRegionCache.Entry we use the Recycler to reduce GC pressure and churn. The problem is that these will also be recycled when the PoolThreadCache is collected and finalize() is called. This then can have the effect that we try to load class but the WebApp is already stoped. This will produce an stacktrace like this on Tomcat: ``` 19-Mar-2019 15:53:21.351 INFO [Finalizer] org.apache.catalina.loader.WebappClassLoaderBase.checkStateForResourceLoading Illegal access: this web application instance has been stopped already. Could not load [java.util.WeakHashMap]. The following stack trace is thrown for debugging purposes as well as to attempt to terminate the thread which caused the illegal access. java.lang.IllegalStateException: Illegal access: this web application instance has been stopped already. Could not load [java.util.WeakHashMap]. The following stack trace is thrown for debugging purposes as well as to attempt to terminate the thread which caused the illegal access. at org.apache.catalina.loader.WebappClassLoaderBase.checkStateForResourceLoading(WebappClassLoaderBase.java:1383) at org.apache.catalina.loader.WebappClassLoaderBase.checkStateForClassLoading(WebappClassLoaderBase.java:1371) at org.apache.catalina.loader.WebappClassLoaderBase.loadClass(WebappClassLoaderBase.java:1224) at org.apache.catalina.loader.WebappClassLoaderBase.loadClass(WebappClassLoaderBase.java:1186) at io.netty.util.Recycler$3.initialValue(Recycler.java:233) at io.netty.util.Recycler$3.initialValue(Recycler.java:230) at io.netty.util.concurrent.FastThreadLocal.initialize(FastThreadLocal.java:188) at io.netty.util.concurrent.FastThreadLocal.get(FastThreadLocal.java:142) at io.netty.util.Recycler$Stack.pushLater(Recycler.java:624) at io.netty.util.Recycler$Stack.push(Recycler.java:597) at io.netty.util.Recycler$DefaultHandle.recycle(Recycler.java:225) at io.netty.buffer.PoolThreadCache$MemoryRegionCache$Entry.recycle(PoolThreadCache.java:478) at io.netty.buffer.PoolThreadCache$MemoryRegionCache.freeEntry(PoolThreadCache.java:459) at io.netty.buffer.PoolThreadCache$MemoryRegionCache.free(PoolThreadCache.java:430) at io.netty.buffer.PoolThreadCache$MemoryRegionCache.free(PoolThreadCache.java:422) at io.netty.buffer.PoolThreadCache.free(PoolThreadCache.java:279) at io.netty.buffer.PoolThreadCache.free(PoolThreadCache.java:270) at io.netty.buffer.PoolThreadCache.free(PoolThreadCache.java:241) at io.netty.buffer.PoolThreadCache.finalize(PoolThreadCache.java:230) at java.lang.System$2.invokeFinalize(System.java:1270) at java.lang.ref.Finalizer.runFinalizer(Finalizer.java:102) at java.lang.ref.Finalizer.access$100(Finalizer.java:34) at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:217) ``` Beside this we also need to ensure we not try to lazy load SizeClass when the finalizer is used as it may not be present anymore if the ClassLoader is already destroyed. This would produce an error like: ``` 20-Mar-2019 11:26:35.254 INFO [Finalizer] org.apache.catalina.loader.WebappClassLoaderBase.checkStateForResourceLoading Illegal access: this web application instance has been stopped already. Could not load [io.netty.buffer.PoolArena$1]. The following stack trace is thrown for debugging purposes as well as to attempt to terminate the thread which caused the illegal access. java.lang.IllegalStateException: Illegal access: this web application instance has been stopped already. Could not load [io.netty.buffer.PoolArena$1]. The following stack trace is thrown for debugging purposes as well as to attempt to terminate the thread which caused the illegal access. at org.apache.catalina.loader.WebappClassLoaderBase.checkStateForResourceLoading(WebappClassLoaderBase.java:1383) at org.apache.catalina.loader.WebappClassLoaderBase.checkStateForClassLoading(WebappClassLoaderBase.java:1371) at org.apache.catalina.loader.WebappClassLoaderBase.loadClass(WebappClassLoaderBase.java:1224) at org.apache.catalina.loader.WebappClassLoaderBase.loadClass(WebappClassLoaderBase.java:1186) at io.netty.buffer.PoolArena.freeChunk(PoolArena.java:287) at io.netty.buffer.PoolThreadCache$MemoryRegionCache.freeEntry(PoolThreadCache.java:464) at io.netty.buffer.PoolThreadCache$MemoryRegionCache.free(PoolThreadCache.java:429) at io.netty.buffer.PoolThreadCache$MemoryRegionCache.free(PoolThreadCache.java:421) at io.netty.buffer.PoolThreadCache.free(PoolThreadCache.java:278) at io.netty.buffer.PoolThreadCache.free(PoolThreadCache.java:269) at io.netty.buffer.PoolThreadCache.free(PoolThreadCache.java:240) at io.netty.buffer.PoolThreadCache.finalize(PoolThreadCache.java:229) at java.lang.System$2.invokeFinalize(System.java:1270) at java.lang.ref.Finalizer.runFinalizer(Finalizer.java:102) at java.lang.ref.Finalizer.access$100(Finalizer.java:34) at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:217) ``` Modifications: - Only try to put the Entry back into the Recycler if the PoolThredCache is not destroyed because of the finalizer. - Only try to access SizeClass if not triggered by finalizer. Result: No IllegalStateException anymoe when a webapp is reloaded in Tomcat that uses netty and uses the PooledByteBufAllocator.
2019-03-22 12:16:21 +01:00
threadCache.free(false);
Change arena to thread cache mapping algorithm to be closer to ideal. Motivation: Circular assignment of arenas to thread caches can lead to less than optimal mappings in cases where threads are (frequently) shutdown and started. Example Scenario: There are a total of 2 arenas. The first two threads performing an allocation would lead to the following mapping: Thread 0 -> Arena 0 Thread 1 -> Arena 1 Now, assume Thread 1 is shut down and another Thread 2 is started. The current circular assignment algorithm would lead to the following mapping: Thread 0 -> Arena 0 Thread 2 -> Arena 0 Ideally, we want Thread 2 to use Arena 1 though. Presumably, this is not much of an issue for most Netty applications that do all the allocations inside the eventloop, as eventloop threads are seldomly shut down and restarted. However, applications that only use the netty-buffer package or implement their own threading model outside the eventloop might suffer from increased contention. For example, gRPC Java when using the blocking stub performs some allocations outside the eventloop and within its own thread pool that is dynamically sized depending on system load. Modifications: Implement a linear scan algorithm that assigns a new thread cache to the arena that currently backs the fewest thread caches. Result: Closer to ideal mappings between thread caches and arenas. In order to always get an ideal mapping, we would have to re-balance the mapping whenever a thread dies. However, that's difficult because of deallocation.
2016-03-14 17:25:43 +01:00
}
private <T> PoolArena<T> leastUsedArena(PoolArena<T>[] arenas) {
if (arenas == null || arenas.length == 0) {
return null;
}
PoolArena<T> minArena = arenas[0];
for (int i = 1; i < arenas.length; i++) {
PoolArena<T> arena = arenas[i];
if (arena.numThreadCaches.get() < minArena.numThreadCaches.get()) {
minArena = arena;
}
}
return minArena;
}
}
@Override
public PooledByteBufAllocatorMetric metric() {
return metric;
}
/**
* Return the number of heap arenas.
*
* @deprecated use {@link PooledByteBufAllocatorMetric#numHeapArenas()}.
*/
@Deprecated
public int numHeapArenas() {
return heapArenaMetrics.size();
}
/**
* Return the number of direct arenas.
*
* @deprecated use {@link PooledByteBufAllocatorMetric#numDirectArenas()}.
*/
@Deprecated
public int numDirectArenas() {
return directArenaMetrics.size();
}
/**
* Return a {@link List} of all heap {@link PoolArenaMetric}s that are provided by this pool.
*
* @deprecated use {@link PooledByteBufAllocatorMetric#heapArenas()}.
*/
@Deprecated
public List<PoolArenaMetric> heapArenas() {
return heapArenaMetrics;
}
/**
* Return a {@link List} of all direct {@link PoolArenaMetric}s that are provided by this pool.
*
* @deprecated use {@link PooledByteBufAllocatorMetric#directArenas()}.
*/
@Deprecated
public List<PoolArenaMetric> directArenas() {
return directArenaMetrics;
}
/**
* Return the number of thread local caches used by this {@link PooledByteBufAllocator}.
*
* @deprecated use {@link PooledByteBufAllocatorMetric#numThreadLocalCaches()}.
*/
@Deprecated
public int numThreadLocalCaches() {
Change arena to thread cache mapping algorithm to be closer to ideal. Motivation: Circular assignment of arenas to thread caches can lead to less than optimal mappings in cases where threads are (frequently) shutdown and started. Example Scenario: There are a total of 2 arenas. The first two threads performing an allocation would lead to the following mapping: Thread 0 -> Arena 0 Thread 1 -> Arena 1 Now, assume Thread 1 is shut down and another Thread 2 is started. The current circular assignment algorithm would lead to the following mapping: Thread 0 -> Arena 0 Thread 2 -> Arena 0 Ideally, we want Thread 2 to use Arena 1 though. Presumably, this is not much of an issue for most Netty applications that do all the allocations inside the eventloop, as eventloop threads are seldomly shut down and restarted. However, applications that only use the netty-buffer package or implement their own threading model outside the eventloop might suffer from increased contention. For example, gRPC Java when using the blocking stub performs some allocations outside the eventloop and within its own thread pool that is dynamically sized depending on system load. Modifications: Implement a linear scan algorithm that assigns a new thread cache to the arena that currently backs the fewest thread caches. Result: Closer to ideal mappings between thread caches and arenas. In order to always get an ideal mapping, we would have to re-balance the mapping whenever a thread dies. However, that's difficult because of deallocation.
2016-03-14 17:25:43 +01:00
PoolArena<?>[] arenas = heapArenas != null ? heapArenas : directArenas;
if (arenas == null) {
return 0;
}
int total = 0;
for (PoolArena<?> arena : arenas) {
total += arena.numThreadCaches.get();
Change arena to thread cache mapping algorithm to be closer to ideal. Motivation: Circular assignment of arenas to thread caches can lead to less than optimal mappings in cases where threads are (frequently) shutdown and started. Example Scenario: There are a total of 2 arenas. The first two threads performing an allocation would lead to the following mapping: Thread 0 -> Arena 0 Thread 1 -> Arena 1 Now, assume Thread 1 is shut down and another Thread 2 is started. The current circular assignment algorithm would lead to the following mapping: Thread 0 -> Arena 0 Thread 2 -> Arena 0 Ideally, we want Thread 2 to use Arena 1 though. Presumably, this is not much of an issue for most Netty applications that do all the allocations inside the eventloop, as eventloop threads are seldomly shut down and restarted. However, applications that only use the netty-buffer package or implement their own threading model outside the eventloop might suffer from increased contention. For example, gRPC Java when using the blocking stub performs some allocations outside the eventloop and within its own thread pool that is dynamically sized depending on system load. Modifications: Implement a linear scan algorithm that assigns a new thread cache to the arena that currently backs the fewest thread caches. Result: Closer to ideal mappings between thread caches and arenas. In order to always get an ideal mapping, we would have to re-balance the mapping whenever a thread dies. However, that's difficult because of deallocation.
2016-03-14 17:25:43 +01:00
}
return total;
}
/**
* Return the size of the tiny cache.
*
* @deprecated use {@link PooledByteBufAllocatorMetric#tinyCacheSize()}.
*/
@Deprecated
public int tinyCacheSize() {
return 0;
}
/**
* Return the size of the small cache.
*
* @deprecated use {@link PooledByteBufAllocatorMetric#smallCacheSize()}.
*/
@Deprecated
public int smallCacheSize() {
return smallCacheSize;
}
/**
* Return the size of the normal cache.
*
* @deprecated use {@link PooledByteBufAllocatorMetric#normalCacheSize()}.
*/
@Deprecated
public int normalCacheSize() {
return normalCacheSize;
}
/**
* Return the chunk size for an arena.
*
* @deprecated use {@link PooledByteBufAllocatorMetric#chunkSize()}.
*/
@Deprecated
public final int chunkSize() {
return chunkSize;
}
final long usedHeapMemory() {
return usedMemory(heapArenas);
}
final long usedDirectMemory() {
return usedMemory(directArenas);
}
private static long usedMemory(PoolArena<?>[] arenas) {
if (arenas == null) {
return -1;
}
long used = 0;
for (PoolArena<?> arena : arenas) {
used += arena.numActiveBytes();
if (used < 0) {
return Long.MAX_VALUE;
}
}
return used;
}
/**
* Returns the number of bytes of heap memory that is currently pinned to heap buffers allocated by a
* {@link ByteBufAllocator}, or {@code -1} if unknown.
* A buffer can pin more memory than its {@linkplain ByteBuf#capacity() capacity} might indicate,
* due to implementation details of the allocator.
*/
public final long pinnedHeapMemory() {
return pinnedMemory(heapArenas);
}
/**
* Returns the number of bytes of direct memory that is currently pinned to direct buffers allocated by a
* {@link ByteBufAllocator}, or {@code -1} if unknown.
* A buffer can pin more memory than its {@linkplain ByteBuf#capacity() capacity} might indicate,
* due to implementation details of the allocator.
*/
public final long pinnedDirectMemory() {
return pinnedMemory(directArenas);
}
private static long pinnedMemory(PoolArena<?>[] arenas) {
if (arenas == null) {
return -1;
}
long used = 0;
for (PoolArena<?> arena : arenas) {
used += arena.numPinnedBytes();
if (used < 0) {
return Long.MAX_VALUE;
}
}
return used;
}
final PoolThreadCache threadCache() {
PoolThreadCache cache = threadCache.get();
assert cache != null;
return cache;
}
/**
* Trim thread local cache for the current {@link Thread}, which will give back any cached memory that was not
* allocated frequently since the last trim operation.
*
* Returns {@code true} if a cache for the current {@link Thread} exists and so was trimmed, false otherwise.
*/
public boolean trimCurrentThreadCache() {
PoolThreadCache cache = threadCache.getIfExists();
if (cache != null) {
cache.trim();
return true;
}
return false;
}
/**
* Returns the status of the allocator (which contains all metrics) as string. Be aware this may be expensive
* and so should not called too frequently.
*/
public String dumpStats() {
int heapArenasLen = heapArenas == null ? 0 : heapArenas.length;
StringBuilder buf = new StringBuilder(512)
.append(heapArenasLen)
.append(" heap arena(s):")
.append(StringUtil.NEWLINE);
if (heapArenasLen > 0) {
for (PoolArena<byte[]> a: heapArenas) {
buf.append(a);
}
}
int directArenasLen = directArenas == null ? 0 : directArenas.length;
buf.append(directArenasLen)
.append(" direct arena(s):")
.append(StringUtil.NEWLINE);
if (directArenasLen > 0) {
for (PoolArena<ByteBuffer> a: directArenas) {
buf.append(a);
}
}
return buf.toString();
}
}