diff --git a/buffer/src/main/java/io/netty/buffer/PoolChunk.java b/buffer/src/main/java/io/netty/buffer/PoolChunk.java index 93eda84104..d4b952ffda 100644 --- a/buffer/src/main/java/io/netty/buffer/PoolChunk.java +++ b/buffer/src/main/java/io/netty/buffer/PoolChunk.java @@ -16,6 +16,10 @@ package io.netty.buffer; +import java.nio.ByteBuffer; +import java.util.ArrayDeque; +import java.util.Deque; + /** * Description of algorithm for PageRun/PoolSubpage allocation from PoolChunk * @@ -107,7 +111,6 @@ final class PoolChunk implements PoolChunkMetric { final T memory; final boolean unpooled; final int offset; - private final byte[] memoryMap; private final byte[] depthMap; private final PoolSubpage[] subpages; @@ -122,6 +125,13 @@ final class PoolChunk implements PoolChunkMetric { /** Used to mark memory as unusable */ private final byte unusable; + // Use as cache for ByteBuffer created from the memory. These are just duplicates and so are only a container + // around the memory itself. These are often needed for operations within the Pooled*DirectByteBuf and so + // may produce extra GC, which can be greatly reduced by caching the duplicates. + // + // This may be null if the PoolChunk is unpooled as pooling the ByteBuffer instances does not make any sense here. + private final Deque<ByteBuffer> cachedNioBuffers; + private int freeBytes; PoolChunkList parent; @@ -163,6 +173,7 @@ final class PoolChunk implements PoolChunkMetric { } subpages = newSubpageArray(maxSubpageAllocs); + cachedNioBuffers = new ArrayDeque<ByteBuffer>(); } /** Creates a special chunk that is not pooled. */ @@ -182,6 +193,21 @@ final class PoolChunk implements PoolChunkMetric { chunkSize = size; log2ChunkSize = log2(chunkSize); maxSubpageAllocs = 0; + cachedNioBuffers = null; + } + + ByteBuffer pollCachedNioBuffer() { + // We use LIFO to increase the chance that its still "hot" and so in the CPU / L* cache. + return cachedNioBuffers != null ? 
cachedNioBuffers.pollLast() : null; + } + + void offerCachedNioBuffer(ByteBuffer nioBuffer) { + // Only cache while the size is strictly below the limit (so at most the limit is held) and if its not unpooled. + // Otherwise just drop it on the floor and let the GC collect it. + if (cachedNioBuffers != null && + cachedNioBuffers.size() < PooledByteBufAllocator.DEFAULT_MAX_CACHED_BYTEBUFFERS_PER_CHUNK) { + cachedNioBuffers.offer(nioBuffer); + } } @SuppressWarnings("unchecked") diff --git a/buffer/src/main/java/io/netty/buffer/PooledByteBuf.java b/buffer/src/main/java/io/netty/buffer/PooledByteBuf.java index 56a4be3872..699ca1ca62 100644 --- a/buffer/src/main/java/io/netty/buffer/PooledByteBuf.java +++ b/buffer/src/main/java/io/netty/buffer/PooledByteBuf.java @@ -56,13 +56,13 @@ abstract class PooledByteBuf extends AbstractReferenceCountedByteBuf { this.chunk = chunk; memory = chunk.memory; + tmpNioBuf = chunk.pollCachedNioBuffer(); allocator = chunk.arena.parent; this.cache = cache; this.handle = handle; this.offset = offset; this.length = length; this.maxLength = maxLength; - tmpNioBuf = null; } /** @@ -166,7 +166,11 @@ abstract class PooledByteBuf extends AbstractReferenceCountedByteBuf { final long handle = this.handle; this.handle = -1; memory = null; - tmpNioBuf = null; + if (tmpNioBuf != null) { + // Try to put back into the cache for later usage. 
+ chunk.offerCachedNioBuffer(tmpNioBuf); + tmpNioBuf = null; + } chunk.arena.free(chunk, handle, maxLength, cache); chunk = null; recycle(); diff --git a/buffer/src/main/java/io/netty/buffer/PooledByteBufAllocator.java b/buffer/src/main/java/io/netty/buffer/PooledByteBufAllocator.java index aa9ee28b94..92bb476fe7 100644 --- a/buffer/src/main/java/io/netty/buffer/PooledByteBufAllocator.java +++ b/buffer/src/main/java/io/netty/buffer/PooledByteBufAllocator.java @@ -45,6 +45,7 @@ public class PooledByteBufAllocator extends AbstractByteBufAllocator implements private static final int DEFAULT_CACHE_TRIM_INTERVAL; private static final boolean DEFAULT_USE_CACHE_FOR_ALL_THREADS; private static final int DEFAULT_DIRECT_MEMORY_CACHE_ALIGNMENT; + static final int DEFAULT_MAX_CACHED_BYTEBUFFERS_PER_CHUNK; private static final int MIN_PAGE_SIZE = 4096; private static final int MAX_CHUNK_SIZE = (int) (((long) Integer.MAX_VALUE + 1) / 2); @@ -116,6 +117,9 @@ public class PooledByteBufAllocator extends AbstractByteBufAllocator implements DEFAULT_DIRECT_MEMORY_CACHE_ALIGNMENT = SystemPropertyUtil.getInt( "io.netty.allocator.directMemoryCacheAlignment", 0); + DEFAULT_MAX_CACHED_BYTEBUFFERS_PER_CHUNK = SystemPropertyUtil.getInt( + "io.netty.allocator.maxCachedByteBuffersPerChunk", 1024); + if (logger.isDebugEnabled()) { logger.debug("-Dio.netty.allocator.numHeapArenas: {}", DEFAULT_NUM_HEAP_ARENA); logger.debug("-Dio.netty.allocator.numDirectArenas: {}", DEFAULT_NUM_DIRECT_ARENA); @@ -136,6 +140,8 @@ public class PooledByteBufAllocator extends AbstractByteBufAllocator implements logger.debug("-Dio.netty.allocator.maxCachedBufferCapacity: {}", DEFAULT_MAX_CACHED_BUFFER_CAPACITY); logger.debug("-Dio.netty.allocator.cacheTrimInterval: {}", DEFAULT_CACHE_TRIM_INTERVAL); logger.debug("-Dio.netty.allocator.useCacheForAllThreads: {}", DEFAULT_USE_CACHE_FOR_ALL_THREADS); + logger.debug("-Dio.netty.allocator.maxCachedByteBuffersPerChunk: {}", + DEFAULT_MAX_CACHED_BYTEBUFFERS_PER_CHUNK); } }