Provide a way to cache the internal nioBuffer of the PooledByteBuffer to reduce GC. (#8593)

Motivation:

Operations on a pooled buffer often need a temporary ByteBuffer duplicate of the chunk memory; these duplicates can be cached to reduce GC pressure.

Modifications:

Add a Deque per PoolChunk which will be used for caching.

Result:

Less GC.
Norman Maurer 2018-11-27 13:55:13 +01:00 committed by GitHub
parent 89639ce322
commit 8cd005ba43
3 changed files with 39 additions and 3 deletions
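
To make the idea concrete, here is a minimal, self-contained sketch of the caching scheme described above. The class and member names are hypothetical and simplified (this is not the Netty code, which follows below): a chunk owns one large buffer, hands out ByteBuffer duplicates, and keeps finished duplicates in a per-chunk Deque so they can be reused instead of reallocated.

import java.nio.ByteBuffer;
import java.util.ArrayDeque;
import java.util.Deque;

final class ChunkBufferCacheSketch {
    // One large memory region per chunk; operations work on lightweight duplicates of it.
    private final ByteBuffer memory = ByteBuffer.allocateDirect(16 * 1024);
    // Per-chunk cache of duplicates, reused in LIFO order.
    private final Deque<ByteBuffer> cachedDuplicates = new ArrayDeque<ByteBuffer>();
    private static final int MAX_CACHED = 1024; // mirrors the new default limit

    ByteBuffer acquireDuplicate() {
        // Reuse a cached duplicate if one is available, otherwise create a fresh one
        // (which will eventually have to be garbage collected).
        ByteBuffer cached = cachedDuplicates.pollLast();
        return cached != null ? cached : memory.duplicate();
    }

    void releaseDuplicate(ByteBuffer duplicate) {
        // Keep only a bounded number of duplicates; drop the rest for the GC to collect.
        if (cachedDuplicates.size() < MAX_CACHED) {
            cachedDuplicates.offer(duplicate);
        }
    }

    public static void main(String[] args) {
        ChunkBufferCacheSketch chunk = new ChunkBufferCacheSketch();
        ByteBuffer first = chunk.acquireDuplicate();
        chunk.releaseDuplicate(first);
        // The second acquire reuses the same duplicate instead of allocating a new one.
        System.out.println(chunk.acquireDuplicate() == first); // true
    }
}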

buffer/src/main/java/io/netty/buffer/PoolChunk.java

@@ -16,6 +16,10 @@
 package io.netty.buffer;
 
+import java.nio.ByteBuffer;
+import java.util.ArrayDeque;
+import java.util.Deque;
+
 /**
  * Description of algorithm for PageRun/PoolSubpage allocation from PoolChunk
  *
@@ -107,7 +111,6 @@ final class PoolChunk<T> implements PoolChunkMetric {
     final T memory;
     final boolean unpooled;
     final int offset;
     private final byte[] memoryMap;
     private final byte[] depthMap;
     private final PoolSubpage<T>[] subpages;
@@ -122,6 +125,13 @@ final class PoolChunk<T> implements PoolChunkMetric {
     /** Used to mark memory as unusable */
     private final byte unusable;
 
+    // Used as a cache for ByteBuffers created from the memory. These are just duplicates and so are only a container
+    // around the memory itself. They are often needed for operations within the Pooled*DirectByteBuf and so
+    // may produce extra GC, which can be greatly reduced by caching the duplicates.
+    //
+    // This may be null if the PoolChunk is unpooled, as pooling the ByteBuffer instances does not make any sense here.
+    private final Deque<ByteBuffer> cachedNioBuffers;
+
     private int freeBytes;
 
     PoolChunkList<T> parent;
@@ -163,6 +173,7 @@ final class PoolChunk<T> implements PoolChunkMetric {
         }
 
         subpages = newSubpageArray(maxSubpageAllocs);
+        cachedNioBuffers = new ArrayDeque<ByteBuffer>();
     }
/** Creates a special chunk that is not pooled. */ /** Creates a special chunk that is not pooled. */
@@ -182,6 +193,21 @@ final class PoolChunk<T> implements PoolChunkMetric {
         chunkSize = size;
         log2ChunkSize = log2(chunkSize);
         maxSubpageAllocs = 0;
+        cachedNioBuffers = null;
+    }
+
+    ByteBuffer pollCachedNioBuffer() {
+        // We use LIFO order to increase the chance that the buffer is still "hot" and so still in the CPU / L* caches.
+        return cachedNioBuffers != null ? cachedNioBuffers.pollLast() : null;
+    }
+
+    void offerCachedNioBuffer(ByteBuffer nioBuffer) {
+        // Only cache if this chunk is pooled and the limit has not been reached yet.
+        // Otherwise just drop the buffer on the floor and let the GC collect it.
+        if (cachedNioBuffers != null &&
+                cachedNioBuffers.size() <= PooledByteBufAllocator.DEFAULT_MAX_CACHED_BYTEBUFFERS_PER_CHUNK) {
+            cachedNioBuffers.offer(nioBuffer);
+        }
     }
 
     @SuppressWarnings("unchecked")
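
A note on the design choice above: the cache is a plain ArrayDeque used as a LIFO. Buffers are appended with offer(...) and taken back with pollLast(), so the most recently returned duplicate, the one most likely still resident in the CPU caches, is handed out first. A tiny standalone illustration of that Deque behaviour (not Netty code):

import java.nio.ByteBuffer;
import java.util.ArrayDeque;
import java.util.Deque;

public final class LifoDequeDemo {
    public static void main(String[] args) {
        Deque<ByteBuffer> cache = new ArrayDeque<ByteBuffer>();
        ByteBuffer a = ByteBuffer.allocate(8);
        ByteBuffer b = ByteBuffer.allocate(8);
        cache.offer(a); // offer(...) appends to the tail
        cache.offer(b);
        // pollLast() removes from the tail, so the most recently offered buffer comes back first.
        System.out.println(cache.pollLast() == b); // true
        System.out.println(cache.pollLast() == a); // true
    }
}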

buffer/src/main/java/io/netty/buffer/PooledByteBuf.java

@@ -56,13 +56,13 @@ abstract class PooledByteBuf<T> extends AbstractReferenceCountedByteBuf {
         this.chunk = chunk;
         memory = chunk.memory;
+        tmpNioBuf = chunk.pollCachedNioBuffer();
         allocator = chunk.arena.parent;
         this.cache = cache;
         this.handle = handle;
         this.offset = offset;
         this.length = length;
         this.maxLength = maxLength;
-        tmpNioBuf = null;
     }
 
     /**
@@ -166,7 +166,11 @@ abstract class PooledByteBuf<T> extends AbstractReferenceCountedByteBuf {
             final long handle = this.handle;
             this.handle = -1;
             memory = null;
-            tmpNioBuf = null;
+            if (tmpNioBuf != null) {
+                // Try to put it back into the chunk's cache for later reuse.
+                chunk.offerCachedNioBuffer(tmpNioBuf);
+                tmpNioBuf = null;
+            }
             chunk.arena.free(chunk, handle, maxLength, cache);
             chunk = null;
             recycle();
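
With this change the cached duplicate follows the buffer's lifecycle: it is polled from the chunk when a pooled buffer is initialised and offered back when the buffer is deallocated. The sketch below shows the public-API path that benefits; whether a particular allocation actually reuses a cached duplicate depends on which chunk serves it, so read the comments as the intended behaviour rather than a guarantee.

import io.netty.buffer.ByteBuf;
import io.netty.buffer.PooledByteBufAllocator;

public final class CachedNioBufferUsage {
    public static void main(String[] args) {
        PooledByteBufAllocator alloc = PooledByteBufAllocator.DEFAULT;

        ByteBuf buf = alloc.directBuffer(256);
        buf.writeBytes(new byte[] { 1, 2, 3, 4 });
        // Operations that need the internal NIO view (for example channel writes) use a
        // ByteBuffer duplicate of the chunk memory; with this commit it may come from the chunk's cache.
        buf.internalNioBuffer(0, buf.readableBytes());
        buf.release(); // deallocate(): the duplicate is offered back to the chunk's cache

        // A later allocation served by the same chunk can pick the cached duplicate up again.
        ByteBuf next = alloc.directBuffer(256);
        next.release();
    }
}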

buffer/src/main/java/io/netty/buffer/PooledByteBufAllocator.java

@@ -45,6 +45,7 @@ public class PooledByteBufAllocator extends AbstractByteBufAllocator implements
     private static final int DEFAULT_CACHE_TRIM_INTERVAL;
     private static final boolean DEFAULT_USE_CACHE_FOR_ALL_THREADS;
     private static final int DEFAULT_DIRECT_MEMORY_CACHE_ALIGNMENT;
+    static final int DEFAULT_MAX_CACHED_BYTEBUFFERS_PER_CHUNK;
 
     private static final int MIN_PAGE_SIZE = 4096;
     private static final int MAX_CHUNK_SIZE = (int) (((long) Integer.MAX_VALUE + 1) / 2);
@@ -116,6 +117,9 @@ public class PooledByteBufAllocator extends AbstractByteBufAllocator implements
         DEFAULT_DIRECT_MEMORY_CACHE_ALIGNMENT = SystemPropertyUtil.getInt(
                 "io.netty.allocator.directMemoryCacheAlignment", 0);
 
+        DEFAULT_MAX_CACHED_BYTEBUFFERS_PER_CHUNK = SystemPropertyUtil.getInt(
+                "io.netty.allocator.maxCachedByteBuffersPerChunk", 1024);
+
         if (logger.isDebugEnabled()) {
             logger.debug("-Dio.netty.allocator.numHeapArenas: {}", DEFAULT_NUM_HEAP_ARENA);
             logger.debug("-Dio.netty.allocator.numDirectArenas: {}", DEFAULT_NUM_DIRECT_ARENA);
@@ -136,6 +140,8 @@ public class PooledByteBufAllocator extends AbstractByteBufAllocator implements
             logger.debug("-Dio.netty.allocator.maxCachedBufferCapacity: {}", DEFAULT_MAX_CACHED_BUFFER_CAPACITY);
             logger.debug("-Dio.netty.allocator.cacheTrimInterval: {}", DEFAULT_CACHE_TRIM_INTERVAL);
             logger.debug("-Dio.netty.allocator.useCacheForAllThreads: {}", DEFAULT_USE_CACHE_FOR_ALL_THREADS);
+            logger.debug("-Dio.netty.allocator.maxCachedByteBuffersPerChunk: {}",
+                    DEFAULT_MAX_CACHED_BYTEBUFFERS_PER_CHUNK);
         }
     }
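
The new limit can be tuned through the io.netty.allocator.maxCachedByteBuffersPerChunk system property (default 1024). Because it is read once in the static initializer shown above, it must be set before PooledByteBufAllocator is loaded, e.g. via -Dio.netty.allocator.maxCachedByteBuffersPerChunk=512 on the command line or, as a sketch, programmatically (the value 512 is just an example):

import io.netty.buffer.ByteBuf;
import io.netty.buffer.PooledByteBufAllocator;

public final class ConfigureCachedByteBuffers {
    public static void main(String[] args) {
        // Must run before PooledByteBufAllocator is first touched, since the default is
        // read only once in its static initializer.
        System.setProperty("io.netty.allocator.maxCachedByteBuffersPerChunk", "512");

        // Touching the allocator afterwards picks up the overridden value.
        PooledByteBufAllocator alloc = PooledByteBufAllocator.DEFAULT;
        ByteBuf buf = alloc.directBuffer(64);
        buf.release();
    }
}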