From 05b7af7ae4a2f7f4b6053c363f88fba6dead69c8 Mon Sep 17 00:00:00 2001 From: Dmitry Konstantinov Date: Mon, 6 Apr 2020 09:01:01 +0300 Subject: [PATCH] Propagate ref to pool thread cache down in the allocation stack to avoid extra thread local lookup (#10166) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Motivation: PoolChunk requires a reference to a PoolThreadCache to initialize a ByteBuf. Currently the reference is retrieved from a thread local via arena.parent.threadCache(), which has some performance cost. The PoolThreadCache is already retrieved from the thread local at the beginning of the allocation call, so that reference can be propagated through the calls and reused. Modifications: Replace the second lookup of PoolThreadCache during ByteBuf initialization with explicit propagation of the PoolThreadCache reference down the allocation stack. Result: Improve performance of ByteBuf allocation. --Before-- Benchmark (size) (tokens) (useThreadCache) Mode Cnt Score Error Units SimpleByteBufPooledAllocatorBenchmark.getAndRelease 123 0 true avgt 20 57.112 ± 1.004 ns/op SimpleByteBufPooledAllocatorBenchmark.getAndRelease 123 100 true avgt 20 222.827 ± 1.307 ns/op --After-- Benchmark (size) (tokens) (useThreadCache) Mode Cnt Score Error Units SimpleByteBufPooledAllocatorBenchmark.getAndRelease 123 0 true avgt 20 50.732 ± 1.321 ns/op SimpleByteBufPooledAllocatorBenchmark.getAndRelease 123 100 true avgt 20 216.892 ± 3.806 ns/op --- .../main/java/io/netty/buffer/PoolArena.java | 18 +++++++++-------- .../main/java/io/netty/buffer/PoolChunk.java | 20 ++++++++++--------- .../java/io/netty/buffer/PoolChunkList.java | 4 ++-- .../java/io/netty/buffer/PoolThreadCache.java | 18 +++++++++-------- 4 files changed, 33 insertions(+), 27 deletions(-) diff --git a/buffer/src/main/java/io/netty/buffer/PoolArena.java b/buffer/src/main/java/io/netty/buffer/PoolArena.java index 308eb96c1e..f58697ee15 100644 --- a/buffer/src/main/java/io/netty/buffer/PoolArena.java +++ 
b/buffer/src/main/java/io/netty/buffer/PoolArena.java @@ -206,13 +206,13 @@ abstract class PoolArena implements PoolArenaMetric { assert s.doNotDestroy && s.elemSize == normCapacity; long handle = s.allocate(); assert handle >= 0; - s.chunk.initBufWithSubpage(buf, null, handle, reqCapacity); + s.chunk.initBufWithSubpage(buf, null, handle, reqCapacity, cache); incTinySmallAllocation(tiny); return; } } synchronized (this) { - allocateNormal(buf, reqCapacity, normCapacity); + allocateNormal(buf, reqCapacity, normCapacity, cache); } incTinySmallAllocation(tiny); @@ -224,7 +224,7 @@ abstract class PoolArena implements PoolArenaMetric { return; } synchronized (this) { - allocateNormal(buf, reqCapacity, normCapacity); + allocateNormal(buf, reqCapacity, normCapacity, cache); ++allocationsNormal; } } else { @@ -234,16 +234,18 @@ abstract class PoolArena implements PoolArenaMetric { } // Method must be called inside synchronized(this) { ... } block - private void allocateNormal(PooledByteBuf buf, int reqCapacity, int normCapacity) { - if (q050.allocate(buf, reqCapacity, normCapacity) || q025.allocate(buf, reqCapacity, normCapacity) || - q000.allocate(buf, reqCapacity, normCapacity) || qInit.allocate(buf, reqCapacity, normCapacity) || - q075.allocate(buf, reqCapacity, normCapacity)) { + private void allocateNormal(PooledByteBuf buf, int reqCapacity, int normCapacity, PoolThreadCache threadCache) { + if (q050.allocate(buf, reqCapacity, normCapacity, threadCache) || + q025.allocate(buf, reqCapacity, normCapacity, threadCache) || + q000.allocate(buf, reqCapacity, normCapacity, threadCache) || + qInit.allocate(buf, reqCapacity, normCapacity, threadCache) || + q075.allocate(buf, reqCapacity, normCapacity, threadCache)) { return; } // Add a new chunk. 
PoolChunk c = newChunk(pageSize, maxOrder, pageShifts, chunkSize); - boolean success = c.allocate(buf, reqCapacity, normCapacity); + boolean success = c.allocate(buf, reqCapacity, normCapacity, threadCache); assert success; qInit.add(c); } diff --git a/buffer/src/main/java/io/netty/buffer/PoolChunk.java b/buffer/src/main/java/io/netty/buffer/PoolChunk.java index 897da7b923..4f84268cb4 100644 --- a/buffer/src/main/java/io/netty/buffer/PoolChunk.java +++ b/buffer/src/main/java/io/netty/buffer/PoolChunk.java @@ -222,7 +222,7 @@ final class PoolChunk implements PoolChunkMetric { return 100 - freePercentage; } - boolean allocate(PooledByteBuf buf, int reqCapacity, int normCapacity) { + boolean allocate(PooledByteBuf buf, int reqCapacity, int normCapacity, PoolThreadCache threadCache) { final long handle; if ((normCapacity & subpageOverflowMask) != 0) { // >= pageSize handle = allocateRun(normCapacity); @@ -234,7 +234,7 @@ final class PoolChunk implements PoolChunkMetric { return false; } ByteBuffer nioBuffer = cachedNioBuffers != null ? 
cachedNioBuffers.pollLast() : null; - initBuf(buf, nioBuffer, handle, reqCapacity); + initBuf(buf, nioBuffer, handle, reqCapacity, threadCache); return true; } @@ -399,25 +399,27 @@ final class PoolChunk implements PoolChunkMetric { } } - void initBuf(PooledByteBuf buf, ByteBuffer nioBuffer, long handle, int reqCapacity) { + void initBuf(PooledByteBuf buf, ByteBuffer nioBuffer, long handle, int reqCapacity, + PoolThreadCache threadCache) { int memoryMapIdx = memoryMapIdx(handle); int bitmapIdx = bitmapIdx(handle); if (bitmapIdx == 0) { byte val = value(memoryMapIdx); assert val == unusable : String.valueOf(val); buf.init(this, nioBuffer, handle, runOffset(memoryMapIdx) + offset, - reqCapacity, runLength(memoryMapIdx), arena.parent.threadCache()); + reqCapacity, runLength(memoryMapIdx), threadCache); } else { - initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx, reqCapacity); + initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx, reqCapacity, threadCache); } } - void initBufWithSubpage(PooledByteBuf buf, ByteBuffer nioBuffer, long handle, int reqCapacity) { - initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx(handle), reqCapacity); + void initBufWithSubpage(PooledByteBuf buf, ByteBuffer nioBuffer, long handle, int reqCapacity, + PoolThreadCache threadCache) { + initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx(handle), reqCapacity, threadCache); } private void initBufWithSubpage(PooledByteBuf buf, ByteBuffer nioBuffer, - long handle, int bitmapIdx, int reqCapacity) { + long handle, int bitmapIdx, int reqCapacity, PoolThreadCache threadCache) { assert bitmapIdx != 0; int memoryMapIdx = memoryMapIdx(handle); @@ -429,7 +431,7 @@ final class PoolChunk implements PoolChunkMetric { buf.init( this, nioBuffer, handle, runOffset(memoryMapIdx) + (bitmapIdx & 0x3FFFFFFF) * subpage.elemSize + offset, - reqCapacity, subpage.elemSize, arena.parent.threadCache()); + reqCapacity, subpage.elemSize, threadCache); } private byte value(int id) { diff --git 
a/buffer/src/main/java/io/netty/buffer/PoolChunkList.java b/buffer/src/main/java/io/netty/buffer/PoolChunkList.java index bbdc65590d..eeb95ff94f 100644 --- a/buffer/src/main/java/io/netty/buffer/PoolChunkList.java +++ b/buffer/src/main/java/io/netty/buffer/PoolChunkList.java @@ -96,7 +96,7 @@ final class PoolChunkList implements PoolChunkListMetric { this.prevList = prevList; } - boolean allocate(PooledByteBuf buf, int reqCapacity, int normCapacity) { + boolean allocate(PooledByteBuf buf, int reqCapacity, int normCapacity, PoolThreadCache threadCache) { if (normCapacity > maxCapacity) { // Either this PoolChunkList is empty or the requested capacity is larger then the capacity which can // be handled by the PoolChunks that are contained in this PoolChunkList. @@ -104,7 +104,7 @@ final class PoolChunkList implements PoolChunkListMetric { } for (PoolChunk cur = head; cur != null; cur = cur.next) { - if (cur.allocate(buf, reqCapacity, normCapacity)) { + if (cur.allocate(buf, reqCapacity, normCapacity, threadCache)) { if (cur.freeBytes <= freeMinThreshold) { remove(cur); nextList.add(cur); diff --git a/buffer/src/main/java/io/netty/buffer/PoolThreadCache.java b/buffer/src/main/java/io/netty/buffer/PoolThreadCache.java index 92e46a0f02..631fb2fea2 100644 --- a/buffer/src/main/java/io/netty/buffer/PoolThreadCache.java +++ b/buffer/src/main/java/io/netty/buffer/PoolThreadCache.java @@ -184,7 +184,7 @@ final class PoolThreadCache { // no cache found so just return false here return false; } - boolean allocated = cache.allocate(buf, reqCapacity); + boolean allocated = cache.allocate(buf, reqCapacity, this); if (++ allocations >= freeSweepAllocationThreshold) { allocations = 0; trim(); @@ -345,8 +345,9 @@ final class PoolThreadCache { @Override protected void initBuf( - PoolChunk chunk, ByteBuffer nioBuffer, long handle, PooledByteBuf buf, int reqCapacity) { - chunk.initBufWithSubpage(buf, nioBuffer, handle, reqCapacity); + PoolChunk chunk, ByteBuffer nioBuffer, long handle, 
PooledByteBuf buf, int reqCapacity, + PoolThreadCache threadCache) { + chunk.initBufWithSubpage(buf, nioBuffer, handle, reqCapacity, threadCache); } } @@ -360,8 +361,9 @@ final class PoolThreadCache { @Override protected void initBuf( - PoolChunk chunk, ByteBuffer nioBuffer, long handle, PooledByteBuf buf, int reqCapacity) { - chunk.initBuf(buf, nioBuffer, handle, reqCapacity); + PoolChunk chunk, ByteBuffer nioBuffer, long handle, PooledByteBuf buf, int reqCapacity, + PoolThreadCache threadCache) { + chunk.initBuf(buf, nioBuffer, handle, reqCapacity, threadCache); } } @@ -381,7 +383,7 @@ final class PoolThreadCache { * Init the {@link PooledByteBuf} using the provided chunk and handle with the capacity restrictions. */ protected abstract void initBuf(PoolChunk chunk, ByteBuffer nioBuffer, long handle, - PooledByteBuf buf, int reqCapacity); + PooledByteBuf buf, int reqCapacity, PoolThreadCache threadCache); /** * Add to cache if not already full. @@ -401,12 +403,12 @@ final class PoolThreadCache { /** * Allocate something out of the cache if possible and remove the entry from the cache. */ - public final boolean allocate(PooledByteBuf buf, int reqCapacity) { + public final boolean allocate(PooledByteBuf buf, int reqCapacity, PoolThreadCache threadCache) { Entry entry = queue.poll(); if (entry == null) { return false; } - initBuf(entry.chunk, entry.nioBuffer, entry.handle, buf, reqCapacity); + initBuf(entry.chunk, entry.nioBuffer, entry.handle, buf, reqCapacity, threadCache); entry.recycle(); // allocations is not thread-safe which is fine as this is only called from the same thread all time.