Propagate the PoolThreadCache reference down the allocation stack to avoid an extra thread-local lookup (#10166)

Motivation:
PoolChunk needs a reference to a PoolThreadCache to initialize a ByteBuf. Currently that reference is retrieved from a thread local via arena.parent.threadCache().
This second lookup has a performance cost. The PoolThreadCache is already retrieved from the thread local at the beginning of the allocation call, so that reference can be propagated through the call chain and reused instead.

Modifications:
Replace the second lookup of the PoolThreadCache during ByteBuf initialization by explicitly propagating the already-retrieved PoolThreadCache reference down the allocation call stack.

Result:
ByteBuf allocation is faster, as the benchmark results below show:
--Before--
Benchmark                                            (size)  (tokens)  (useThreadCache)  Mode  Cnt    Score   Error  Units
SimpleByteBufPooledAllocatorBenchmark.getAndRelease     123         0              true  avgt   20   57.112 ± 1.004  ns/op
SimpleByteBufPooledAllocatorBenchmark.getAndRelease     123       100              true  avgt   20  222.827 ± 1.307  ns/op

--After--
Benchmark                                            (size)  (tokens)  (useThreadCache)  Mode  Cnt    Score   Error  Units
SimpleByteBufPooledAllocatorBenchmark.getAndRelease     123         0              true  avgt   20   50.732 ± 1.321  ns/op
SimpleByteBufPooledAllocatorBenchmark.getAndRelease     123       100              true  avgt   20  216.892 ± 3.806  ns/op
This commit is contained in:
Dmitry Konstantinov 2020-04-06 09:01:01 +03:00 committed by GitHub
parent bdaa935756
commit 05b7af7ae4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 33 additions and 27 deletions

View File

@ -206,13 +206,13 @@ abstract class PoolArena<T> implements PoolArenaMetric {
assert s.doNotDestroy && s.elemSize == normCapacity; assert s.doNotDestroy && s.elemSize == normCapacity;
long handle = s.allocate(); long handle = s.allocate();
assert handle >= 0; assert handle >= 0;
s.chunk.initBufWithSubpage(buf, null, handle, reqCapacity); s.chunk.initBufWithSubpage(buf, null, handle, reqCapacity, cache);
incTinySmallAllocation(tiny); incTinySmallAllocation(tiny);
return; return;
} }
} }
synchronized (this) { synchronized (this) {
allocateNormal(buf, reqCapacity, normCapacity); allocateNormal(buf, reqCapacity, normCapacity, cache);
} }
incTinySmallAllocation(tiny); incTinySmallAllocation(tiny);
@ -224,7 +224,7 @@ abstract class PoolArena<T> implements PoolArenaMetric {
return; return;
} }
synchronized (this) { synchronized (this) {
allocateNormal(buf, reqCapacity, normCapacity); allocateNormal(buf, reqCapacity, normCapacity, cache);
++allocationsNormal; ++allocationsNormal;
} }
} else { } else {
@ -234,16 +234,18 @@ abstract class PoolArena<T> implements PoolArenaMetric {
} }
// Method must be called inside synchronized(this) { ... } block // Method must be called inside synchronized(this) { ... } block
private void allocateNormal(PooledByteBuf<T> buf, int reqCapacity, int normCapacity) { private void allocateNormal(PooledByteBuf<T> buf, int reqCapacity, int normCapacity, PoolThreadCache threadCache) {
if (q050.allocate(buf, reqCapacity, normCapacity) || q025.allocate(buf, reqCapacity, normCapacity) || if (q050.allocate(buf, reqCapacity, normCapacity, threadCache) ||
q000.allocate(buf, reqCapacity, normCapacity) || qInit.allocate(buf, reqCapacity, normCapacity) || q025.allocate(buf, reqCapacity, normCapacity, threadCache) ||
q075.allocate(buf, reqCapacity, normCapacity)) { q000.allocate(buf, reqCapacity, normCapacity, threadCache) ||
qInit.allocate(buf, reqCapacity, normCapacity, threadCache) ||
q075.allocate(buf, reqCapacity, normCapacity, threadCache)) {
return; return;
} }
// Add a new chunk. // Add a new chunk.
PoolChunk<T> c = newChunk(pageSize, maxOrder, pageShifts, chunkSize); PoolChunk<T> c = newChunk(pageSize, maxOrder, pageShifts, chunkSize);
boolean success = c.allocate(buf, reqCapacity, normCapacity); boolean success = c.allocate(buf, reqCapacity, normCapacity, threadCache);
assert success; assert success;
qInit.add(c); qInit.add(c);
} }

View File

@ -222,7 +222,7 @@ final class PoolChunk<T> implements PoolChunkMetric {
return 100 - freePercentage; return 100 - freePercentage;
} }
boolean allocate(PooledByteBuf<T> buf, int reqCapacity, int normCapacity) { boolean allocate(PooledByteBuf<T> buf, int reqCapacity, int normCapacity, PoolThreadCache threadCache) {
final long handle; final long handle;
if ((normCapacity & subpageOverflowMask) != 0) { // >= pageSize if ((normCapacity & subpageOverflowMask) != 0) { // >= pageSize
handle = allocateRun(normCapacity); handle = allocateRun(normCapacity);
@ -234,7 +234,7 @@ final class PoolChunk<T> implements PoolChunkMetric {
return false; return false;
} }
ByteBuffer nioBuffer = cachedNioBuffers != null ? cachedNioBuffers.pollLast() : null; ByteBuffer nioBuffer = cachedNioBuffers != null ? cachedNioBuffers.pollLast() : null;
initBuf(buf, nioBuffer, handle, reqCapacity); initBuf(buf, nioBuffer, handle, reqCapacity, threadCache);
return true; return true;
} }
@ -399,25 +399,27 @@ final class PoolChunk<T> implements PoolChunkMetric {
} }
} }
void initBuf(PooledByteBuf<T> buf, ByteBuffer nioBuffer, long handle, int reqCapacity) { void initBuf(PooledByteBuf<T> buf, ByteBuffer nioBuffer, long handle, int reqCapacity,
PoolThreadCache threadCache) {
int memoryMapIdx = memoryMapIdx(handle); int memoryMapIdx = memoryMapIdx(handle);
int bitmapIdx = bitmapIdx(handle); int bitmapIdx = bitmapIdx(handle);
if (bitmapIdx == 0) { if (bitmapIdx == 0) {
byte val = value(memoryMapIdx); byte val = value(memoryMapIdx);
assert val == unusable : String.valueOf(val); assert val == unusable : String.valueOf(val);
buf.init(this, nioBuffer, handle, runOffset(memoryMapIdx) + offset, buf.init(this, nioBuffer, handle, runOffset(memoryMapIdx) + offset,
reqCapacity, runLength(memoryMapIdx), arena.parent.threadCache()); reqCapacity, runLength(memoryMapIdx), threadCache);
} else { } else {
initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx, reqCapacity); initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx, reqCapacity, threadCache);
} }
} }
void initBufWithSubpage(PooledByteBuf<T> buf, ByteBuffer nioBuffer, long handle, int reqCapacity) { void initBufWithSubpage(PooledByteBuf<T> buf, ByteBuffer nioBuffer, long handle, int reqCapacity,
initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx(handle), reqCapacity); PoolThreadCache threadCache) {
initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx(handle), reqCapacity, threadCache);
} }
private void initBufWithSubpage(PooledByteBuf<T> buf, ByteBuffer nioBuffer, private void initBufWithSubpage(PooledByteBuf<T> buf, ByteBuffer nioBuffer,
long handle, int bitmapIdx, int reqCapacity) { long handle, int bitmapIdx, int reqCapacity, PoolThreadCache threadCache) {
assert bitmapIdx != 0; assert bitmapIdx != 0;
int memoryMapIdx = memoryMapIdx(handle); int memoryMapIdx = memoryMapIdx(handle);
@ -429,7 +431,7 @@ final class PoolChunk<T> implements PoolChunkMetric {
buf.init( buf.init(
this, nioBuffer, handle, this, nioBuffer, handle,
runOffset(memoryMapIdx) + (bitmapIdx & 0x3FFFFFFF) * subpage.elemSize + offset, runOffset(memoryMapIdx) + (bitmapIdx & 0x3FFFFFFF) * subpage.elemSize + offset,
reqCapacity, subpage.elemSize, arena.parent.threadCache()); reqCapacity, subpage.elemSize, threadCache);
} }
private byte value(int id) { private byte value(int id) {

View File

@ -96,7 +96,7 @@ final class PoolChunkList<T> implements PoolChunkListMetric {
this.prevList = prevList; this.prevList = prevList;
} }
boolean allocate(PooledByteBuf<T> buf, int reqCapacity, int normCapacity) { boolean allocate(PooledByteBuf<T> buf, int reqCapacity, int normCapacity, PoolThreadCache threadCache) {
if (normCapacity > maxCapacity) { if (normCapacity > maxCapacity) {
// Either this PoolChunkList is empty or the requested capacity is larger then the capacity which can // Either this PoolChunkList is empty or the requested capacity is larger then the capacity which can
// be handled by the PoolChunks that are contained in this PoolChunkList. // be handled by the PoolChunks that are contained in this PoolChunkList.
@ -104,7 +104,7 @@ final class PoolChunkList<T> implements PoolChunkListMetric {
} }
for (PoolChunk<T> cur = head; cur != null; cur = cur.next) { for (PoolChunk<T> cur = head; cur != null; cur = cur.next) {
if (cur.allocate(buf, reqCapacity, normCapacity)) { if (cur.allocate(buf, reqCapacity, normCapacity, threadCache)) {
if (cur.freeBytes <= freeMinThreshold) { if (cur.freeBytes <= freeMinThreshold) {
remove(cur); remove(cur);
nextList.add(cur); nextList.add(cur);

View File

@ -184,7 +184,7 @@ final class PoolThreadCache {
// no cache found so just return false here // no cache found so just return false here
return false; return false;
} }
boolean allocated = cache.allocate(buf, reqCapacity); boolean allocated = cache.allocate(buf, reqCapacity, this);
if (++ allocations >= freeSweepAllocationThreshold) { if (++ allocations >= freeSweepAllocationThreshold) {
allocations = 0; allocations = 0;
trim(); trim();
@ -345,8 +345,9 @@ final class PoolThreadCache {
@Override @Override
protected void initBuf( protected void initBuf(
PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle, PooledByteBuf<T> buf, int reqCapacity) { PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle, PooledByteBuf<T> buf, int reqCapacity,
chunk.initBufWithSubpage(buf, nioBuffer, handle, reqCapacity); PoolThreadCache threadCache) {
chunk.initBufWithSubpage(buf, nioBuffer, handle, reqCapacity, threadCache);
} }
} }
@ -360,8 +361,9 @@ final class PoolThreadCache {
@Override @Override
protected void initBuf( protected void initBuf(
PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle, PooledByteBuf<T> buf, int reqCapacity) { PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle, PooledByteBuf<T> buf, int reqCapacity,
chunk.initBuf(buf, nioBuffer, handle, reqCapacity); PoolThreadCache threadCache) {
chunk.initBuf(buf, nioBuffer, handle, reqCapacity, threadCache);
} }
} }
@ -381,7 +383,7 @@ final class PoolThreadCache {
* Init the {@link PooledByteBuf} using the provided chunk and handle with the capacity restrictions. * Init the {@link PooledByteBuf} using the provided chunk and handle with the capacity restrictions.
*/ */
protected abstract void initBuf(PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle, protected abstract void initBuf(PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle,
PooledByteBuf<T> buf, int reqCapacity); PooledByteBuf<T> buf, int reqCapacity, PoolThreadCache threadCache);
/** /**
* Add to cache if not already full. * Add to cache if not already full.
@ -401,12 +403,12 @@ final class PoolThreadCache {
/** /**
* Allocate something out of the cache if possible and remove the entry from the cache. * Allocate something out of the cache if possible and remove the entry from the cache.
*/ */
public final boolean allocate(PooledByteBuf<T> buf, int reqCapacity) { public final boolean allocate(PooledByteBuf<T> buf, int reqCapacity, PoolThreadCache threadCache) {
Entry<T> entry = queue.poll(); Entry<T> entry = queue.poll();
if (entry == null) { if (entry == null) {
return false; return false;
} }
initBuf(entry.chunk, entry.nioBuffer, entry.handle, buf, reqCapacity); initBuf(entry.chunk, entry.nioBuffer, entry.handle, buf, reqCapacity, threadCache);
entry.recycle(); entry.recycle();
// allocations is not thread-safe which is fine as this is only called from the same thread all time. // allocations is not thread-safe which is fine as this is only called from the same thread all time.