Propagate the PoolThreadCache reference down the allocation stack to avoid an extra thread-local lookup (#10166)

Motivation:
PoolChunk needs a reference to a PoolThreadCache to initialize a ByteBuf. Currently that reference is retrieved from a thread local via arena.parent.threadCache().
This lookup has a performance cost, and it is redundant: the PoolThreadCache has already been retrieved from the thread local at the beginning of the allocation call, so that reference can be propagated through the call chain and reused.

Modifications:
Replace the second lookup of the PoolThreadCache during ByteBuf initialization by explicitly propagating the PoolThreadCache reference down the allocation call stack.

Result:
Improved performance of ByteBuf allocation: roughly 6 ns/op faster in both benchmark configurations below.
--Before--
Benchmark                                            (size)  (tokens)  (useThreadCache)  Mode  Cnt    Score   Error  Units
SimpleByteBufPooledAllocatorBenchmark.getAndRelease     123         0              true  avgt   20   57.112 ± 1.004  ns/op
SimpleByteBufPooledAllocatorBenchmark.getAndRelease     123       100              true  avgt   20  222.827 ± 1.307  ns/op

--After--
Benchmark                                            (size)  (tokens)  (useThreadCache)  Mode  Cnt    Score   Error  Units
SimpleByteBufPooledAllocatorBenchmark.getAndRelease     123         0              true  avgt   20   50.732 ± 1.321  ns/op
SimpleByteBufPooledAllocatorBenchmark.getAndRelease     123       100              true  avgt   20  216.892 ± 3.806  ns/op
This commit is contained in:
Dmitry Konstantinov 2020-04-06 09:01:01 +03:00 committed by Norman Maurer
parent 760de7dde4
commit 7402a3a55a
4 changed files with 33 additions and 27 deletions

View File

@ -206,13 +206,13 @@ abstract class PoolArena<T> implements PoolArenaMetric {
assert s.doNotDestroy && s.elemSize == normCapacity;
long handle = s.allocate();
assert handle >= 0;
s.chunk.initBufWithSubpage(buf, null, handle, reqCapacity);
s.chunk.initBufWithSubpage(buf, null, handle, reqCapacity, cache);
incTinySmallAllocation(tiny);
return;
}
}
synchronized (this) {
allocateNormal(buf, reqCapacity, normCapacity);
allocateNormal(buf, reqCapacity, normCapacity, cache);
}
incTinySmallAllocation(tiny);
@ -224,7 +224,7 @@ abstract class PoolArena<T> implements PoolArenaMetric {
return;
}
synchronized (this) {
allocateNormal(buf, reqCapacity, normCapacity);
allocateNormal(buf, reqCapacity, normCapacity, cache);
++allocationsNormal;
}
} else {
@ -234,16 +234,18 @@ abstract class PoolArena<T> implements PoolArenaMetric {
}
// Method must be called inside synchronized(this) { ... } block
private void allocateNormal(PooledByteBuf<T> buf, int reqCapacity, int normCapacity) {
if (q050.allocate(buf, reqCapacity, normCapacity) || q025.allocate(buf, reqCapacity, normCapacity) ||
q000.allocate(buf, reqCapacity, normCapacity) || qInit.allocate(buf, reqCapacity, normCapacity) ||
q075.allocate(buf, reqCapacity, normCapacity)) {
private void allocateNormal(PooledByteBuf<T> buf, int reqCapacity, int normCapacity, PoolThreadCache threadCache) {
if (q050.allocate(buf, reqCapacity, normCapacity, threadCache) ||
q025.allocate(buf, reqCapacity, normCapacity, threadCache) ||
q000.allocate(buf, reqCapacity, normCapacity, threadCache) ||
qInit.allocate(buf, reqCapacity, normCapacity, threadCache) ||
q075.allocate(buf, reqCapacity, normCapacity, threadCache)) {
return;
}
// Add a new chunk.
PoolChunk<T> c = newChunk(pageSize, maxOrder, pageShifts, chunkSize);
boolean success = c.allocate(buf, reqCapacity, normCapacity);
boolean success = c.allocate(buf, reqCapacity, normCapacity, threadCache);
assert success;
qInit.add(c);
}

View File

@ -222,7 +222,7 @@ final class PoolChunk<T> implements PoolChunkMetric {
return 100 - freePercentage;
}
boolean allocate(PooledByteBuf<T> buf, int reqCapacity, int normCapacity) {
boolean allocate(PooledByteBuf<T> buf, int reqCapacity, int normCapacity, PoolThreadCache threadCache) {
final long handle;
if ((normCapacity & subpageOverflowMask) != 0) { // >= pageSize
handle = allocateRun(normCapacity);
@ -234,7 +234,7 @@ final class PoolChunk<T> implements PoolChunkMetric {
return false;
}
ByteBuffer nioBuffer = cachedNioBuffers != null ? cachedNioBuffers.pollLast() : null;
initBuf(buf, nioBuffer, handle, reqCapacity);
initBuf(buf, nioBuffer, handle, reqCapacity, threadCache);
return true;
}
@ -399,25 +399,27 @@ final class PoolChunk<T> implements PoolChunkMetric {
}
}
void initBuf(PooledByteBuf<T> buf, ByteBuffer nioBuffer, long handle, int reqCapacity) {
void initBuf(PooledByteBuf<T> buf, ByteBuffer nioBuffer, long handle, int reqCapacity,
PoolThreadCache threadCache) {
int memoryMapIdx = memoryMapIdx(handle);
int bitmapIdx = bitmapIdx(handle);
if (bitmapIdx == 0) {
byte val = value(memoryMapIdx);
assert val == unusable : String.valueOf(val);
buf.init(this, nioBuffer, handle, runOffset(memoryMapIdx) + offset,
reqCapacity, runLength(memoryMapIdx), arena.parent.threadCache());
reqCapacity, runLength(memoryMapIdx), threadCache);
} else {
initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx, reqCapacity);
initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx, reqCapacity, threadCache);
}
}
void initBufWithSubpage(PooledByteBuf<T> buf, ByteBuffer nioBuffer, long handle, int reqCapacity) {
initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx(handle), reqCapacity);
void initBufWithSubpage(PooledByteBuf<T> buf, ByteBuffer nioBuffer, long handle, int reqCapacity,
PoolThreadCache threadCache) {
initBufWithSubpage(buf, nioBuffer, handle, bitmapIdx(handle), reqCapacity, threadCache);
}
private void initBufWithSubpage(PooledByteBuf<T> buf, ByteBuffer nioBuffer,
long handle, int bitmapIdx, int reqCapacity) {
long handle, int bitmapIdx, int reqCapacity, PoolThreadCache threadCache) {
assert bitmapIdx != 0;
int memoryMapIdx = memoryMapIdx(handle);
@ -429,7 +431,7 @@ final class PoolChunk<T> implements PoolChunkMetric {
buf.init(
this, nioBuffer, handle,
runOffset(memoryMapIdx) + (bitmapIdx & 0x3FFFFFFF) * subpage.elemSize + offset,
reqCapacity, subpage.elemSize, arena.parent.threadCache());
reqCapacity, subpage.elemSize, threadCache);
}
private byte value(int id) {

View File

@ -96,7 +96,7 @@ final class PoolChunkList<T> implements PoolChunkListMetric {
this.prevList = prevList;
}
boolean allocate(PooledByteBuf<T> buf, int reqCapacity, int normCapacity) {
boolean allocate(PooledByteBuf<T> buf, int reqCapacity, int normCapacity, PoolThreadCache threadCache) {
if (normCapacity > maxCapacity) {
// Either this PoolChunkList is empty or the requested capacity is larger than the capacity which can
// be handled by the PoolChunks that are contained in this PoolChunkList.
@ -104,7 +104,7 @@ final class PoolChunkList<T> implements PoolChunkListMetric {
}
for (PoolChunk<T> cur = head; cur != null; cur = cur.next) {
if (cur.allocate(buf, reqCapacity, normCapacity)) {
if (cur.allocate(buf, reqCapacity, normCapacity, threadCache)) {
if (cur.freeBytes <= freeMinThreshold) {
remove(cur);
nextList.add(cur);

View File

@ -183,7 +183,7 @@ final class PoolThreadCache {
// no cache found so just return false here
return false;
}
boolean allocated = cache.allocate(buf, reqCapacity);
boolean allocated = cache.allocate(buf, reqCapacity, this);
if (++ allocations >= freeSweepAllocationThreshold) {
allocations = 0;
trim();
@ -344,8 +344,9 @@ final class PoolThreadCache {
@Override
protected void initBuf(
PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle, PooledByteBuf<T> buf, int reqCapacity) {
chunk.initBufWithSubpage(buf, nioBuffer, handle, reqCapacity);
PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle, PooledByteBuf<T> buf, int reqCapacity,
PoolThreadCache threadCache) {
chunk.initBufWithSubpage(buf, nioBuffer, handle, reqCapacity, threadCache);
}
}
@ -359,8 +360,9 @@ final class PoolThreadCache {
@Override
protected void initBuf(
PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle, PooledByteBuf<T> buf, int reqCapacity) {
chunk.initBuf(buf, nioBuffer, handle, reqCapacity);
PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle, PooledByteBuf<T> buf, int reqCapacity,
PoolThreadCache threadCache) {
chunk.initBuf(buf, nioBuffer, handle, reqCapacity, threadCache);
}
}
@ -380,7 +382,7 @@ final class PoolThreadCache {
* Init the {@link PooledByteBuf} using the provided chunk and handle with the capacity restrictions.
*/
protected abstract void initBuf(PoolChunk<T> chunk, ByteBuffer nioBuffer, long handle,
PooledByteBuf<T> buf, int reqCapacity);
PooledByteBuf<T> buf, int reqCapacity, PoolThreadCache threadCache);
/**
* Add to cache if not already full.
@ -400,12 +402,12 @@ final class PoolThreadCache {
/**
* Allocate something out of the cache if possible and remove the entry from the cache.
*/
public final boolean allocate(PooledByteBuf<T> buf, int reqCapacity) {
public final boolean allocate(PooledByteBuf<T> buf, int reqCapacity, PoolThreadCache threadCache) {
Entry<T> entry = queue.poll();
if (entry == null) {
return false;
}
initBuf(entry.chunk, entry.nioBuffer, entry.handle, buf, reqCapacity);
initBuf(entry.chunk, entry.nioBuffer, entry.handle, buf, reqCapacity, threadCache);
entry.recycle();
// allocations is not thread-safe, which is fine as this is only called from the same thread all the time.