Optimize PooledByteBufAllocator
Motivation: There is still room for improvement in PoolChunk.allocateRun() and Subpage.allocate(). Modifications: - Unroll the recursion in PoolChunk.allocateRun(). - Make Subpage.allocate() use the 'nextAvail' value set by the previous free(). Result: - The PoolChunk.allocateRun() optimization yields a 10%+ improvement in allocation throughput for non-subpage allocations. - The Subpage.allocate() optimization makes subpage allocations for tiny buffers as fast as those for non-tiny buffers, even when the pageSize is huge (e.g. 1048576), because in most cases it no longer needs to perform a linear search.
This commit is contained in:
parent
b50f91f6d0
commit
1d0a79e11e
@ -119,26 +119,62 @@ final class PoolChunk<T> {
|
||||
}
|
||||
|
||||
private long allocateRun(int normCapacity, int curIdx, int val) {
|
||||
for (;;) {
|
||||
if ((val & ST_ALLOCATED) != 0) { // state == ST_ALLOCATED || state == ST_ALLOCATED_SUBPAGE
|
||||
switch (val & 3) {
|
||||
case ST_UNUSED:
|
||||
return allocateRunSimple(normCapacity, curIdx, val);
|
||||
case ST_BRANCH:
|
||||
final int nextIdxLeft = curIdx << 1;
|
||||
final int nextValLeft = memoryMap[nextIdxLeft];
|
||||
final boolean recurseLeft;
|
||||
switch (nextValLeft & 3) {
|
||||
case ST_UNUSED:
|
||||
return allocateRunSimple(normCapacity, nextIdxLeft, nextValLeft);
|
||||
case ST_BRANCH:
|
||||
recurseLeft = true;
|
||||
break;
|
||||
default:
|
||||
recurseLeft = false;
|
||||
}
|
||||
|
||||
final int nextIdxRight = nextIdxLeft ^ 1;
|
||||
final int nextValRight = memoryMap[nextIdxRight];
|
||||
final boolean recurseRight;
|
||||
switch (nextValRight & 3) {
|
||||
case ST_UNUSED:
|
||||
return allocateRunSimple(normCapacity, nextIdxRight, nextValRight);
|
||||
case ST_BRANCH:
|
||||
recurseRight = true;
|
||||
break;
|
||||
default:
|
||||
recurseRight = false;
|
||||
}
|
||||
|
||||
if (recurseLeft) {
|
||||
long res = branchRun(normCapacity, nextIdxLeft);
|
||||
if (res > 0) {
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
if (recurseRight) {
|
||||
return branchRun(normCapacity, nextIdxRight);
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((val & ST_BRANCH) != 0) { // state == ST_BRANCH
|
||||
int nextIdx = curIdx << 1 ^ nextRandom();
|
||||
long res = allocateRun(normCapacity, nextIdx, memoryMap[nextIdx]);
|
||||
private long branchRun(int normCapacity, int nextIdx) {
|
||||
int nextNextIdx = nextIdx << 1;
|
||||
int nextNextVal = memoryMap[nextNextIdx];
|
||||
long res = allocateRun(normCapacity, nextNextIdx, nextNextVal);
|
||||
if (res > 0) {
|
||||
return res;
|
||||
}
|
||||
|
||||
curIdx = nextIdx ^ 1;
|
||||
val = memoryMap[curIdx];
|
||||
continue;
|
||||
}
|
||||
|
||||
// state == ST_UNUSED
|
||||
return allocateRunSimple(normCapacity, curIdx, val);
|
||||
}
|
||||
nextNextIdx ^= 1;
|
||||
nextNextVal = memoryMap[nextNextIdx];
|
||||
return allocateRun(normCapacity, nextNextIdx, nextNextVal);
|
||||
}
|
||||
|
||||
private long allocateRunSimple(int normCapacity, int curIdx, int val) {
|
||||
|
@ -19,20 +19,20 @@ package io.netty.buffer;
|
||||
final class PoolSubpage<T> {
|
||||
|
||||
final PoolChunk<T> chunk;
|
||||
final int memoryMapIdx;
|
||||
final int runOffset;
|
||||
final int pageSize;
|
||||
final long[] bitmap;
|
||||
private final int memoryMapIdx;
|
||||
private final int runOffset;
|
||||
private final int pageSize;
|
||||
private final long[] bitmap;
|
||||
|
||||
PoolSubpage<T> prev;
|
||||
PoolSubpage<T> next;
|
||||
|
||||
boolean doNotDestroy;
|
||||
int elemSize;
|
||||
int maxNumElems;
|
||||
int nextAvail;
|
||||
int bitmapLength;
|
||||
int numAvail;
|
||||
private int maxNumElems;
|
||||
private int bitmapLength;
|
||||
private int nextAvail;
|
||||
private int numAvail;
|
||||
|
||||
// TODO: Test if adding padding helps under contention
|
||||
//private long pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7;
|
||||
@ -87,7 +87,7 @@ final class PoolSubpage<T> {
|
||||
return -1;
|
||||
}
|
||||
|
||||
final int bitmapIdx = nextAvail;
|
||||
final int bitmapIdx = getNextAvail();
|
||||
int q = bitmapIdx >>> 6;
|
||||
int r = bitmapIdx & 63;
|
||||
assert (bitmap[q] >>> r & 1) == 0;
|
||||
@ -95,9 +95,6 @@ final class PoolSubpage<T> {
|
||||
|
||||
if (-- numAvail == 0) {
|
||||
removeFromPool();
|
||||
nextAvail = -1;
|
||||
} else {
|
||||
nextAvail = findNextAvailable();
|
||||
}
|
||||
|
||||
return toHandle(bitmapIdx);
|
||||
@ -118,8 +115,9 @@ final class PoolSubpage<T> {
|
||||
assert (bitmap[q] >>> r & 1) != 0;
|
||||
bitmap[q] ^= 1L << r;
|
||||
|
||||
setNextAvail(bitmapIdx);
|
||||
|
||||
if (numAvail ++ == 0) {
|
||||
nextAvail = bitmapIdx;
|
||||
addToPool();
|
||||
return true;
|
||||
}
|
||||
@ -157,28 +155,48 @@ final class PoolSubpage<T> {
|
||||
prev = null;
|
||||
}
|
||||
|
||||
private int findNextAvailable() {
|
||||
int newNextAvail = -1;
|
||||
loop:
|
||||
/**
 * Stores {@code bitmapIdx} in the {@code nextAvail} field.
 * {@code getNextAvail()} returns a non-negative {@code nextAvail} directly,
 * without scanning the bitmap, so this acts as a one-slot hint for the next lookup.
 *
 * @param bitmapIdx the bitmap index to remember
 */
private void setNextAvail(int bitmapIdx) {
|
||||
nextAvail = bitmapIdx;
|
||||
}
|
||||
|
||||
/**
 * Returns the bitmap index to use next.
 * If the {@code nextAvail} field holds a non-negative value, that value is
 * returned and the field is reset to -1. Otherwise the result of
 * {@code findNextAvail()} (a scan over the bitmap) is returned.
 *
 * @return the cached index, or whatever {@code findNextAvail()} returns
 */
private int getNextAvail() {
|
||||
// Local copy deliberately shadows the field; the field is reset below.
int nextAvail = this.nextAvail;
|
||||
if (nextAvail >= 0) {
|
||||
// The cached value is single-use: clear it before handing it out.
this.nextAvail = -1;
|
||||
return nextAvail;
|
||||
}
|
||||
// No cached value: fall back to scanning the bitmap.
return findNextAvail();
|
||||
}
|
||||
|
||||
/**
 * Scans the first {@code bitmapLength} words of {@code bitmap} in order and,
 * for the first word that has at least one clear (0) bit, returns the result
 * of {@code findNextAvail0(i, bits)} for that word. Later words are not
 * examined once such a word is found.
 *
 * @return the value produced by {@code findNextAvail0} for the first word
 *         with a clear bit, or -1 if every scanned word has all bits set
 */
private int findNextAvail() {
|
||||
// Copy the fields into locals for use inside the loop.
final long[] bitmap = this.bitmap;
|
||||
final int bitmapLength = this.bitmapLength;
|
||||
for (int i = 0; i < bitmapLength; i ++) {
|
||||
long bits = bitmap[i];
|
||||
// ~bits != 0 is true exactly when at least one bit of this word is 0.
if (~bits != 0) {
|
||||
return findNextAvail0(i, bits);
|
||||
}
|
||||
}
|
||||
// Every scanned word was all ones.
return -1;
|
||||
}
|
||||
|
||||
private int findNextAvail0(int i, long bits) {
|
||||
final int maxNumElems = this.maxNumElems;
|
||||
final int baseVal = i << 6;
|
||||
|
||||
for (int j = 0; j < 64; j ++) {
|
||||
if ((bits & 1) == 0) {
|
||||
newNextAvail = i << 6 | j;
|
||||
break loop;
|
||||
int val = baseVal | j;
|
||||
if (val < maxNumElems) {
|
||||
return val;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
bits >>>= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (newNextAvail < maxNumElems) {
|
||||
return newNextAvail;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
private long toHandle(int bitmapIdx) {
|
||||
return 0x4000000000000000L | (long) bitmapIdx << 32 | memoryMapIdx;
|
||||
|
Loading…
Reference in New Issue
Block a user