Optimize PooledByteBufAllocator

Motivation:

There is still room for improvement in PoolChunk.allocateRun() and
Subpage.allocate().

Modifications:

- Unroll the recursion in PoolChunk.allocateRun()
- Subpage.allocate() makes use of the 'nextAvail' value set by the
  previous free().

Result:

- PoolChunk.allocateRun() optimization yields 10%+ improvements in
  allocation throughput for non-subpage allocations.
- Subpage.allocate() optimization makes the subpage allocations for
  tiny buffers as fast as non-tiny buffers even when the pageSize is
  huge (e.g. 1048576) because it doesn't need to perform a linear search
  in most cases.
This commit is contained in:
Trustin Lee 2014-05-30 10:45:28 +09:00
parent 0cc264b76b
commit e5ed69241b
2 changed files with 97 additions and 43 deletions

View File

@ -119,26 +119,62 @@ final class PoolChunk<T> {
}
private long allocateRun(int normCapacity, int curIdx, int val) {
for (;;) {
if ((val & ST_ALLOCATED) != 0) { // state == ST_ALLOCATED || state == ST_ALLOCATED_SUBPAGE
return -1;
}
if ((val & ST_BRANCH) != 0) { // state == ST_BRANCH
int nextIdx = curIdx << 1 ^ nextRandom();
long res = allocateRun(normCapacity, nextIdx, memoryMap[nextIdx]);
if (res > 0) {
return res;
switch (val & 3) {
case ST_UNUSED:
return allocateRunSimple(normCapacity, curIdx, val);
case ST_BRANCH:
final int nextIdxLeft = curIdx << 1;
final int nextValLeft = memoryMap[nextIdxLeft];
final boolean recurseLeft;
switch (nextValLeft & 3) {
case ST_UNUSED:
return allocateRunSimple(normCapacity, nextIdxLeft, nextValLeft);
case ST_BRANCH:
recurseLeft = true;
break;
default:
recurseLeft = false;
}
curIdx = nextIdx ^ 1;
val = memoryMap[curIdx];
continue;
}
final int nextIdxRight = nextIdxLeft ^ 1;
final int nextValRight = memoryMap[nextIdxRight];
final boolean recurseRight;
switch (nextValRight & 3) {
case ST_UNUSED:
return allocateRunSimple(normCapacity, nextIdxRight, nextValRight);
case ST_BRANCH:
recurseRight = true;
break;
default:
recurseRight = false;
}
// state == ST_UNUSED
return allocateRunSimple(normCapacity, curIdx, val);
if (recurseLeft) {
long res = branchRun(normCapacity, nextIdxLeft);
if (res > 0) {
return res;
}
}
if (recurseRight) {
return branchRun(normCapacity, nextIdxRight);
}
}
return -1;
}
/**
 * Attempts a run allocation beneath the node {@code nextIdx} by trying each of its
 * two children in turn.
 *
 * The child at {@code nextIdx << 1} is tried first; if that attempt yields a positive
 * result it is returned immediately. Otherwise the sibling child (same index with the
 * lowest bit flipped) is tried and its result is returned as-is.
 *
 * @param normCapacity the capacity forwarded unchanged to {@code allocateRun}
 * @param nextIdx      the {@code memoryMap} index of the node whose children are searched
 * @return the first positive result from {@code allocateRun}, or otherwise whatever the
 *         attempt on the second child returned
 */
private long branchRun(int normCapacity, int nextIdx) {
    final int firstChild = nextIdx << 1;
    final long firstResult = allocateRun(normCapacity, firstChild, memoryMap[firstChild]);
    if (firstResult > 0) {
        return firstResult;
    }

    // The sibling is only inspected after the first attempt has failed.
    final int secondChild = firstChild ^ 1;
    return allocateRun(normCapacity, secondChild, memoryMap[secondChild]);
}
private long allocateRunSimple(int normCapacity, int curIdx, int val) {

View File

@ -19,20 +19,20 @@ package io.netty.buffer;
final class PoolSubpage<T> {
final PoolChunk<T> chunk;
final int memoryMapIdx;
final int runOffset;
final int pageSize;
final long[] bitmap;
private final int memoryMapIdx;
private final int runOffset;
private final int pageSize;
private final long[] bitmap;
PoolSubpage<T> prev;
PoolSubpage<T> next;
boolean doNotDestroy;
int elemSize;
int maxNumElems;
int nextAvail;
int bitmapLength;
int numAvail;
private int maxNumElems;
private int bitmapLength;
private int nextAvail;
private int numAvail;
// TODO: Test if adding padding helps under contention
//private long pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7;
@ -87,7 +87,7 @@ final class PoolSubpage<T> {
return -1;
}
final int bitmapIdx = nextAvail;
final int bitmapIdx = getNextAvail();
int q = bitmapIdx >>> 6;
int r = bitmapIdx & 63;
assert (bitmap[q] >>> r & 1) == 0;
@ -95,9 +95,6 @@ final class PoolSubpage<T> {
if (-- numAvail == 0) {
removeFromPool();
nextAvail = -1;
} else {
nextAvail = findNextAvailable();
}
return toHandle(bitmapIdx);
@ -118,8 +115,9 @@ final class PoolSubpage<T> {
assert (bitmap[q] >>> r & 1) != 0;
bitmap[q] ^= 1L << r;
setNextAvail(bitmapIdx);
if (numAvail ++ == 0) {
nextAvail = bitmapIdx;
addToPool();
return true;
}
@ -157,27 +155,47 @@ final class PoolSubpage<T> {
prev = null;
}
private int findNextAvailable() {
int newNextAvail = -1;
loop:
/**
 * Stores {@code bitmapIdx} in the {@code nextAvail} field.
 *
 * @param bitmapIdx the bitmap index to remember
 */
private void setNextAvail(int bitmapIdx) {
    this.nextAvail = bitmapIdx;
}
/**
 * Returns a bitmap index to use for the next allocation.
 *
 * If the {@code nextAvail} field holds a non-negative value, that value is returned and
 * the field is reset to {@code -1} so it is handed out only once. Otherwise the result
 * of {@code findNextAvail()} is returned.
 *
 * @return the cached index if one is present, else the value from {@code findNextAvail()}
 */
private int getNextAvail() {
    final int cached = this.nextAvail;
    if (cached < 0) {
        // Nothing cached: fall back to scanning.
        return findNextAvail();
    }
    this.nextAvail = -1;
    return cached;
}
private int findNextAvail() {
final long[] bitmap = this.bitmap;
final int bitmapLength = this.bitmapLength;
for (int i = 0; i < bitmapLength; i ++) {
long bits = bitmap[i];
if (~bits != 0) {
for (int j = 0; j < 64; j ++) {
if ((bits & 1) == 0) {
newNextAvail = i << 6 | j;
break loop;
}
bits >>>= 1;
}
return findNextAvail0(i, bits);
}
}
return -1;
}
if (newNextAvail < maxNumElems) {
return newNextAvail;
} else {
return -1;
/**
 * Locates the lowest clear (zero) bit in one 64-bit bitmap word and converts its
 * position into a bitmap index.
 *
 * @param i    the index of the word; it contributes {@code i << 6} to the result
 * @param bits the contents of that word
 * @return {@code (i << 6) | j}, where {@code j} is the position of the lowest zero bit of
 *         {@code bits}, provided that value is below {@code maxNumElems}; {@code -1} if
 *         {@code bits} has no zero bit or the resulting index is not below
 *         {@code maxNumElems}
 */
private int findNextAvail0(int i, long bits) {
    // The lowest zero bit of 'bits' is the lowest one bit of '~bits'.
    // numberOfTrailingZeros returns 64 when its argument is 0, i.e. when the word is full.
    final int j = Long.numberOfTrailingZeros(~bits);
    if (j == 64) {
        return -1;
    }

    final int val = i << 6 | j;
    // Only the lowest free bit is considered; if it lies past the element count, give up.
    return val < this.maxNumElems ? val : -1;
}
private long toHandle(int bitmapIdx) {