Optimize PooledByteBufAllocator

Motivation:

There is still room for improvement in PoolChunk.allocateRun() and
PoolSubpage.allocate().

Modifications:

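- Partially unroll the recursion in PoolChunk.allocateRun() by inspecting
  the state of both child nodes before recursing into either of them.
- PoolSubpage.allocate() makes use of the 'nextAvail' value set by the
  previous free(), and falls back to searching the bitmap only when no
  such value is cached.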

Result:

- The PoolChunk.allocateRun() optimization yields a 10%+ improvement in
  allocation throughput for non-subpage allocations.
- The PoolSubpage.allocate() optimization makes subpage allocations for
  tiny buffers as fast as those for non-tiny buffers, even when the
  pageSize is huge (e.g. 1048576), because in most cases it no longer
  needs to perform a linear search.
This commit is contained in:
Trustin Lee 2014-05-30 10:45:28 +09:00
parent b50f91f6d0
commit 1d0a79e11e
2 changed files with 97 additions and 43 deletions

View File

@ -119,26 +119,62 @@ final class PoolChunk<T> {
} }
private long allocateRun(int normCapacity, int curIdx, int val) { private long allocateRun(int normCapacity, int curIdx, int val) {
for (;;) { switch (val & 3) {
if ((val & ST_ALLOCATED) != 0) { // state == ST_ALLOCATED || state == ST_ALLOCATED_SUBPAGE case ST_UNUSED:
return allocateRunSimple(normCapacity, curIdx, val);
case ST_BRANCH:
final int nextIdxLeft = curIdx << 1;
final int nextValLeft = memoryMap[nextIdxLeft];
final boolean recurseLeft;
switch (nextValLeft & 3) {
case ST_UNUSED:
return allocateRunSimple(normCapacity, nextIdxLeft, nextValLeft);
case ST_BRANCH:
recurseLeft = true;
break;
default:
recurseLeft = false;
}
final int nextIdxRight = nextIdxLeft ^ 1;
final int nextValRight = memoryMap[nextIdxRight];
final boolean recurseRight;
switch (nextValRight & 3) {
case ST_UNUSED:
return allocateRunSimple(normCapacity, nextIdxRight, nextValRight);
case ST_BRANCH:
recurseRight = true;
break;
default:
recurseRight = false;
}
if (recurseLeft) {
long res = branchRun(normCapacity, nextIdxLeft);
if (res > 0) {
return res;
}
}
if (recurseRight) {
return branchRun(normCapacity, nextIdxRight);
}
}
return -1; return -1;
} }
if ((val & ST_BRANCH) != 0) { // state == ST_BRANCH private long branchRun(int normCapacity, int nextIdx) {
int nextIdx = curIdx << 1 ^ nextRandom(); int nextNextIdx = nextIdx << 1;
long res = allocateRun(normCapacity, nextIdx, memoryMap[nextIdx]); int nextNextVal = memoryMap[nextNextIdx];
long res = allocateRun(normCapacity, nextNextIdx, nextNextVal);
if (res > 0) { if (res > 0) {
return res; return res;
} }
curIdx = nextIdx ^ 1; nextNextIdx ^= 1;
val = memoryMap[curIdx]; nextNextVal = memoryMap[nextNextIdx];
continue; return allocateRun(normCapacity, nextNextIdx, nextNextVal);
}
// state == ST_UNUSED
return allocateRunSimple(normCapacity, curIdx, val);
}
} }
private long allocateRunSimple(int normCapacity, int curIdx, int val) { private long allocateRunSimple(int normCapacity, int curIdx, int val) {

View File

@ -19,20 +19,20 @@ package io.netty.buffer;
final class PoolSubpage<T> { final class PoolSubpage<T> {
final PoolChunk<T> chunk; final PoolChunk<T> chunk;
final int memoryMapIdx; private final int memoryMapIdx;
final int runOffset; private final int runOffset;
final int pageSize; private final int pageSize;
final long[] bitmap; private final long[] bitmap;
PoolSubpage<T> prev; PoolSubpage<T> prev;
PoolSubpage<T> next; PoolSubpage<T> next;
boolean doNotDestroy; boolean doNotDestroy;
int elemSize; int elemSize;
int maxNumElems; private int maxNumElems;
int nextAvail; private int bitmapLength;
int bitmapLength; private int nextAvail;
int numAvail; private int numAvail;
// TODO: Test if adding padding helps under contention // TODO: Test if adding padding helps under contention
//private long pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7; //private long pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7;
@ -87,7 +87,7 @@ final class PoolSubpage<T> {
return -1; return -1;
} }
final int bitmapIdx = nextAvail; final int bitmapIdx = getNextAvail();
int q = bitmapIdx >>> 6; int q = bitmapIdx >>> 6;
int r = bitmapIdx & 63; int r = bitmapIdx & 63;
assert (bitmap[q] >>> r & 1) == 0; assert (bitmap[q] >>> r & 1) == 0;
@ -95,9 +95,6 @@ final class PoolSubpage<T> {
if (-- numAvail == 0) { if (-- numAvail == 0) {
removeFromPool(); removeFromPool();
nextAvail = -1;
} else {
nextAvail = findNextAvailable();
} }
return toHandle(bitmapIdx); return toHandle(bitmapIdx);
@ -118,8 +115,9 @@ final class PoolSubpage<T> {
assert (bitmap[q] >>> r & 1) != 0; assert (bitmap[q] >>> r & 1) != 0;
bitmap[q] ^= 1L << r; bitmap[q] ^= 1L << r;
setNextAvail(bitmapIdx);
if (numAvail ++ == 0) { if (numAvail ++ == 0) {
nextAvail = bitmapIdx;
addToPool(); addToPool();
return true; return true;
} }
@ -157,28 +155,48 @@ final class PoolSubpage<T> {
prev = null; prev = null;
} }
private int findNextAvailable() { private void setNextAvail(int bitmapIdx) {
int newNextAvail = -1; nextAvail = bitmapIdx;
loop: }
private int getNextAvail() {
int nextAvail = this.nextAvail;
if (nextAvail >= 0) {
this.nextAvail = -1;
return nextAvail;
}
return findNextAvail();
}
private int findNextAvail() {
final long[] bitmap = this.bitmap;
final int bitmapLength = this.bitmapLength;
for (int i = 0; i < bitmapLength; i ++) { for (int i = 0; i < bitmapLength; i ++) {
long bits = bitmap[i]; long bits = bitmap[i];
if (~bits != 0) { if (~bits != 0) {
return findNextAvail0(i, bits);
}
}
return -1;
}
private int findNextAvail0(int i, long bits) {
final int maxNumElems = this.maxNumElems;
final int baseVal = i << 6;
for (int j = 0; j < 64; j ++) { for (int j = 0; j < 64; j ++) {
if ((bits & 1) == 0) { if ((bits & 1) == 0) {
newNextAvail = i << 6 | j; int val = baseVal | j;
break loop; if (val < maxNumElems) {
return val;
} else {
break;
}
} }
bits >>>= 1; bits >>>= 1;
} }
}
}
if (newNextAvail < maxNumElems) {
return newNextAvail;
} else {
return -1; return -1;
} }
}
private long toHandle(int bitmapIdx) { private long toHandle(int bitmapIdx) {
return 0x4000000000000000L | (long) bitmapIdx << 32 | memoryMapIdx; return 0x4000000000000000L | (long) bitmapIdx << 32 | memoryMapIdx;