Allow aligning allocated buffers
Motivation: 64-byte alignment is recommended by the Intel performance guide (https://software.intel.com/en-us/articles/practical-intel-avx-optimization-on-2nd-generation-intel-core-processors) for data structures over 64 bytes. Requiring padding to a multiple of 64 bytes allows SIMD instructions to be used consistently in loops without additional conditional checks. This should allow for simpler and more efficient code. Modification: At the moment the cache alignment must be set up manually, although it could probably be taken from the system. The original code was introduced by @normanmaurer in https://github.com/netty/netty/pull/4726/files Result: Cache-aligned buffers work better than misaligned ones.
This commit is contained in:
parent
48f6541cb3
commit
66b9be3a46
@ -47,6 +47,8 @@ abstract class PoolArena<T> implements PoolArenaMetric {
|
|||||||
final int chunkSize;
|
final int chunkSize;
|
||||||
final int subpageOverflowMask;
|
final int subpageOverflowMask;
|
||||||
final int numSmallSubpagePools;
|
final int numSmallSubpagePools;
|
||||||
|
final int directMemoryCacheAlignment;
|
||||||
|
final int directMemoryCacheAlignmentMask;
|
||||||
private final PoolSubpage<T>[] tinySubpagePools;
|
private final PoolSubpage<T>[] tinySubpagePools;
|
||||||
private final PoolSubpage<T>[] smallSubpagePools;
|
private final PoolSubpage<T>[] smallSubpagePools;
|
||||||
|
|
||||||
@ -80,12 +82,15 @@ abstract class PoolArena<T> implements PoolArenaMetric {
|
|||||||
// TODO: Test if adding padding helps under contention
|
// TODO: Test if adding padding helps under contention
|
||||||
//private long pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7;
|
//private long pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7;
|
||||||
|
|
||||||
protected PoolArena(PooledByteBufAllocator parent, int pageSize, int maxOrder, int pageShifts, int chunkSize) {
|
protected PoolArena(PooledByteBufAllocator parent, int pageSize,
|
||||||
|
int maxOrder, int pageShifts, int chunkSize, int cacheAlignment) {
|
||||||
this.parent = parent;
|
this.parent = parent;
|
||||||
this.pageSize = pageSize;
|
this.pageSize = pageSize;
|
||||||
this.maxOrder = maxOrder;
|
this.maxOrder = maxOrder;
|
||||||
this.pageShifts = pageShifts;
|
this.pageShifts = pageShifts;
|
||||||
this.chunkSize = chunkSize;
|
this.chunkSize = chunkSize;
|
||||||
|
this.directMemoryCacheAlignment = cacheAlignment;
|
||||||
|
this.directMemoryCacheAlignmentMask = cacheAlignment - 1;
|
||||||
subpageOverflowMask = ~(pageSize - 1);
|
subpageOverflowMask = ~(pageSize - 1);
|
||||||
tinySubpagePools = newSubpagePoolArray(numTinySubpagePools);
|
tinySubpagePools = newSubpagePoolArray(numTinySubpagePools);
|
||||||
for (int i = 0; i < tinySubpagePools.length; i ++) {
|
for (int i = 0; i < tinySubpagePools.length; i ++) {
|
||||||
@ -329,8 +334,9 @@ abstract class PoolArena<T> implements PoolArenaMetric {
|
|||||||
if (reqCapacity < 0) {
|
if (reqCapacity < 0) {
|
||||||
throw new IllegalArgumentException("capacity: " + reqCapacity + " (expected: 0+)");
|
throw new IllegalArgumentException("capacity: " + reqCapacity + " (expected: 0+)");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (reqCapacity >= chunkSize) {
|
if (reqCapacity >= chunkSize) {
|
||||||
return reqCapacity;
|
return directMemoryCacheAlignment == 0 ? reqCapacity : alignCapacity(reqCapacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isTiny(reqCapacity)) { // >= 512
|
if (!isTiny(reqCapacity)) { // >= 512
|
||||||
@ -348,10 +354,15 @@ abstract class PoolArena<T> implements PoolArenaMetric {
|
|||||||
if (normalizedCapacity < 0) {
|
if (normalizedCapacity < 0) {
|
||||||
normalizedCapacity >>>= 1;
|
normalizedCapacity >>>= 1;
|
||||||
}
|
}
|
||||||
|
assert directMemoryCacheAlignment == 0 || (normalizedCapacity & directMemoryCacheAlignmentMask) == 0;
|
||||||
|
|
||||||
return normalizedCapacity;
|
return normalizedCapacity;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (directMemoryCacheAlignment > 0) {
|
||||||
|
return alignCapacity(reqCapacity);
|
||||||
|
}
|
||||||
|
|
||||||
// Quantum-spaced
|
// Quantum-spaced
|
||||||
if ((reqCapacity & 15) == 0) {
|
if ((reqCapacity & 15) == 0) {
|
||||||
return reqCapacity;
|
return reqCapacity;
|
||||||
@ -360,6 +371,11 @@ abstract class PoolArena<T> implements PoolArenaMetric {
|
|||||||
return (reqCapacity & ~15) + 16;
|
return (reqCapacity & ~15) + 16;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int alignCapacity(int reqCapacity) {
|
||||||
|
int delta = reqCapacity & directMemoryCacheAlignmentMask;
|
||||||
|
return delta == 0 ? reqCapacity : reqCapacity + directMemoryCacheAlignment - delta;
|
||||||
|
}
|
||||||
|
|
||||||
void reallocate(PooledByteBuf<T> buf, int newCapacity, boolean freeOldMemory) {
|
void reallocate(PooledByteBuf<T> buf, int newCapacity, boolean freeOldMemory) {
|
||||||
if (newCapacity < 0 || newCapacity > buf.maxCapacity()) {
|
if (newCapacity < 0 || newCapacity > buf.maxCapacity()) {
|
||||||
throw new IllegalArgumentException("newCapacity: " + newCapacity);
|
throw new IllegalArgumentException("newCapacity: " + newCapacity);
|
||||||
@ -650,8 +666,10 @@ abstract class PoolArena<T> implements PoolArenaMetric {
|
|||||||
|
|
||||||
static final class HeapArena extends PoolArena<byte[]> {
|
static final class HeapArena extends PoolArena<byte[]> {
|
||||||
|
|
||||||
HeapArena(PooledByteBufAllocator parent, int pageSize, int maxOrder, int pageShifts, int chunkSize) {
|
HeapArena(PooledByteBufAllocator parent, int pageSize, int maxOrder,
|
||||||
super(parent, pageSize, maxOrder, pageShifts, chunkSize);
|
int pageShifts, int chunkSize, int directMemoryCacheAlignment) {
|
||||||
|
super(parent, pageSize, maxOrder, pageShifts, chunkSize,
|
||||||
|
directMemoryCacheAlignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -661,12 +679,12 @@ abstract class PoolArena<T> implements PoolArenaMetric {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected PoolChunk<byte[]> newChunk(int pageSize, int maxOrder, int pageShifts, int chunkSize) {
|
protected PoolChunk<byte[]> newChunk(int pageSize, int maxOrder, int pageShifts, int chunkSize) {
|
||||||
return new PoolChunk<byte[]>(this, new byte[chunkSize], pageSize, maxOrder, pageShifts, chunkSize);
|
return new PoolChunk<byte[]>(this, new byte[chunkSize], pageSize, maxOrder, pageShifts, chunkSize, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected PoolChunk<byte[]> newUnpooledChunk(int capacity) {
|
protected PoolChunk<byte[]> newUnpooledChunk(int capacity) {
|
||||||
return new PoolChunk<byte[]>(this, new byte[capacity], capacity);
|
return new PoolChunk<byte[]>(this, new byte[capacity], capacity, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -692,8 +710,10 @@ abstract class PoolArena<T> implements PoolArenaMetric {
|
|||||||
|
|
||||||
static final class DirectArena extends PoolArena<ByteBuffer> {
|
static final class DirectArena extends PoolArena<ByteBuffer> {
|
||||||
|
|
||||||
DirectArena(PooledByteBufAllocator parent, int pageSize, int maxOrder, int pageShifts, int chunkSize) {
|
DirectArena(PooledByteBufAllocator parent, int pageSize, int maxOrder,
|
||||||
super(parent, pageSize, maxOrder, pageShifts, chunkSize);
|
int pageShifts, int chunkSize, int directMemoryCacheAlignment) {
|
||||||
|
super(parent, pageSize, maxOrder, pageShifts, chunkSize,
|
||||||
|
directMemoryCacheAlignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -701,16 +721,35 @@ abstract class PoolArena<T> implements PoolArenaMetric {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private int offsetCacheLine(ByteBuffer memory) {
|
||||||
|
return (int) (PlatformDependent.directBufferAddress(memory) & directMemoryCacheAlignmentMask);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected PoolChunk<ByteBuffer> newChunk(int pageSize, int maxOrder, int pageShifts, int chunkSize) {
|
protected PoolChunk<ByteBuffer> newChunk(int pageSize, int maxOrder,
|
||||||
return new PoolChunk<ByteBuffer>(
|
int pageShifts, int chunkSize) {
|
||||||
this, allocateDirect(chunkSize),
|
if (directMemoryCacheAlignment == 0) {
|
||||||
pageSize, maxOrder, pageShifts, chunkSize);
|
return new PoolChunk<ByteBuffer>(this,
|
||||||
|
allocateDirect(chunkSize), pageSize, maxOrder,
|
||||||
|
pageShifts, chunkSize, 0);
|
||||||
|
}
|
||||||
|
final ByteBuffer memory = allocateDirect(chunkSize
|
||||||
|
+ directMemoryCacheAlignment);
|
||||||
|
return new PoolChunk<ByteBuffer>(this, memory, pageSize,
|
||||||
|
maxOrder, pageShifts, chunkSize,
|
||||||
|
offsetCacheLine(memory));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected PoolChunk<ByteBuffer> newUnpooledChunk(int capacity) {
|
protected PoolChunk<ByteBuffer> newUnpooledChunk(int capacity) {
|
||||||
return new PoolChunk<ByteBuffer>(this, allocateDirect(capacity), capacity);
|
if (directMemoryCacheAlignment == 0) {
|
||||||
|
return new PoolChunk<ByteBuffer>(this,
|
||||||
|
allocateDirect(capacity), capacity, 0);
|
||||||
|
}
|
||||||
|
final ByteBuffer memory = allocateDirect(capacity
|
||||||
|
+ directMemoryCacheAlignment);
|
||||||
|
return new PoolChunk<ByteBuffer>(this, memory, capacity,
|
||||||
|
offsetCacheLine(memory));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ByteBuffer allocateDirect(int capacity) {
|
private static ByteBuffer allocateDirect(int capacity) {
|
||||||
|
@ -107,6 +107,7 @@ final class PoolChunk<T> implements PoolChunkMetric {
|
|||||||
final PoolArena<T> arena;
|
final PoolArena<T> arena;
|
||||||
final T memory;
|
final T memory;
|
||||||
final boolean unpooled;
|
final boolean unpooled;
|
||||||
|
final int offset;
|
||||||
|
|
||||||
private final byte[] memoryMap;
|
private final byte[] memoryMap;
|
||||||
private final byte[] depthMap;
|
private final byte[] depthMap;
|
||||||
@ -131,7 +132,7 @@ final class PoolChunk<T> implements PoolChunkMetric {
|
|||||||
// TODO: Test if adding padding helps under contention
|
// TODO: Test if adding padding helps under contention
|
||||||
//private long pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7;
|
//private long pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7;
|
||||||
|
|
||||||
PoolChunk(PoolArena<T> arena, T memory, int pageSize, int maxOrder, int pageShifts, int chunkSize) {
|
PoolChunk(PoolArena<T> arena, T memory, int pageSize, int maxOrder, int pageShifts, int chunkSize, int offset) {
|
||||||
unpooled = false;
|
unpooled = false;
|
||||||
this.arena = arena;
|
this.arena = arena;
|
||||||
this.memory = memory;
|
this.memory = memory;
|
||||||
@ -139,6 +140,7 @@ final class PoolChunk<T> implements PoolChunkMetric {
|
|||||||
this.pageShifts = pageShifts;
|
this.pageShifts = pageShifts;
|
||||||
this.maxOrder = maxOrder;
|
this.maxOrder = maxOrder;
|
||||||
this.chunkSize = chunkSize;
|
this.chunkSize = chunkSize;
|
||||||
|
this.offset = offset;
|
||||||
unusable = (byte) (maxOrder + 1);
|
unusable = (byte) (maxOrder + 1);
|
||||||
log2ChunkSize = log2(chunkSize);
|
log2ChunkSize = log2(chunkSize);
|
||||||
subpageOverflowMask = ~(pageSize - 1);
|
subpageOverflowMask = ~(pageSize - 1);
|
||||||
@ -165,10 +167,11 @@ final class PoolChunk<T> implements PoolChunkMetric {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Creates a special chunk that is not pooled. */
|
/** Creates a special chunk that is not pooled. */
|
||||||
PoolChunk(PoolArena<T> arena, T memory, int size) {
|
PoolChunk(PoolArena<T> arena, T memory, int size, int offset) {
|
||||||
unpooled = true;
|
unpooled = true;
|
||||||
this.arena = arena;
|
this.arena = arena;
|
||||||
this.memory = memory;
|
this.memory = memory;
|
||||||
|
this.offset = offset;
|
||||||
memoryMap = null;
|
memoryMap = null;
|
||||||
depthMap = null;
|
depthMap = null;
|
||||||
subpages = null;
|
subpages = null;
|
||||||
@ -371,7 +374,7 @@ final class PoolChunk<T> implements PoolChunkMetric {
|
|||||||
if (bitmapIdx == 0) {
|
if (bitmapIdx == 0) {
|
||||||
byte val = value(memoryMapIdx);
|
byte val = value(memoryMapIdx);
|
||||||
assert val == unusable : String.valueOf(val);
|
assert val == unusable : String.valueOf(val);
|
||||||
buf.init(this, handle, runOffset(memoryMapIdx), reqCapacity, runLength(memoryMapIdx),
|
buf.init(this, handle, runOffset(memoryMapIdx) + offset, reqCapacity, runLength(memoryMapIdx),
|
||||||
arena.parent.threadCache());
|
arena.parent.threadCache());
|
||||||
} else {
|
} else {
|
||||||
initBufWithSubpage(buf, handle, bitmapIdx, reqCapacity);
|
initBufWithSubpage(buf, handle, bitmapIdx, reqCapacity);
|
||||||
@ -393,8 +396,8 @@ final class PoolChunk<T> implements PoolChunkMetric {
|
|||||||
|
|
||||||
buf.init(
|
buf.init(
|
||||||
this, handle,
|
this, handle,
|
||||||
runOffset(memoryMapIdx) + (bitmapIdx & 0x3FFFFFFF) * subpage.elemSize, reqCapacity, subpage.elemSize,
|
runOffset(memoryMapIdx) + (bitmapIdx & 0x3FFFFFFF) * subpage.elemSize + offset,
|
||||||
arena.parent.threadCache());
|
reqCapacity, subpage.elemSize, arena.parent.threadCache());
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte value(int id) {
|
private byte value(int id) {
|
||||||
|
@ -63,7 +63,7 @@ abstract class PooledByteBuf<T> extends AbstractReferenceCountedByteBuf {
|
|||||||
this.chunk = chunk;
|
this.chunk = chunk;
|
||||||
handle = 0;
|
handle = 0;
|
||||||
memory = chunk.memory;
|
memory = chunk.memory;
|
||||||
offset = 0;
|
offset = chunk.offset;
|
||||||
this.length = maxLength = length;
|
this.length = maxLength = length;
|
||||||
tmpNioBuf = null;
|
tmpNioBuf = null;
|
||||||
cache = null;
|
cache = null;
|
||||||
|
@ -43,6 +43,7 @@ public class PooledByteBufAllocator extends AbstractByteBufAllocator {
|
|||||||
private static final int DEFAULT_MAX_CACHED_BUFFER_CAPACITY;
|
private static final int DEFAULT_MAX_CACHED_BUFFER_CAPACITY;
|
||||||
private static final int DEFAULT_CACHE_TRIM_INTERVAL;
|
private static final int DEFAULT_CACHE_TRIM_INTERVAL;
|
||||||
private static final boolean DEFAULT_USE_CACHE_FOR_ALL_THREADS;
|
private static final boolean DEFAULT_USE_CACHE_FOR_ALL_THREADS;
|
||||||
|
private static final int DEFAULT_DIRECT_MEMORY_CACHE_ALIGNMENT;
|
||||||
|
|
||||||
private static final int MIN_PAGE_SIZE = 4096;
|
private static final int MIN_PAGE_SIZE = 4096;
|
||||||
private static final int MAX_CHUNK_SIZE = (int) (((long) Integer.MAX_VALUE + 1) / 2);
|
private static final int MAX_CHUNK_SIZE = (int) (((long) Integer.MAX_VALUE + 1) / 2);
|
||||||
@ -108,6 +109,9 @@ public class PooledByteBufAllocator extends AbstractByteBufAllocator {
|
|||||||
DEFAULT_USE_CACHE_FOR_ALL_THREADS = SystemPropertyUtil.getBoolean(
|
DEFAULT_USE_CACHE_FOR_ALL_THREADS = SystemPropertyUtil.getBoolean(
|
||||||
"io.netty.allocator.useCacheForAllThreads", true);
|
"io.netty.allocator.useCacheForAllThreads", true);
|
||||||
|
|
||||||
|
DEFAULT_DIRECT_MEMORY_CACHE_ALIGNMENT = SystemPropertyUtil.getInt(
|
||||||
|
"io.netty.allocator.directMemoryCacheAlignment", 0);
|
||||||
|
|
||||||
if (logger.isDebugEnabled()) {
|
if (logger.isDebugEnabled()) {
|
||||||
logger.debug("-Dio.netty.allocator.numHeapArenas: {}", DEFAULT_NUM_HEAP_ARENA);
|
logger.debug("-Dio.netty.allocator.numHeapArenas: {}", DEFAULT_NUM_HEAP_ARENA);
|
||||||
logger.debug("-Dio.netty.allocator.numDirectArenas: {}", DEFAULT_NUM_DIRECT_ARENA);
|
logger.debug("-Dio.netty.allocator.numDirectArenas: {}", DEFAULT_NUM_DIRECT_ARENA);
|
||||||
@ -175,12 +179,21 @@ public class PooledByteBufAllocator extends AbstractByteBufAllocator {
|
|||||||
public PooledByteBufAllocator(boolean preferDirect, int nHeapArena, int nDirectArena, int pageSize, int maxOrder,
|
public PooledByteBufAllocator(boolean preferDirect, int nHeapArena, int nDirectArena, int pageSize, int maxOrder,
|
||||||
int tinyCacheSize, int smallCacheSize, int normalCacheSize) {
|
int tinyCacheSize, int smallCacheSize, int normalCacheSize) {
|
||||||
this(preferDirect, nHeapArena, nDirectArena, pageSize, maxOrder, tinyCacheSize, smallCacheSize,
|
this(preferDirect, nHeapArena, nDirectArena, pageSize, maxOrder, tinyCacheSize, smallCacheSize,
|
||||||
normalCacheSize, DEFAULT_USE_CACHE_FOR_ALL_THREADS);
|
normalCacheSize, DEFAULT_USE_CACHE_FOR_ALL_THREADS, DEFAULT_DIRECT_MEMORY_CACHE_ALIGNMENT);
|
||||||
|
}
|
||||||
|
|
||||||
|
public PooledByteBufAllocator(boolean preferDirect, int nHeapArena,
|
||||||
|
int nDirectArena, int pageSize, int maxOrder, int tinyCacheSize,
|
||||||
|
int smallCacheSize, int normalCacheSize,
|
||||||
|
boolean useCacheForAllThreads) {
|
||||||
|
this(preferDirect, nHeapArena, nDirectArena, pageSize, maxOrder,
|
||||||
|
tinyCacheSize, smallCacheSize, normalCacheSize,
|
||||||
|
useCacheForAllThreads, DEFAULT_DIRECT_MEMORY_CACHE_ALIGNMENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
public PooledByteBufAllocator(boolean preferDirect, int nHeapArena, int nDirectArena, int pageSize, int maxOrder,
|
public PooledByteBufAllocator(boolean preferDirect, int nHeapArena, int nDirectArena, int pageSize, int maxOrder,
|
||||||
int tinyCacheSize, int smallCacheSize, int normalCacheSize,
|
int tinyCacheSize, int smallCacheSize, int normalCacheSize,
|
||||||
boolean useCacheForAllThreads) {
|
boolean useCacheForAllThreads, int directMemoryCacheAlignment) {
|
||||||
super(preferDirect);
|
super(preferDirect);
|
||||||
threadCache = new PoolThreadLocalCache(useCacheForAllThreads);
|
threadCache = new PoolThreadLocalCache(useCacheForAllThreads);
|
||||||
this.tinyCacheSize = tinyCacheSize;
|
this.tinyCacheSize = tinyCacheSize;
|
||||||
@ -195,13 +208,25 @@ public class PooledByteBufAllocator extends AbstractByteBufAllocator {
|
|||||||
throw new IllegalArgumentException("nDirectArea: " + nDirectArena + " (expected: >= 0)");
|
throw new IllegalArgumentException("nDirectArea: " + nDirectArena + " (expected: >= 0)");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (directMemoryCacheAlignment < 0) {
|
||||||
|
throw new IllegalArgumentException("directMemoryCacheAlignment: "
|
||||||
|
+ directMemoryCacheAlignment + " (expected: >= 0)");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((directMemoryCacheAlignment & -directMemoryCacheAlignment) != directMemoryCacheAlignment) {
|
||||||
|
throw new IllegalArgumentException("directMemoryCacheAlignment: "
|
||||||
|
+ directMemoryCacheAlignment + " (expected: power of two)");
|
||||||
|
}
|
||||||
|
|
||||||
int pageShifts = validateAndCalculatePageShifts(pageSize);
|
int pageShifts = validateAndCalculatePageShifts(pageSize);
|
||||||
|
|
||||||
if (nHeapArena > 0) {
|
if (nHeapArena > 0) {
|
||||||
heapArenas = newArenaArray(nHeapArena);
|
heapArenas = newArenaArray(nHeapArena);
|
||||||
List<PoolArenaMetric> metrics = new ArrayList<PoolArenaMetric>(heapArenas.length);
|
List<PoolArenaMetric> metrics = new ArrayList<PoolArenaMetric>(heapArenas.length);
|
||||||
for (int i = 0; i < heapArenas.length; i ++) {
|
for (int i = 0; i < heapArenas.length; i ++) {
|
||||||
PoolArena.HeapArena arena = new PoolArena.HeapArena(this, pageSize, maxOrder, pageShifts, chunkSize);
|
PoolArena.HeapArena arena = new PoolArena.HeapArena(this,
|
||||||
|
pageSize, maxOrder, pageShifts, chunkSize,
|
||||||
|
directMemoryCacheAlignment);
|
||||||
heapArenas[i] = arena;
|
heapArenas[i] = arena;
|
||||||
metrics.add(arena);
|
metrics.add(arena);
|
||||||
}
|
}
|
||||||
@ -216,7 +241,7 @@ public class PooledByteBufAllocator extends AbstractByteBufAllocator {
|
|||||||
List<PoolArenaMetric> metrics = new ArrayList<PoolArenaMetric>(directArenas.length);
|
List<PoolArenaMetric> metrics = new ArrayList<PoolArenaMetric>(directArenas.length);
|
||||||
for (int i = 0; i < directArenas.length; i ++) {
|
for (int i = 0; i < directArenas.length; i ++) {
|
||||||
PoolArena.DirectArena arena = new PoolArena.DirectArena(
|
PoolArena.DirectArena arena = new PoolArena.DirectArena(
|
||||||
this, pageSize, maxOrder, pageShifts, chunkSize);
|
this, pageSize, maxOrder, pageShifts, chunkSize, directMemoryCacheAlignment);
|
||||||
directArenas[i] = arena;
|
directArenas[i] = arena;
|
||||||
metrics.add(arena);
|
metrics.add(arena);
|
||||||
}
|
}
|
||||||
|
@ -25,7 +25,7 @@ public class PoolArenaTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testNormalizeCapacity() throws Exception {
|
public void testNormalizeCapacity() throws Exception {
|
||||||
PoolArena<ByteBuffer> arena = new PoolArena.DirectArena(null, 0, 0, 9, 999999);
|
PoolArena<ByteBuffer> arena = new PoolArena.DirectArena(null, 0, 0, 9, 999999, 0);
|
||||||
int[] reqCapacities = {0, 15, 510, 1024, 1023, 1025};
|
int[] reqCapacities = {0, 15, 510, 1024, 1023, 1025};
|
||||||
int[] expectedResult = {0, 16, 512, 1024, 1024, 2048};
|
int[] expectedResult = {0, 16, 512, 1024, 1024, 2048};
|
||||||
for (int i = 0; i < reqCapacities.length; i ++) {
|
for (int i = 0; i < reqCapacities.length; i ++) {
|
||||||
@ -33,6 +33,16 @@ public class PoolArenaTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNormalizeAlignedCapacity() throws Exception {
|
||||||
|
PoolArena<ByteBuffer> arena = new PoolArena.DirectArena(null, 0, 0, 9, 999999, 64);
|
||||||
|
int[] reqCapacities = {0, 15, 510, 1024, 1023, 1025};
|
||||||
|
int[] expectedResult = {0, 64, 512, 1024, 1024, 2048};
|
||||||
|
for (int i = 0; i < reqCapacities.length; i ++) {
|
||||||
|
Assert.assertEquals(expectedResult[i], arena.normalizeCapacity(reqCapacities[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public final void testAllocationCounter() {
|
public final void testAllocationCounter() {
|
||||||
final PooledByteBufAllocator allocator = new PooledByteBufAllocator(
|
final PooledByteBufAllocator allocator = new PooledByteBufAllocator(
|
||||||
|
@ -52,6 +52,16 @@ public class PooledByteBufAllocatorTest extends AbstractByteBufAllocatorTest {
|
|||||||
testArenaMetrics0(new PooledByteBufAllocator(true, 2, 2, 8192, 11, 1000, 1000, 1000), 100, 1, 1, 0);
|
testArenaMetrics0(new PooledByteBufAllocator(true, 2, 2, 8192, 11, 1000, 1000, 1000), 100, 1, 1, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testArenaMetricsNoCacheAlign() {
|
||||||
|
testArenaMetrics0(new PooledByteBufAllocator(true, 2, 2, 8192, 11, 0, 0, 0, true, 64), 100, 0, 100, 100);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testArenaMetricsCacheAlign() {
|
||||||
|
testArenaMetrics0(new PooledByteBufAllocator(true, 2, 2, 8192, 11, 1000, 1000, 1000, true, 64), 100, 1, 1, 0);
|
||||||
|
}
|
||||||
|
|
||||||
private static void testArenaMetrics0(
|
private static void testArenaMetrics0(
|
||||||
PooledByteBufAllocator allocator, int num, int expectedActive, int expectedAlloc, int expectedDealloc) {
|
PooledByteBufAllocator allocator, int num, int expectedActive, int expectedAlloc, int expectedDealloc) {
|
||||||
for (int i = 0; i < num; i++) {
|
for (int i = 0; i < num; i++) {
|
||||||
|
@ -35,7 +35,7 @@ public class ByteBufAllocatorBenchmark extends AbstractMicrobenchmark {
|
|||||||
|
|
||||||
private static final ByteBufAllocator unpooledAllocator = new UnpooledByteBufAllocator(true);
|
private static final ByteBufAllocator unpooledAllocator = new UnpooledByteBufAllocator(true);
|
||||||
private static final ByteBufAllocator pooledAllocator =
|
private static final ByteBufAllocator pooledAllocator =
|
||||||
new PooledByteBufAllocator(true, 4, 4, 8192, 11, 0, 0, 0); // Disable thread-local cache
|
new PooledByteBufAllocator(true, 4, 4, 8192, 11, 0, 0, 0, true, 0); // Disable thread-local cache
|
||||||
|
|
||||||
private static final int MAX_LIVE_BUFFERS = 8192;
|
private static final int MAX_LIVE_BUFFERS = 8192;
|
||||||
private static final Random rand = new Random();
|
private static final Random rand = new Random();
|
||||||
|
@ -0,0 +1,152 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2017 The Netty Project
|
||||||
|
*
|
||||||
|
* The Netty Project licenses this file to you under the Apache License,
|
||||||
|
* version 2.0 (the "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at:
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package io.netty.microbench.buffer;
|
||||||
|
|
||||||
|
import io.netty.buffer.ByteBuf;
|
||||||
|
import io.netty.buffer.PooledByteBufAllocator;
|
||||||
|
import io.netty.microbench.util.AbstractMicrobenchmark;
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
|
import org.openjdk.jmh.annotations.Fork;
|
||||||
|
import org.openjdk.jmh.annotations.Measurement;
|
||||||
|
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||||
|
import org.openjdk.jmh.annotations.Param;
|
||||||
|
import org.openjdk.jmh.annotations.Scope;
|
||||||
|
import org.openjdk.jmh.annotations.Setup;
|
||||||
|
import org.openjdk.jmh.annotations.State;
|
||||||
|
import org.openjdk.jmh.annotations.TearDown;
|
||||||
|
import org.openjdk.jmh.annotations.Warmup;
|
||||||
|
|
||||||
|
@State(Scope.Thread)
|
||||||
|
@Warmup(iterations = 5, time = 100, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@Measurement(iterations = 5, time = 100, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@Fork(5)
|
||||||
|
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||||
|
public class PooledByteBufAllocatorAlignBenchmark extends
|
||||||
|
AbstractMicrobenchmark {
|
||||||
|
|
||||||
|
private static final Random rand = new Random();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Cache line power of 2.
|
||||||
|
*/
|
||||||
|
private static final int CACHE_LINE_MAX = 256;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PRNG to walk the chunk randomly to avoid streaming reads.
|
||||||
|
*/
|
||||||
|
private static final int OFFSET_ADD = CACHE_LINE_MAX * 1337;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Block of bytes to write/read. (Corresponds to int type)
|
||||||
|
*/
|
||||||
|
private static final int BLOCK = 4;
|
||||||
|
|
||||||
|
@Param({ "0", "64" })
|
||||||
|
private int cacheAlign;
|
||||||
|
|
||||||
|
@Param({ "01024", "04096", "16384", "65536", "1048576" })
|
||||||
|
private int size;
|
||||||
|
|
||||||
|
private ByteBuf pooledDirectBuffer;
|
||||||
|
|
||||||
|
private byte[] bytes;
|
||||||
|
|
||||||
|
private int sizeMask;
|
||||||
|
|
||||||
|
private int alignOffset;
|
||||||
|
|
||||||
|
@Setup
|
||||||
|
public void doSetup() {
|
||||||
|
PooledByteBufAllocator pooledAllocator = new PooledByteBufAllocator(true, 4, 4, 8192, 11, 0,
|
||||||
|
0, 0, true, cacheAlign);
|
||||||
|
pooledDirectBuffer = pooledAllocator.directBuffer(size + 64);
|
||||||
|
sizeMask = size - 1;
|
||||||
|
if (cacheAlign == 0) {
|
||||||
|
long addr = pooledDirectBuffer.memoryAddress();
|
||||||
|
// make sure address is miss-aligned
|
||||||
|
if (addr % 64 == 0) {
|
||||||
|
alignOffset = 63;
|
||||||
|
}
|
||||||
|
int off = 0;
|
||||||
|
for (int c = 0; c < size; c++) {
|
||||||
|
off = (off + OFFSET_ADD) & sizeMask;
|
||||||
|
if ((addr + off + alignOffset) % BLOCK == 0) {
|
||||||
|
throw new IllegalStateException(
|
||||||
|
"Misaligned address is not really aligned");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
alignOffset = 0;
|
||||||
|
int off = 0;
|
||||||
|
long addr = pooledDirectBuffer.memoryAddress();
|
||||||
|
for (int c = 0; c < size; c++) {
|
||||||
|
off = (off + OFFSET_ADD) & sizeMask;
|
||||||
|
if ((addr + off) % BLOCK != 0) {
|
||||||
|
throw new IllegalStateException(
|
||||||
|
"Aligned address is not really aligned");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bytes = new byte[BLOCK];
|
||||||
|
rand.nextBytes(bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
@TearDown
|
||||||
|
public void doTearDown() {
|
||||||
|
pooledDirectBuffer.release();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void writeRead() {
|
||||||
|
int off = 0;
|
||||||
|
int lSize = size;
|
||||||
|
int lSizeMask = sizeMask;
|
||||||
|
int lAlignOffset = alignOffset;
|
||||||
|
for (int i = 0; i < lSize; i++) {
|
||||||
|
off = (off + OFFSET_ADD) & lSizeMask;
|
||||||
|
pooledDirectBuffer.setBytes(off + lAlignOffset, bytes);
|
||||||
|
pooledDirectBuffer.getBytes(off + lAlignOffset, bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void write() {
|
||||||
|
int off = 0;
|
||||||
|
int lSize = size;
|
||||||
|
int lSizeMask = sizeMask;
|
||||||
|
int lAlignOffset = alignOffset;
|
||||||
|
for (int i = 0; i < lSize; i++) {
|
||||||
|
off = (off + OFFSET_ADD) & lSizeMask;
|
||||||
|
pooledDirectBuffer.setBytes(off + lAlignOffset, bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void read() {
|
||||||
|
int off = 0;
|
||||||
|
int lSize = size;
|
||||||
|
int lSizeMask = sizeMask;
|
||||||
|
int lAlignOffset = alignOffset;
|
||||||
|
for (int i = 0; i < lSize; i++) {
|
||||||
|
off = (off + OFFSET_ADD) & lSizeMask;
|
||||||
|
pooledDirectBuffer.getBytes(off + lAlignOffset, bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user