Replace usage() with freeBytes() in thresholds within hot paths of PoolChunkList (#10141)

Motivation:
The PoolChunk.usage() method performs non-trivial computations. It is currently used in hot-path methods that are invoked whenever an allocation or de-allocation happens.
The idea is to replace the comparison of the usage() output against percent thresholds with a plain comparison of PoolChunk.freeBytes against absolute thresholds. In this way, the majority of the computations in the threshold conditions are moved to the initialization logic.

Modifications:
Replace PoolChunk.usage() conditions in PoolChunkList with equivalent conditions for PoolChunk.freeBytes()

Result:
Improve performance of allocation and de-allocation of ByteBuf from normal size cache pool
This commit is contained in:
Dmitry Konstantinov 2020-03-31 23:11:16 +03:00 committed by Norman Maurer
parent 7564ed54f9
commit dc69c04434
4 changed files with 107 additions and 7 deletions

View File

@ -132,7 +132,7 @@ final class PoolChunk<T> implements PoolChunkMetric {
// This may be null if the PoolChunk is unpooled as pooling the ByteBuffer instances does not make any sense here.
private final Deque<ByteBuffer> cachedNioBuffers;
private int freeBytes;
int freeBytes;
PoolChunkList<T> parent;
PoolChunk<T> prev;

View File

@ -35,6 +35,8 @@ final class PoolChunkList<T> implements PoolChunkListMetric {
private final int maxUsage;
private final int maxCapacity;
private PoolChunk<T> head;
private final int freeMinThreshold;
private final int freeMaxThreshold;
// This is only updated once, when the linked list of PoolChunkList is created in the PoolArena constructor.
private PoolChunkList<T> prevList;
@ -49,6 +51,24 @@ final class PoolChunkList<T> implements PoolChunkListMetric {
this.minUsage = minUsage;
this.maxUsage = maxUsage;
maxCapacity = calculateMaxCapacity(minUsage, chunkSize);
// the thresholds are aligned with PoolChunk.usage() logic:
// 1) basic logic: usage() = 100 - freeBytes * 100L / chunkSize
// so, for example: (usage() >= maxUsage) condition can be transformed in the following way:
// 100 - freeBytes * 100L / chunkSize >= maxUsage
// freeBytes <= chunkSize * (100 - maxUsage) / 100
// let freeMinThreshold = chunkSize * (100 - maxUsage) / 100, then freeBytes <= freeMinThreshold
//
// 2) usage() returns an int value and has a floor rounding during a calculation,
// to be aligned absolute thresholds should be shifted for "the rounding step":
// freeBytes * 100 / chunkSize < 1
// the condition can be converted to: freeBytes < 1 * chunkSize / 100
// this is why we have + 0.99999999 shifts. An example of why just a +1 shift cannot be used:
// freeBytes = 16777216 == freeMaxThreshold: 16777216, usage = 0 < minUsage: 1, chunkSize: 16777216
// At the same time we want to have zero thresholds in case of (maxUsage == 100) and (minUsage == 100).
//
freeMinThreshold = (maxUsage == 100) ? 0 : (int) (chunkSize * (100.0 - maxUsage + 0.99999999) / 100L);
freeMaxThreshold = (minUsage == 100) ? 0 : (int) (chunkSize * (100.0 - minUsage + 0.99999999) / 100L);
}
/**
@ -85,7 +105,7 @@ final class PoolChunkList<T> implements PoolChunkListMetric {
for (PoolChunk<T> cur = head; cur != null; cur = cur.next) {
if (cur.allocate(buf, reqCapacity, normCapacity)) {
if (cur.usage() >= maxUsage) {
if (cur.freeBytes <= freeMinThreshold) {
remove(cur);
nextList.add(cur);
}
@ -97,7 +117,7 @@ final class PoolChunkList<T> implements PoolChunkListMetric {
boolean free(PoolChunk<T> chunk, long handle, ByteBuffer nioBuffer) {
chunk.free(handle, nioBuffer);
if (chunk.usage() < minUsage) {
if (chunk.freeBytes > freeMaxThreshold) {
remove(chunk);
// Move the PoolChunk down the PoolChunkList linked-list.
return move0(chunk);
@ -108,7 +128,7 @@ final class PoolChunkList<T> implements PoolChunkListMetric {
private boolean move(PoolChunk<T> chunk) {
assert chunk.usage() < maxUsage;
if (chunk.usage() < minUsage) {
if (chunk.freeBytes > freeMaxThreshold) {
// Move the PoolChunk down the PoolChunkList linked-list.
return move0(chunk);
}
@ -133,7 +153,7 @@ final class PoolChunkList<T> implements PoolChunkListMetric {
}
void add(PoolChunk<T> chunk) {
if (chunk.usage() >= maxUsage) {
if (chunk.freeBytes <= freeMinThreshold) {
nextList.add(chunk);
return;
}

View File

@ -0,0 +1,77 @@
/*
* Copyright 2020 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.microbench.buffer;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufAllocator;
import io.netty.buffer.PooledByteBufAllocator;
import io.netty.microbench.util.AbstractMicrobenchmark;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.infra.Blackhole;
import java.util.concurrent.TimeUnit;
/**
 * Microbenchmark measuring the average time, in nanoseconds, of obtaining a direct
 * {@link ByteBuf} from a {@link PooledByteBufAllocator} and releasing it again.
 *
 * <p>The run is parameterized by the requested buffer size, by the number of
 * {@link Blackhole#consumeCPU(long)} tokens spent between allocation and release,
 * and by the boolean flag handed to the allocator as its last constructor argument.
 */
@State(Scope.Benchmark)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class SimpleByteBufPooledAllocatorBenchmark extends AbstractMicrobenchmark {

    /** Capacity requested from the allocator on every invocation. */
    @Param({"123", "1234", "12345", "123456", "1234567"})
    public int size;

    /** Tokens passed to {@link Blackhole#consumeCPU(long)}; nothing is consumed when this is 0. */
    @Param({"0", "5", "10", "100"})
    public long tokens;

    /** Forwarded as the last constructor argument of the {@link PooledByteBufAllocator}. */
    @Param({"false", "true"})
    public boolean useThreadCache;

    /** Allocator under test; (re)created once per trial by {@link #doSetup()}. */
    public ByteBufAllocator allocator;

    /**
     * Passes {@code true} for {@code disableAssertions} and {@code false} for
     * {@code disableHarnessExecutor} to the base class.
     */
    public SimpleByteBufPooledAllocatorBenchmark() {
        super(true, false);
    }

    /**
     * Builds the allocator from the {@link PooledByteBufAllocator} defaults, overriding only
     * the final argument with {@link #useThreadCache}.
     */
    @Setup(Level.Trial)
    public void doSetup() {
        // The defaults are read in the same order in which the constructor consumes them.
        final boolean preferDirect = PooledByteBufAllocator.defaultPreferDirect();
        final int heapArenas = PooledByteBufAllocator.defaultNumHeapArena();
        final int directArenas = PooledByteBufAllocator.defaultNumDirectArena();
        final int pageSize = PooledByteBufAllocator.defaultPageSize();
        final int maxOrder = PooledByteBufAllocator.defaultMaxOrder();
        final int tinyCacheSize = PooledByteBufAllocator.defaultTinyCacheSize();
        final int smallCacheSize = PooledByteBufAllocator.defaultSmallCacheSize();
        final int normalCacheSize = PooledByteBufAllocator.defaultNormalCacheSize();

        allocator = new PooledByteBufAllocator(
                preferDirect, heapArenas, directArenas, pageSize, maxOrder,
                tinyCacheSize, smallCacheSize, normalCacheSize, useThreadCache);
    }

    /**
     * Allocates a direct buffer of {@link #size} bytes, optionally consumes CPU tokens,
     * and releases the buffer.
     *
     * @return the result of {@link ByteBuf#release()}, returned so the call has a consumer
     */
    @Benchmark
    public boolean getAndRelease() {
        final ByteBuf pooled = allocator.directBuffer(size);
        final long work = tokens;
        if (work > 0) {
            // Simulated work between acquiring and releasing the buffer.
            Blackhole.consumeCPU(work);
        }
        return pooled.release();
    }
}

View File

@ -58,9 +58,12 @@ public class AbstractMicrobenchmark extends AbstractMicrobenchmarkBase {
public AbstractMicrobenchmark(boolean disableAssertions, boolean disableHarnessExecutor) {
final String[] customArgs;
if (disableHarnessExecutor) {
customArgs = new String[]{"-Xms768m", "-Xmx768m", "-XX:MaxDirectMemorySize=768m"};
customArgs = new String[]{"-Xms768m", "-Xmx768m", "-XX:MaxDirectMemorySize=768m",
"-XX:BiasedLockingStartupDelay=0"};
} else {
customArgs = new String[]{"-Xms768m", "-Xmx768m", "-XX:MaxDirectMemorySize=768m", "-Djmh.executor=CUSTOM",
customArgs = new String[]{"-Xms768m", "-Xmx768m", "-XX:MaxDirectMemorySize=768m",
"-XX:BiasedLockingStartupDelay=0",
"-Djmh.executor=CUSTOM",
"-Djmh.executor.class=io.netty.microbench.util.AbstractMicrobenchmark$HarnessExecutor"};
}
String[] jvmArgs = new String[BASE_JVM_ARGS.length + customArgs.length];