Speed up the slow path of FastThreadLocal

Motivation:

The current slow path of FastThreadLocal is much slower than JDK ThreadLocal. See #4418

Modifications:

- Add FastThreadLocalSlowPathBenchmark for the slow path of FastThreadLocal
- Make UnpaddedInternalThreadLocalMap.slowThreadLocalMap a static final, eagerly initialized field to speed up the slow path of FastThreadLocal

Result:

The slow path of FastThreadLocal is improved.
This commit is contained in:
Xiaoyan Lin 2016-03-21 18:06:05 -07:00 committed by Norman Maurer
parent 2308f1703b
commit f5b4937543
6 changed files with 248 additions and 99 deletions

View File

@ -38,18 +38,10 @@ public final class InternalThreadLocalMap extends UnpaddedInternalThreadLocalMap
public static InternalThreadLocalMap getIfSet() {
Thread thread = Thread.currentThread();
InternalThreadLocalMap threadLocalMap;
if (thread instanceof FastThreadLocalThread) {
threadLocalMap = ((FastThreadLocalThread) thread).threadLocalMap();
} else {
ThreadLocal<InternalThreadLocalMap> slowThreadLocalMap = UnpaddedInternalThreadLocalMap.slowThreadLocalMap;
if (slowThreadLocalMap == null) {
threadLocalMap = null;
} else {
threadLocalMap = slowThreadLocalMap.get();
}
return ((FastThreadLocalThread) thread).threadLocalMap();
}
return threadLocalMap;
return slowThreadLocalMap.get();
}
public static InternalThreadLocalMap get() {
@ -71,11 +63,6 @@ public final class InternalThreadLocalMap extends UnpaddedInternalThreadLocalMap
private static InternalThreadLocalMap slowGet() {
ThreadLocal<InternalThreadLocalMap> slowThreadLocalMap = UnpaddedInternalThreadLocalMap.slowThreadLocalMap;
if (slowThreadLocalMap == null) {
UnpaddedInternalThreadLocalMap.slowThreadLocalMap =
slowThreadLocalMap = new ThreadLocal<InternalThreadLocalMap>();
}
InternalThreadLocalMap ret = slowThreadLocalMap.get();
if (ret == null) {
ret = new InternalThreadLocalMap();
@ -89,15 +76,12 @@ public final class InternalThreadLocalMap extends UnpaddedInternalThreadLocalMap
if (thread instanceof FastThreadLocalThread) {
((FastThreadLocalThread) thread).setThreadLocalMap(null);
} else {
ThreadLocal<InternalThreadLocalMap> slowThreadLocalMap = UnpaddedInternalThreadLocalMap.slowThreadLocalMap;
if (slowThreadLocalMap != null) {
slowThreadLocalMap.remove();
}
slowThreadLocalMap.remove();
}
}
public static void destroy() {
slowThreadLocalMap = null;
slowThreadLocalMap.remove();
}
public static int nextVariableIndex() {

View File

@ -31,7 +31,7 @@ import java.util.concurrent.atomic.AtomicInteger;
*/
class UnpaddedInternalThreadLocalMap {
static ThreadLocal<InternalThreadLocalMap> slowThreadLocalMap;
static final ThreadLocal<InternalThreadLocalMap> slowThreadLocalMap = new ThreadLocal<InternalThreadLocalMap>();
static final AtomicInteger nextIndex = new AtomicInteger();
/** Used by {@link FastThreadLocal} */

View File

@ -24,11 +24,11 @@ import org.openjdk.jmh.annotations.Threads;
import java.util.Random;
/**
* This class benchmarks different allocators with different allocation sizes.
* This class benchmarks the fast path of FastThreadLocal and the JDK ThreadLocal.
*/
@Threads(4)
@Measurement(iterations = 10, batchSize = 100)
public class FastThreadLocalBenchmark extends AbstractMicrobenchmark {
public class FastThreadLocalFastPathBenchmark extends AbstractMicrobenchmark {
private static final Random rand = new Random();
@ -39,19 +39,17 @@ public class FastThreadLocalBenchmark extends AbstractMicrobenchmark {
static {
for (int i = 0; i < jdkThreadLocals.length; i ++) {
final int num = rand.nextInt();
jdkThreadLocals[i] = new ThreadLocal<Integer>() {
@Override
protected Integer initialValue() {
return rand.nextInt();
return num;
}
};
}
for (int i = 0; i < fastThreadLocals.length; i ++) {
fastThreadLocals[i] = new FastThreadLocal<Integer>() {
@Override
protected Integer initialValue() {
return rand.nextInt();
return num;
}
};
}

View File

@ -0,0 +1,79 @@
/*
* Copyright 2016 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.microbench.concurrent;
import io.netty.microbench.util.AbstractMicrobenchmark;
import io.netty.util.concurrent.FastThreadLocal;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Threads;
import java.util.Random;
/**
 * Compares the cost of {@link FastThreadLocal#get()} with the JDK {@link ThreadLocal#get()}
 * on the slow path of {@link FastThreadLocal}.
 *
 * <p>Both benchmarks read the same number of thread-local variables (128) and sum the values,
 * so the two results are directly comparable.
 */
@Threads(4)
@Measurement(iterations = 10, batchSize = 100)
public class FastThreadLocalSlowPathBenchmark extends AbstractMicrobenchmark {

    private static final Random rand = new Random();

    @SuppressWarnings("unchecked")
    private static final ThreadLocal<Integer>[] jdkThreadLocals = new ThreadLocal[128];
    @SuppressWarnings("unchecked")
    private static final FastThreadLocal<Integer>[] fastThreadLocals = new FastThreadLocal[jdkThreadLocals.length];

    static {
        // Each slot gets one random number that is shared by the JDK and the Netty variable,
        // so both benchmarks sum exactly the same values.
        for (int slot = 0; slot < jdkThreadLocals.length; slot++) {
            final int seed = rand.nextInt();
            jdkThreadLocals[slot] = newJdkThreadLocal(seed);
            fastThreadLocals[slot] = newFastThreadLocal(seed);
        }
    }

    /**
     * Runs with assertions left as configured and with the custom harness executor disabled.
     * NOTE(review): presumably the harness executor is disabled so the benchmark threads are
     * plain JDK threads and therefore hit the slow path - confirm against HarnessExecutor.
     */
    public FastThreadLocalSlowPathBenchmark() {
        super(false, true);
    }

    /** Creates a JDK thread-local whose initial value is the given constant. */
    private static ThreadLocal<Integer> newJdkThreadLocal(final int value) {
        return new ThreadLocal<Integer>() {
            @Override
            protected Integer initialValue() {
                return value;
            }
        };
    }

    /** Creates a Netty thread-local whose initial value is the given constant. */
    private static FastThreadLocal<Integer> newFastThreadLocal(final int value) {
        return new FastThreadLocal<Integer>() {
            @Override
            protected Integer initialValue() {
                return value;
            }
        };
    }

    /**
     * Reads every JDK {@link ThreadLocal} once.
     *
     * @return the sum of all values read, returned so the reads cannot be optimized away
     */
    @Benchmark
    public int jdkThreadLocalGet() {
        int sum = 0;
        for (ThreadLocal<Integer> local : jdkThreadLocals) {
            sum += local.get();
        }
        return sum;
    }

    /**
     * Reads every {@link FastThreadLocal} once.
     *
     * @return the sum of all values read, returned so the reads cannot be optimized away
     */
    @Benchmark
    public int fastThreadLocal() {
        int sum = 0;
        for (FastThreadLocal<Integer> local : fastThreadLocals) {
            sum += local.get();
        }
        return sum;
    }
}

View File

@ -15,36 +15,22 @@
*/
package io.netty.microbench.util;
import io.netty.util.ResourceLeakDetector;
import io.netty.util.concurrent.DefaultThreadFactory;
import io.netty.util.internal.SystemPropertyUtil;
import org.junit.Test;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.io.File;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
/**
* Base class for all JMH benchmarks.
*/
@Warmup(iterations = AbstractMicrobenchmark.DEFAULT_WARMUP_ITERATIONS)
@Measurement(iterations = AbstractMicrobenchmark.DEFAULT_MEASURE_ITERATIONS)
@Fork(AbstractMicrobenchmark.DEFAULT_FORKS)
@State(Scope.Thread)
public class AbstractMicrobenchmark {
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
/**
* Default implementation of the JMH microbenchmark adapter. There may be context switches introduced by this harness.
*/
@Fork(AbstractMicrobenchmark.DEFAULT_FORKS)
public class AbstractMicrobenchmark extends AbstractMicrobenchmarkBase {
protected static final int DEFAULT_WARMUP_ITERATIONS = 10;
protected static final int DEFAULT_MEASURE_ITERATIONS = 10;
protected static final int DEFAULT_FORKS = 2;
public static final class HarnessExecutor extends ThreadPoolExecutor {
@ -55,69 +41,49 @@ public class AbstractMicrobenchmark {
}
}
protected static final String[] JVM_ARGS = {
"-server", "-dsa", "-da", "-ea:io.netty...", "-Xms768m", "-Xmx768m",
"-XX:MaxDirectMemorySize=768m", "-XX:+AggressiveOpts", "-XX:+UseBiasedLocking",
"-XX:+UseFastAccessorMethods", "-XX:+UseStringCache", "-XX:+OptimizeStringConcat",
"-XX:+HeapDumpOnOutOfMemoryError", "-Dio.netty.noResourceLeakDetection",
"-Djmh.executor=CUSTOM",
"-Djmh.executor.class=io.netty.microbench.util.AbstractMicrobenchmark$HarnessExecutor"
};
private final String[] jvmArgs;
static {
ResourceLeakDetector.setLevel(ResourceLeakDetector.Level.DISABLED);
public AbstractMicrobenchmark() {
this(false, false);
}
@Test
public void run() throws Exception {
String className = getClass().getSimpleName();
public AbstractMicrobenchmark(boolean disableAssertions) {
this(disableAssertions, false);
}
ChainedOptionsBuilder runnerOptions = new OptionsBuilder()
.include(".*" + className + ".*")
.jvmArgs(JVM_ARGS);
if (getWarmupIterations() > 0) {
runnerOptions.warmupIterations(getWarmupIterations());
public AbstractMicrobenchmark(boolean disableAssertions, boolean disableHarnessExecutor) {
final String[] customArgs;
if (disableHarnessExecutor) {
customArgs = new String[]{"-Xms768m", "-Xmx768m", "-XX:MaxDirectMemorySize=768m"};
} else {
customArgs = new String[]{"-Xms768m", "-Xmx768m", "-XX:MaxDirectMemorySize=768m", "-Djmh.executor=CUSTOM",
"-Djmh.executor.class=io.netty.microbench.util.AbstractMicrobenchmark$HarnessExecutor"};
}
if (getMeasureIterations() > 0) {
runnerOptions.measurementIterations(getMeasureIterations());
String[] jvmArgs = new String[BASE_JVM_ARGS.length + customArgs.length];
System.arraycopy(BASE_JVM_ARGS, 0, jvmArgs, 0, BASE_JVM_ARGS.length);
System.arraycopy(customArgs, 0, jvmArgs, BASE_JVM_ARGS.length, customArgs.length);
if (disableAssertions) {
jvmArgs = removeAssertions(jvmArgs);
}
this.jvmArgs = jvmArgs;
}
@Override
protected String[] jvmArgs() {
return jvmArgs;
}
@Override
protected ChainedOptionsBuilder newOptionsBuilder() throws Exception {
ChainedOptionsBuilder runnerOptions = super.newOptionsBuilder();
if (getForks() > 0) {
runnerOptions.forks(getForks());
}
if (getReportDir() != null) {
String filePath = getReportDir() + className + ".json";
File file = new File(filePath);
if (file.exists()) {
file.delete();
} else {
file.getParentFile().mkdirs();
file.createNewFile();
}
runnerOptions.resultFormat(ResultFormatType.JSON);
runnerOptions.result(filePath);
}
new Runner(runnerOptions.build()).run();
}
protected int getWarmupIterations() {
return SystemPropertyUtil.getInt("warmupIterations", -1);
}
protected int getMeasureIterations() {
return SystemPropertyUtil.getInt("measureIterations", -1);
return runnerOptions;
}
protected int getForks() {
return SystemPropertyUtil.getInt("forks", -1);
}
protected String getReportDir() {
return SystemPropertyUtil.get("perfReportDir");
}
}

View File

@ -0,0 +1,122 @@
/*
* Copyright 2015 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.microbench.util;
import static org.junit.Assert.assertNull;
import io.netty.util.ResourceLeakDetector;
import io.netty.util.internal.SystemPropertyUtil;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.junit.Test;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
import org.openjdk.jmh.runner.options.OptionsBuilder;
/**
 * Base class for all JMH benchmarks.
 *
 * <p>Builds the JMH runner options for the concrete benchmark class (selected by its simple
 * class name) and exposes a JUnit {@link #run()} entry point that executes them. Subclasses
 * supply the JVM arguments through {@link #jvmArgs()}.
 */
@Warmup(iterations = AbstractMicrobenchmarkBase.DEFAULT_WARMUP_ITERATIONS)
@Measurement(iterations = AbstractMicrobenchmarkBase.DEFAULT_MEASURE_ITERATIONS)
@State(Scope.Thread)
public abstract class AbstractMicrobenchmarkBase {
    protected static final int DEFAULT_WARMUP_ITERATIONS = 10;
    protected static final int DEFAULT_MEASURE_ITERATIONS = 10;

    /** JVM arguments common to all benchmarks; {@link #jvmArgs()} implementations may extend them. */
    protected static final String[] BASE_JVM_ARGS = {
        "-server", "-dsa", "-da", "-ea:io.netty...", "-XX:+AggressiveOpts", "-XX:+UseBiasedLocking",
        "-XX:+UseFastAccessorMethods", "-XX:+OptimizeStringConcat",
        "-XX:+HeapDumpOnOutOfMemoryError", "-Dio.netty.noResourceLeakDetection"};

    static {
        ResourceLeakDetector.setLevel(ResourceLeakDetector.Level.DISABLED);
    }

    /**
     * Creates the JMH options for this benchmark class.
     *
     * <p>Warmup and measurement iteration counts are only overridden when the corresponding
     * getter returns a positive value. When {@link #getReportDir()} is non-null, a JSON result
     * file named {@code <reportDir><SimpleClassName>.json} is configured; the report directory
     * is concatenated as-is, so it is expected to end with a path separator.
     *
     * @return the options builder, ready to be customized further or built
     * @throws Exception if the report file cannot be created
     */
    protected ChainedOptionsBuilder newOptionsBuilder() throws Exception {
        String className = getClass().getSimpleName();

        ChainedOptionsBuilder runnerOptions = new OptionsBuilder()
            .include(".*" + className + ".*")
            .jvmArgs(jvmArgs());

        // Read each overridable getter once so the check and the applied value always agree.
        int warmupIterations = getWarmupIterations();
        if (warmupIterations > 0) {
            runnerOptions.warmupIterations(warmupIterations);
        }

        int measureIterations = getMeasureIterations();
        if (measureIterations > 0) {
            runnerOptions.measurementIterations(measureIterations);
        }

        String reportDir = getReportDir();
        if (reportDir != null) {
            String filePath = reportDir + className + ".json";
            File file = new File(filePath);
            if (file.exists()) {
                file.delete();
            } else {
                // getParentFile() is null when the path has no directory component
                // (e.g. an empty report dir); in that case there is nothing to create.
                File parentDir = file.getParentFile();
                if (parentDir != null) {
                    parentDir.mkdirs();
                }
                file.createNewFile();
            }

            runnerOptions.resultFormat(ResultFormatType.JSON);
            runnerOptions.result(filePath);
        }

        return runnerOptions;
    }

    /**
     * @return the JVM arguments passed to the benchmark JVM
     */
    protected abstract String[] jvmArgs();

    /**
     * Strips every argument that starts with {@code -ea} from the given JVM arguments.
     *
     * @param jvmArgs the arguments to filter; never modified
     * @return a new array without the {@code -ea...} entries, or {@code jvmArgs} itself when
     *         it contained none
     */
    protected static String[] removeAssertions(String[] jvmArgs) {
        List<String> customArgs = new ArrayList<String>(jvmArgs.length);
        for (String arg : jvmArgs) {
            if (!arg.startsWith("-ea")) {
                customArgs.add(arg);
            }
        }
        if (jvmArgs.length != customArgs.size()) {
            jvmArgs = new String[customArgs.size()];
            customArgs.toArray(jvmArgs);
        }
        return jvmArgs;
    }

    /**
     * JUnit entry point: builds the options from {@link #newOptionsBuilder()} and runs the benchmark.
     *
     * @throws Exception if building the options or running the benchmark fails
     */
    @Test
    public void run() throws Exception {
        new Runner(newOptionsBuilder().build()).run();
    }

    /**
     * @return the value of the {@code warmupIterations} system property, or {@code -1} as the default
     */
    protected int getWarmupIterations() {
        return SystemPropertyUtil.getInt("warmupIterations", -1);
    }

    /**
     * @return the value of the {@code measureIterations} system property, or {@code -1} as the default
     */
    protected int getMeasureIterations() {
        return SystemPropertyUtil.getInt("measureIterations", -1);
    }

    /**
     * @return the value of the {@code perfReportDir} system property as returned by
     *         {@link SystemPropertyUtil#get(String)}; {@link #newOptionsBuilder()} skips the
     *         result file when this is {@code null}
     */
    protected String getReportDir() {
        return SystemPropertyUtil.get("perfReportDir");
    }

    /**
     * Fails the current test (via {@code assertNull}) when {@code t} is non-null.
     *
     * @param t the unexpected throwable, or {@code null} if none occurred
     */
    public static void handleUnexpectedException(Throwable t) {
        assertNull(t);
    }
}