Speed up the slow path of FastThreadLocal

Motivation:

The current slow path of FastThreadLocal is much slower than JDK ThreadLocal. See #4418

Modifications:

- Add FastThreadLocalSlowPathBenchmark for the slow path of FastThreadLocal
- Make UnpaddedInternalThreadLocalMap.slowThreadLocalMap a static final, eagerly initialized field to speed up the slow path of FastThreadLocal

Result:

The slow path of FastThreadLocal is improved.
This commit is contained in:
Xiaoyan Lin 2016-03-21 18:06:05 -07:00 committed by Norman Maurer
parent a11412fab0
commit 3ad55eb839
5 changed files with 110 additions and 51 deletions

View File

@ -41,18 +41,10 @@ public final class InternalThreadLocalMap extends UnpaddedInternalThreadLocalMap
public static InternalThreadLocalMap getIfSet() { public static InternalThreadLocalMap getIfSet() {
Thread thread = Thread.currentThread(); Thread thread = Thread.currentThread();
InternalThreadLocalMap threadLocalMap;
if (thread instanceof FastThreadLocalThread) { if (thread instanceof FastThreadLocalThread) {
threadLocalMap = ((FastThreadLocalThread) thread).threadLocalMap(); return ((FastThreadLocalThread) thread).threadLocalMap();
} else {
ThreadLocal<InternalThreadLocalMap> slowThreadLocalMap = UnpaddedInternalThreadLocalMap.slowThreadLocalMap;
if (slowThreadLocalMap == null) {
threadLocalMap = null;
} else {
threadLocalMap = slowThreadLocalMap.get();
} }
} return slowThreadLocalMap.get();
return threadLocalMap;
} }
public static InternalThreadLocalMap get() { public static InternalThreadLocalMap get() {
@ -74,11 +66,6 @@ public final class InternalThreadLocalMap extends UnpaddedInternalThreadLocalMap
private static InternalThreadLocalMap slowGet() { private static InternalThreadLocalMap slowGet() {
ThreadLocal<InternalThreadLocalMap> slowThreadLocalMap = UnpaddedInternalThreadLocalMap.slowThreadLocalMap; ThreadLocal<InternalThreadLocalMap> slowThreadLocalMap = UnpaddedInternalThreadLocalMap.slowThreadLocalMap;
if (slowThreadLocalMap == null) {
UnpaddedInternalThreadLocalMap.slowThreadLocalMap =
slowThreadLocalMap = new ThreadLocal<InternalThreadLocalMap>();
}
InternalThreadLocalMap ret = slowThreadLocalMap.get(); InternalThreadLocalMap ret = slowThreadLocalMap.get();
if (ret == null) { if (ret == null) {
ret = new InternalThreadLocalMap(); ret = new InternalThreadLocalMap();
@ -92,15 +79,12 @@ public final class InternalThreadLocalMap extends UnpaddedInternalThreadLocalMap
if (thread instanceof FastThreadLocalThread) { if (thread instanceof FastThreadLocalThread) {
((FastThreadLocalThread) thread).setThreadLocalMap(null); ((FastThreadLocalThread) thread).setThreadLocalMap(null);
} else { } else {
ThreadLocal<InternalThreadLocalMap> slowThreadLocalMap = UnpaddedInternalThreadLocalMap.slowThreadLocalMap;
if (slowThreadLocalMap != null) {
slowThreadLocalMap.remove(); slowThreadLocalMap.remove();
} }
} }
}
public static void destroy() { public static void destroy() {
slowThreadLocalMap = null; slowThreadLocalMap.remove();
} }
public static int nextVariableIndex() { public static int nextVariableIndex() {

View File

@ -32,7 +32,7 @@ import java.util.concurrent.atomic.AtomicInteger;
*/ */
class UnpaddedInternalThreadLocalMap { class UnpaddedInternalThreadLocalMap {
static ThreadLocal<InternalThreadLocalMap> slowThreadLocalMap; static final ThreadLocal<InternalThreadLocalMap> slowThreadLocalMap = new ThreadLocal<InternalThreadLocalMap>();
static final AtomicInteger nextIndex = new AtomicInteger(); static final AtomicInteger nextIndex = new AtomicInteger();
/** Used by {@link FastThreadLocal} */ /** Used by {@link FastThreadLocal} */

View File

@ -24,11 +24,11 @@ import org.openjdk.jmh.annotations.Threads;
import java.util.Random; import java.util.Random;
/** /**
* This class benchmarks different allocators with different allocation sizes. * This class benchmarks the fast path of FastThreadLocal and the JDK ThreadLocal.
*/ */
@Threads(4) @Threads(4)
@Measurement(iterations = 10, batchSize = 100) @Measurement(iterations = 10, batchSize = 100)
public class FastThreadLocalBenchmark extends AbstractMicrobenchmark { public class FastThreadLocalFastPathBenchmark extends AbstractMicrobenchmark {
private static final Random rand = new Random(); private static final Random rand = new Random();
@ -39,19 +39,17 @@ public class FastThreadLocalBenchmark extends AbstractMicrobenchmark {
static { static {
for (int i = 0; i < jdkThreadLocals.length; i ++) { for (int i = 0; i < jdkThreadLocals.length; i ++) {
final int num = rand.nextInt();
jdkThreadLocals[i] = new ThreadLocal<Integer>() { jdkThreadLocals[i] = new ThreadLocal<Integer>() {
@Override @Override
protected Integer initialValue() { protected Integer initialValue() {
return rand.nextInt(); return num;
} }
}; };
}
for (int i = 0; i < fastThreadLocals.length; i ++) {
fastThreadLocals[i] = new FastThreadLocal<Integer>() { fastThreadLocals[i] = new FastThreadLocal<Integer>() {
@Override @Override
protected Integer initialValue() { protected Integer initialValue() {
return rand.nextInt(); return num;
} }
}; };
} }

View File

@ -0,0 +1,79 @@
/*
* Copyright 2016 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.microbench.concurrent;
import io.netty.microbench.util.AbstractMicrobenchmark;
import io.netty.util.concurrent.FastThreadLocal;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Threads;
import java.util.Random;
/**
 * Benchmarks reads of {@link FastThreadLocal} on its slow path against reads of the JDK
 * {@link ThreadLocal}.
 *
 * <p>Each benchmark method reads every variable of one flavour once and returns the sum of the
 * values read.
 */
@Threads(4)
@Measurement(iterations = 10, batchSize = 100)
public class FastThreadLocalSlowPathBenchmark extends AbstractMicrobenchmark {

    /** Source of the initial values handed to the thread-local variables. */
    private static final Random random = new Random();

    /** JDK thread-locals read by {@link #jdkThreadLocalGet()}. */
    @SuppressWarnings("unchecked")
    private static final ThreadLocal<Integer>[] jdkLocals = new ThreadLocal[128];

    /** Fast thread-locals read by {@link #fastThreadLocal()}; same count as the JDK ones. */
    @SuppressWarnings("unchecked")
    private static final FastThreadLocal<Integer>[] fastLocals = new FastThreadLocal[jdkLocals.length];

    static {
        // Slot by slot, both flavours are created with the same randomly drawn initial value.
        int slot = 0;
        while (slot < jdkLocals.length) {
            final int initial = random.nextInt();
            jdkLocals[slot] = newJdkLocal(initial);
            fastLocals[slot] = newFastLocal(initial);
            slot++;
        }
    }

    /**
     * Creates the benchmark, passing {@code false} for {@code disableAssertions} and {@code true}
     * for {@code disableHarnessExecutor} to the superclass.
     */
    public FastThreadLocalSlowPathBenchmark() {
        // NOTE(review): presumably the harness executor is disabled so that benchmark threads are
        // not FastThreadLocalThread instances and the slow path is exercised; confirm.
        super(false, true);
    }

    /** Returns a JDK thread-local whose initial value is {@code value}. */
    private static ThreadLocal<Integer> newJdkLocal(final int value) {
        return new ThreadLocal<Integer>() {
            @Override
            protected Integer initialValue() {
                return value;
            }
        };
    }

    /** Returns a fast thread-local whose initial value is {@code value}. */
    private static FastThreadLocal<Integer> newFastLocal(final int value) {
        return new FastThreadLocal<Integer>() {
            @Override
            protected Integer initialValue() {
                return value;
            }
        };
    }

    /**
     * Reads every JDK {@link ThreadLocal} once.
     *
     * @return the sum of all values read
     */
    @Benchmark
    public int jdkThreadLocalGet() {
        int sum = 0;
        for (int idx = 0; idx < jdkLocals.length; idx++) {
            sum += jdkLocals[idx].get();
        }
        return sum;
    }

    /**
     * Reads every {@link FastThreadLocal} once.
     *
     * @return the sum of all values read
     */
    @Benchmark
    public int fastThreadLocal() {
        int sum = 0;
        for (int idx = 0; idx < fastLocals.length; idx++) {
            sum += fastLocals[idx].get();
        }
        return sum;
    }
}

View File

@ -32,17 +32,6 @@ import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
public class AbstractMicrobenchmark extends AbstractMicrobenchmarkBase { public class AbstractMicrobenchmark extends AbstractMicrobenchmarkBase {
protected static final int DEFAULT_FORKS = 2; protected static final int DEFAULT_FORKS = 2;
protected static final String[] JVM_ARGS;
static {
final String[] customArgs = {
"-Xms768m", "-Xmx768m", "-XX:MaxDirectMemorySize=768m", "-Djmh.executor=CUSTOM",
"-Djmh.executor.class=io.netty.microbench.util.AbstractMicrobenchmark$HarnessExecutor" };
JVM_ARGS = new String[BASE_JVM_ARGS.length + customArgs.length];
System.arraycopy(BASE_JVM_ARGS, 0, JVM_ARGS, 0, BASE_JVM_ARGS.length);
System.arraycopy(customArgs, 0, JVM_ARGS, BASE_JVM_ARGS.length, customArgs.length);
}
public static final class HarnessExecutor extends ThreadPoolExecutor { public static final class HarnessExecutor extends ThreadPoolExecutor {
public HarnessExecutor(int maxThreads, String prefix) { public HarnessExecutor(int maxThreads, String prefix) {
@ -52,27 +41,36 @@ public class AbstractMicrobenchmark extends AbstractMicrobenchmarkBase {
} }
} }
private final boolean disableAssertions; private final String[] jvmArgs;
private String[] jvmArgsWithNoAssertions;
public AbstractMicrobenchmark() { public AbstractMicrobenchmark() {
this(false); this(false, false);
} }
public AbstractMicrobenchmark(boolean disableAssertions) { public AbstractMicrobenchmark(boolean disableAssertions) {
this.disableAssertions = disableAssertions; this(disableAssertions, false);
}
public AbstractMicrobenchmark(boolean disableAssertions, boolean disableHarnessExecutor) {
final String[] customArgs;
if (disableHarnessExecutor) {
customArgs = new String[]{"-Xms768m", "-Xmx768m", "-XX:MaxDirectMemorySize=768m"};
} else {
customArgs = new String[]{"-Xms768m", "-Xmx768m", "-XX:MaxDirectMemorySize=768m", "-Djmh.executor=CUSTOM",
"-Djmh.executor.class=io.netty.microbench.util.AbstractMicrobenchmark$HarnessExecutor"};
}
String[] jvmArgs = new String[BASE_JVM_ARGS.length + customArgs.length];
System.arraycopy(BASE_JVM_ARGS, 0, jvmArgs, 0, BASE_JVM_ARGS.length);
System.arraycopy(customArgs, 0, jvmArgs, BASE_JVM_ARGS.length, customArgs.length);
if (disableAssertions) {
jvmArgs = removeAssertions(jvmArgs);
}
this.jvmArgs = jvmArgs;
} }
@Override @Override
protected String[] jvmArgs() { protected String[] jvmArgs() {
if (!disableAssertions) { return jvmArgs;
return JVM_ARGS;
}
if (jvmArgsWithNoAssertions == null) {
jvmArgsWithNoAssertions = removeAssertions(JVM_ARGS);
}
return jvmArgsWithNoAssertions;
} }
@Override @Override