From a7f7d9c8e004aa13801424e6e665b1ec525a822f Mon Sep 17 00:00:00 2001 From: Scott Mitchell Date: Wed, 22 Jun 2016 18:07:15 -0700 Subject: [PATCH] Remove unsafe char[] access in PlatformDependent Motivation: PlatformDependent attempts to use reflection to get the underlying char[] (or byte[]) from String objects. This is fragile as if the String implementation does not utilize the full array, and instead uses a subset of the array, this optimization is invalid. OpenJDK6 and some earlier versions of OpenJDK7 String have the capability to use a subsection of the underlying char[]. Modifications: - PlatformDependent should not attempt to use the underlying array from String (or other data types) via reflection Result: PlatformDependent hash code generation for CharSequence does not depend upon specific JDK implementation details. --- .../codec/http2/HashCollisionTest.java | 177 +++++++++++++ .../io/netty/handler/codec/HeadersUtils.java | 2 +- .../util/internal/PlatformDependent.java | 237 +++++++++++------- .../util/internal/PlatformDependent0.java | 206 ++------------- .../util/internal/PlatformDependentTest.java | 3 - .../common/AsciiStringBenchmark.java | 2 +- 6 files changed, 343 insertions(+), 284 deletions(-) create mode 100644 codec-http2/src/test/java/io/netty/handler/codec/http2/HashCollisionTest.java diff --git a/codec-http2/src/test/java/io/netty/handler/codec/http2/HashCollisionTest.java b/codec-http2/src/test/java/io/netty/handler/codec/http2/HashCollisionTest.java new file mode 100644 index 0000000000..4fe437a737 --- /dev/null +++ b/codec-http2/src/test/java/io/netty/handler/codec/http2/HashCollisionTest.java @@ -0,0 +1,177 @@ +/* + * Copyright 2016 The Netty Project + * + * The Netty Project licenses this file to you under the Apache License, version 2.0 (the + * "License"); you may not use this file except in compliance with the License. 
You may obtain a + * copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ +package io.netty.handler.codec.http2; + +import io.netty.handler.codec.http.HttpHeaderNames; +import io.netty.handler.codec.http.HttpHeaderValues; +import io.netty.util.AsciiString; +import io.netty.util.internal.PlatformDependent; +import org.junit.Ignore; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.io.PrintStream; +import java.lang.reflect.Field; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +@Ignore +public final class HashCollisionTest { + private HashCollisionTest() { } + + public static void main(String[] args) throws IllegalAccessException, IOException, URISyntaxException { + // Big initial size for when all name sources are pulled in. 
+ List<CharSequence> strings = new ArrayList<CharSequence>(350000); + addHttpHeaderNames(strings); + addHttpHeaderValues(strings); + addHttp2HeaderNames(strings); + addWordsFromFile(new File("/usr/share/dict/words"), strings); + // More "english words" can be found here: + // https://gist.github.com/Scottmitch/de2f03912778016ecee3c140478f07e0#file-englishwords-txt + + Map<Integer, List<CharSequence>> dups = calculateDuplicates(strings, new Function<CharSequence, Integer>() { + @Override + public Integer apply(CharSequence string) { + int h = 0; + for (int i = 0; i < string.length(); ++i) { + // masking with 0x1F reduces the number of overall bits that impact the hash code but makes the hash + // code the same regardless of character case (upper case or lower case hash is the same). + h = h * 31 + (string.charAt(i) & 0x1F); + } + return h; + } + }); + PrintStream writer = System.out; + writer.println("==Old Duplicates=="); + printResults(writer, dups); + + dups = calculateDuplicates(strings, new Function<CharSequence, Integer>() { + @Override + public Integer apply(CharSequence string) { + return PlatformDependent.hashCodeAscii(string); + } + }); + writer.println(); + writer.println("==New Duplicates=="); + printResults(writer, dups); + } + + private static void addHttpHeaderNames(List<CharSequence> values) throws IllegalAccessException { + for (Field f : HttpHeaderNames.class.getFields()) { + if (f.getType() == AsciiString.class) { + values.add((AsciiString) f.get(null)); + } + } + } + + private static void addHttpHeaderValues(List<CharSequence> values) throws IllegalAccessException { + for (Field f : HttpHeaderValues.class.getFields()) { + if (f.getType() == AsciiString.class) { + values.add((AsciiString) f.get(null)); + } + } + } + + private static void addHttp2HeaderNames(List<CharSequence> values) throws IllegalAccessException { + for (Http2Headers.PseudoHeaderName name : Http2Headers.PseudoHeaderName.values()) { + values.add(name.value()); + } + } + + private static void addWordsFromFile(File file, List<CharSequence> values) + throws IllegalAccessException, IOException { + BufferedReader br = new BufferedReader(new 
FileReader(file)); + try { + String line; + while ((line = br.readLine()) != null) { + // Make a "best effort" to prune input which contains characters that are not valid in HTTP header names + if (line.indexOf('\'') < 0) { + values.add(line); + } + } + } finally { + br.close(); + } + } + + private static Map<Integer, List<CharSequence>> calculateDuplicates(List<CharSequence> strings, + Function<CharSequence, Integer> hasher) { + Map<Integer, List<CharSequence>> hashResults = new HashMap<Integer, List<CharSequence>>(); + Set<Integer> duplicateHashCodes = new HashSet<Integer>(); + + for (CharSequence str : strings) { + Integer hash = hasher.apply(str); + List<CharSequence> results = hashResults.get(hash); + if (results == null) { + results = new ArrayList<CharSequence>(1); + hashResults.put(hash, results); + } else { + duplicateHashCodes.add(hash); + } + results.add(str); + } + + if (duplicateHashCodes.isEmpty()) { + return Collections.emptyMap(); + } + Map<Integer, List<CharSequence>> duplicates = + new HashMap<Integer, List<CharSequence>>(duplicateHashCodes.size()); + for (Integer duplicateHashCode : duplicateHashCodes) { + List<CharSequence> realDups = new ArrayList<CharSequence>(2); + Iterator<CharSequence> itr = hashResults.get(duplicateHashCode).iterator(); + // there should be at least 2 elements in the list ... 
because there may be duplicates + realDups.add(itr.next()); + checknext: do { + CharSequence next = itr.next(); + for (CharSequence potentialDup : realDups) { + if (!AsciiString.contentEqualsIgnoreCase(next, potentialDup)) { + realDups.add(next); + break checknext; + } + } + } while (itr.hasNext()); + + if (realDups.size() > 1) { + duplicates.put(duplicateHashCode, realDups); + } + } + return duplicates; + } + + private static void printResults(PrintStream stream, Map<Integer, List<CharSequence>> dups) { + stream.println("Number duplicates: " + dups.size()); + for (Entry<Integer, List<CharSequence>> entry : dups.entrySet()) { + stream.print(entry.getValue().size() + " duplicates for hash: " + entry.getKey() + " values: "); + for (CharSequence str : entry.getValue()) { + stream.print("[" + str + "] "); + } + stream.println(); + } + } + + private interface Function<P, R> { + R apply(P param); + } +} diff --git a/codec/src/main/java/io/netty/handler/codec/HeadersUtils.java b/codec/src/main/java/io/netty/handler/codec/HeadersUtils.java index 77cd5f7465..d4062209b3 100644 --- a/codec/src/main/java/io/netty/handler/codec/HeadersUtils.java +++ b/codec/src/main/java/io/netty/handler/codec/HeadersUtils.java @@ -211,7 +211,7 @@ public final class HeadersUtils { @Override public Object[] toArray() { - Object[] arr = new String[size()]; + Object[] arr = new Object[size()]; fillArray(arr); return arr; } diff --git a/common/src/main/java/io/netty/util/internal/PlatformDependent.java b/common/src/main/java/io/netty/util/internal/PlatformDependent.java index 361e1ca142..d474a8a6d0 100644 --- a/common/src/main/java/io/netty/util/internal/PlatformDependent.java +++ b/common/src/main/java/io/netty/util/internal/PlatformDependent.java @@ -56,9 +56,9 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import static io.netty.util.internal.PlatformDependent0.HASH_CODE_ASCII_SEED; -import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiCompute; +import static io.netty.util.internal.PlatformDependent0.HASH_CODE_C1; +import 
static io.netty.util.internal.PlatformDependent0.HASH_CODE_C2; import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiSanitize; -import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiSanitizeAsByte; import static io.netty.util.internal.PlatformDependent0.unalignedAccess; /** @@ -431,27 +431,6 @@ public final class PlatformDependent { ((long) bytes[offset + 7] & 0xff) << 56; } - private static long getLongFromBytesSafe(CharSequence bytes, int offset) { - if (BIG_ENDIAN_NATIVE_ORDER) { - return (long) bytes.charAt(offset) << 56 | - ((long) bytes.charAt(offset + 1) & 0xff) << 48 | - ((long) bytes.charAt(offset + 2) & 0xff) << 40 | - ((long) bytes.charAt(offset + 3) & 0xff) << 32 | - ((long) bytes.charAt(offset + 4) & 0xff) << 24 | - ((long) bytes.charAt(offset + 5) & 0xff) << 16 | - ((long) bytes.charAt(offset + 6) & 0xff) << 8 | - (long) bytes.charAt(offset + 7) & 0xff; - } - return (long) bytes.charAt(offset) & 0xff | - ((long) bytes.charAt(offset + 1) & 0xff) << 8 | - ((long) bytes.charAt(offset + 2) & 0xff) << 16 | - ((long) bytes.charAt(offset + 3) & 0xff) << 24 | - ((long) bytes.charAt(offset + 4) & 0xff) << 32 | - ((long) bytes.charAt(offset + 5) & 0xff) << 40 | - ((long) bytes.charAt(offset + 6) & 0xff) << 48 | - ((long) bytes.charAt(offset + 7) & 0xff) << 56; - } - private static int getIntSafe(byte[] bytes, int offset) { if (BIG_ENDIAN_NATIVE_ORDER) { return bytes[offset] << 24 | @@ -465,19 +444,6 @@ public final class PlatformDependent { bytes[offset + 3] << 24; } - private static int getIntFromBytesSafe(CharSequence bytes, int offset) { - if (BIG_ENDIAN_NATIVE_ORDER) { - return bytes.charAt(offset) << 24 | - (bytes.charAt(offset + 1) & 0xff) << 16 | - (bytes.charAt(offset + 2) & 0xff) << 8 | - bytes.charAt(offset + 3) & 0xff; - } - return bytes.charAt(offset) & 0xff | - (bytes.charAt(offset + 1) & 0xff) << 8 | - (bytes.charAt(offset + 2) & 0xff) << 16 | - bytes.charAt(offset + 3) << 24; - } - private static short 
getShortSafe(byte[] bytes, int offset) { if (BIG_ENDIAN_NATIVE_ORDER) { return (short) (bytes[offset] << 8 | (bytes[offset + 1] & 0xff)); @@ -485,11 +451,54 @@ public final class PlatformDependent { return (short) (bytes[offset] & 0xff | (bytes[offset + 1] << 8)); } - private static short getShortFromBytesSafe(CharSequence bytes, int offset) { + /** + * Identical to {@link PlatformDependent0#hashCodeAsciiCompute(long, int)} but for {@link CharSequence}. + */ + private static int hashCodeAsciiCompute(CharSequence value, int offset, int hash) { + // masking with 0x1f reduces the number of overall bits that impact the hash code but makes the hash + // code the same regardless of character case (upper case or lower case hash is the same). + return hash * HASH_CODE_C1 + + // Low order int + hashCodeAsciiSanitizeInt(value, offset) * HASH_CODE_C2 + + // High order int + hashCodeAsciiSanitizeInt(value, offset + 4); + } + + /** + * Identical to {@link PlatformDependent0#hashCodeAsciiSanitize(int)} but for {@link CharSequence}. + */ + private static int hashCodeAsciiSanitizeInt(CharSequence value, int offset) { if (BIG_ENDIAN_NATIVE_ORDER) { - return (short) (bytes.charAt(offset) << 8 | (bytes.charAt(offset + 1) & 0xff)); + // mimic a unsafe.getInt call on a big endian machine + return (value.charAt(offset + 3) & 0x1f) | + (value.charAt(offset + 2) & 0x1f) << 8 | + (value.charAt(offset + 1) & 0x1f) << 16 | + (value.charAt(offset) & 0x1f) << 24; } - return (short) (bytes.charAt(offset) & 0xff | (bytes.charAt(offset + 1) << 8)); + return (value.charAt(offset + 3) & 0x1f) << 24 | + (value.charAt(offset + 2) & 0x1f) << 16 | + (value.charAt(offset + 1) & 0x1f) << 8 | + (value.charAt(offset) & 0x1f); + } + + /** + * Identical to {@link PlatformDependent0#hashCodeAsciiSanitize(short)} but for {@link CharSequence}. 
+ */ + private static int hashCodeAsciiSanitizeShort(CharSequence value, int offset) { + if (BIG_ENDIAN_NATIVE_ORDER) { + // mimic a unsafe.getShort call on a big endian machine + return (value.charAt(offset + 1) & 0x1f) | + (value.charAt(offset) & 0x1f) << 8; + } + return (value.charAt(offset + 1) & 0x1f) << 8 | + (value.charAt(offset) & 0x1f); + } + + /** + * Identical to {@link PlatformDependent0#hashCodeAsciiSanitize(byte)} but for {@link CharSequence}. + */ + private static int hashCodeAsciiSanitizsByte(char value) { + return value & 0x1f; } public static void putOrderedObject(Object object, long address, Object value) { @@ -676,10 +685,9 @@ public final class PlatformDependent { * The resulting hash code will be case insensitive. */ public static int hashCodeAscii(byte[] bytes, int startPos, int length) { - if (!hasUnsafe() || !unalignedAccess()) { - return hashCodeAsciiSafe(bytes, startPos, length); - } - return PlatformDependent0.hashCodeAscii(bytes, startPos, length); + return !hasUnsafe() || !unalignedAccess() ? + hashCodeAsciiSafe(bytes, startPos, length) : + PlatformDependent0.hashCodeAscii(bytes, startPos, length); } /** @@ -693,14 +701,83 @@ public final class PlatformDependent { * The resulting hash code will be case insensitive. */ public static int hashCodeAscii(CharSequence bytes) { - if (!hasUnsafe() || !unalignedAccess()) { - return hashCodeAsciiSafe(bytes); - } else if (PlatformDependent0.hasCharArray(bytes)) { - return PlatformDependent0.hashCodeAscii(PlatformDependent0.charArray(bytes)); - } else if (PlatformDependent0.hasByteArray(bytes)) { - return PlatformDependent0.hashCodeAscii(PlatformDependent0.byteArray(bytes)); + int hash = HASH_CODE_ASCII_SEED; + final int remainingBytes = bytes.length() & 7; + // Benchmarking shows that by just naively looping for inputs 8~31 bytes long we incur a relatively large + // performance penalty (only achieve about 60% performance of loop which iterates over each char). 
So because + // of this we take special provisions to unroll the looping for these conditions. + switch (bytes.length()) { + case 31: + case 30: + case 29: + case 28: + case 27: + case 26: + case 25: + case 24: + hash = hashCodeAsciiCompute(bytes, bytes.length() - 24, + hashCodeAsciiCompute(bytes, bytes.length() - 16, + hashCodeAsciiCompute(bytes, bytes.length() - 8, hash))); + break; + case 23: + case 22: + case 21: + case 20: + case 19: + case 18: + case 17: + case 16: + hash = hashCodeAsciiCompute(bytes, bytes.length() - 16, + hashCodeAsciiCompute(bytes, bytes.length() - 8, hash)); + break; + case 15: + case 14: + case 13: + case 12: + case 11: + case 10: + case 9: + case 8: + hash = hashCodeAsciiCompute(bytes, bytes.length() - 8, hash); + break; + case 7: + case 6: + case 5: + case 4: + case 3: + case 2: + case 1: + case 0: + break; + default: + for (int i = bytes.length() - 8; i >= remainingBytes; i -= 8) { + hash = hashCodeAsciiCompute(bytes, i, hash); + } + break; + } + switch(remainingBytes) { + case 7: + return ((hash * HASH_CODE_C1 + hashCodeAsciiSanitizsByte(bytes.charAt(0))) + * HASH_CODE_C2 + hashCodeAsciiSanitizeShort(bytes, 1)) + * HASH_CODE_C1 + hashCodeAsciiSanitizeInt(bytes, 3); + case 6: + return (hash * HASH_CODE_C1 + hashCodeAsciiSanitizeShort(bytes, 0)) + * HASH_CODE_C2 + hashCodeAsciiSanitizeInt(bytes, 2); + case 5: + return (hash * HASH_CODE_C1 + hashCodeAsciiSanitizsByte(bytes.charAt(0))) + * HASH_CODE_C2 + hashCodeAsciiSanitizeInt(bytes, 1); + case 4: + return hash * HASH_CODE_C1 + hashCodeAsciiSanitizeInt(bytes, 0); + case 3: + return (hash * HASH_CODE_C1 + hashCodeAsciiSanitizsByte(bytes.charAt(0))) + * HASH_CODE_C2 + hashCodeAsciiSanitizeShort(bytes, 1); + case 2: + return hash * HASH_CODE_C1 + hashCodeAsciiSanitizeShort(bytes, 0); + case 1: + return hash * HASH_CODE_C1 + hashCodeAsciiSanitizsByte(bytes.charAt(0)); + default: + return hash; } - return hashCodeAsciiSafe(bytes); } /** @@ -1251,62 +1328,28 @@ public final class 
PlatformDependent { final int remainingBytes = length & 7; final int end = startPos + remainingBytes; for (int i = startPos - 8 + length; i >= end; i -= 8) { - hash = hashCodeAsciiCompute(getLongSafe(bytes, i), hash); + hash = PlatformDependent0.hashCodeAsciiCompute(getLongSafe(bytes, i), hash); } switch(remainingBytes) { case 7: - return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 3)), 13)) - * 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1))) - * 31 + hashCodeAsciiSanitize(bytes[startPos]); + return ((hash * HASH_CODE_C1 + hashCodeAsciiSanitize(bytes[startPos])) + * HASH_CODE_C2 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1))) + * HASH_CODE_C1 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 3)); case 6: - return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 2)), 13)) - * 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos)); + return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos))) + * HASH_CODE_C2 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 2)); case 5: - return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 1)), 13)) - * 31 + hashCodeAsciiSanitize(bytes[startPos]); + return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(bytes[startPos])) + * HASH_CODE_C2 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 1)); case 4: - return hash * 31 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos)); + return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos)); case 3: - return (hash * 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1))) - * 31 + hashCodeAsciiSanitize(bytes[startPos]); + return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(bytes[startPos])) + * HASH_CODE_C2 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1)); case 2: - return hash * 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos)); + return hash * HASH_CODE_C1 + 
hashCodeAsciiSanitize(getShortSafe(bytes, startPos)); case 1: - return hash * 31 + hashCodeAsciiSanitize(bytes[startPos]); - default: - return hash; - } - } - - /** - * Package private for testing purposes only! - */ - static int hashCodeAsciiSafe(CharSequence bytes) { - int hash = HASH_CODE_ASCII_SEED; - final int remainingBytes = bytes.length() & 7; - for (int i = bytes.length() - 8; i >= remainingBytes; i -= 8) { - hash = hashCodeAsciiCompute(getLongFromBytesSafe(bytes, i), hash); - } - switch(remainingBytes) { - case 7: - return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 3)), 13)) - * 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 1))) - * 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0)); - case 6: - return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 2)), 13)) - * 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 0)); - case 5: - return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 1)), 13)) - * 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0)); - case 4: - return hash * 31 + hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 0)); - case 3: - return (hash * 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 1))) - * 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0)); - case 2: - return hash * 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 0)); - case 1: - return hash * 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0)); + return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(bytes[startPos]); default: return hash; } diff --git a/common/src/main/java/io/netty/util/internal/PlatformDependent0.java b/common/src/main/java/io/netty/util/internal/PlatformDependent0.java index c6266fe898..a3194f2612 100644 --- a/common/src/main/java/io/netty/util/internal/PlatformDependent0.java +++ b/common/src/main/java/io/netty/util/internal/PlatformDependent0.java @@ -41,13 +41,12 @@ final class PlatformDependent0 { static final Unsafe UNSAFE; 
private static final long ADDRESS_FIELD_OFFSET; private static final long BYTE_ARRAY_BASE_OFFSET; - private static final long CHAR_ARRAY_BASE_OFFSET; - private static final long CHAR_ARRAY_INDEX_SCALE; - private static final long STRING_CHAR_VALUE_FIELD_OFFSET; - private static final long STRING_BYTE_VALUE_FIELD_OFFSET; private static final Constructor DIRECT_BUFFER_CONSTRUCTOR; - static final int HASH_CODE_ASCII_SEED = 0xc2b2ae35; // constant borrowed from murmur3 + // constants borrowed from murmur3 + static final int HASH_CODE_ASCII_SEED = 0xc2b2ae35; + static final int HASH_CODE_C1 = 0xcc9e2d51; + static final int HASH_CODE_C2 = 0x1b873593; /** * Limits the number of bytes to copy per {@link Unsafe#copyMemory(long, long, long)} to allow safepoint polling @@ -117,9 +116,8 @@ final class PlatformDependent0 { if (unsafe == null) { ADDRESS_FIELD_OFFSET = -1; - BYTE_ARRAY_BASE_OFFSET = CHAR_ARRAY_BASE_OFFSET = CHAR_ARRAY_INDEX_SCALE = -1; + BYTE_ARRAY_BASE_OFFSET = -1; UNALIGNED = false; - STRING_CHAR_VALUE_FIELD_OFFSET = STRING_BYTE_VALUE_FIELD_OFFSET = -1; DIRECT_BUFFER_CONSTRUCTOR = null; } else { Constructor directBufferConstructor; @@ -142,8 +140,6 @@ final class PlatformDependent0 { ADDRESS_FIELD_OFFSET = objectFieldOffset(addressField); BYTE_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(byte[].class); - CHAR_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(char[].class); - CHAR_ARRAY_INDEX_SCALE = UNSAFE.arrayIndexScale(char[].class); boolean unaligned; try { Class bitsClass = Class.forName("java.nio.Bits", false, ClassLoader.getSystemClassLoader()); @@ -159,48 +155,6 @@ final class PlatformDependent0 { UNALIGNED = unaligned; logger.debug("java.nio.Bits.unaligned: {}", UNALIGNED); - - Field stringValueField = null; - try { - stringValueField = AccessController.doPrivileged(new PrivilegedAction() { - @Override - public Field run() { - try { - Field f = String.class.getDeclaredField("value"); - f.setAccessible(true); - return f; - } catch (NoSuchFieldException e) { - 
logger.info("Failed to find String value array (please report an issue)." + - "String hash code optimizations are disabled.", e); - } catch (SecurityException e) { - logger.debug("No permissions to get String value array." + - "String hash code optimizations are disabled.", e); - } - return null; - } - }); - } catch (Throwable t) { - logger.debug("AccessController.doPrivileged failed to get String value array." + - "String hash code optimizations are disabled.", t); - } - - if (stringValueField == null) { - STRING_CHAR_VALUE_FIELD_OFFSET = STRING_BYTE_VALUE_FIELD_OFFSET = -1; - } else { - long stringValueFieldOffset = UNSAFE.objectFieldOffset(stringValueField); - Object o = UNSAFE.getObject("", stringValueFieldOffset); - if (char[].class.isInstance(o)) { - STRING_CHAR_VALUE_FIELD_OFFSET = stringValueFieldOffset; - STRING_BYTE_VALUE_FIELD_OFFSET = -1; - } else if (byte[].class.isInstance(o)) { - STRING_CHAR_VALUE_FIELD_OFFSET = -1; - STRING_BYTE_VALUE_FIELD_OFFSET = stringValueFieldOffset; - } else { - STRING_CHAR_VALUE_FIELD_OFFSET = STRING_BYTE_VALUE_FIELD_OFFSET = -1; - logger.info("Unexpected type [" + o.getClass() + "] for String value array." + - "String hash code optimizations are disabled."); - } - } } logger.debug("java.nio.DirectByteBuffer.(long, int): {}", @@ -456,173 +410,61 @@ final class PlatformDependent0 { } } - static int hashCodeAscii(byte[] bytes) { - return hashCodeAscii(bytes, 0, bytes.length); - } - - /** - * This must remain consistent with {@link #hashCodeAscii(char[])}. - */ static int hashCodeAscii(byte[] bytes, int startPos, int length) { int hash = HASH_CODE_ASCII_SEED; final long baseOffset = BYTE_ARRAY_BASE_OFFSET + startPos; final int remainingBytes = length & 7; - if (length > 7) { // Fast path for small sized inputs. Benchmarking shows this is beneficial. 
- final long end = baseOffset + remainingBytes; - for (long i = baseOffset - 8 + length; i >= end; i -= 8) { - hash = hashCodeAsciiCompute(UNSAFE.getLong(bytes, i), hash); - } + final long end = baseOffset + remainingBytes; + for (long i = baseOffset - 8 + length; i >= end; i -= 8) { + hash = hashCodeAsciiCompute(UNSAFE.getLong(bytes, i), hash); } switch(remainingBytes) { case 7: - return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 3)), 13)) - * 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1))) - * 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset)); + return ((hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset))) + * HASH_CODE_C2 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1))) + * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 3)); case 6: - return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 2)), 13)) - * 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset)); + return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset))) + * HASH_CODE_C2 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 2)); case 5: - return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 1)), 13)) - * 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset)); + return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset))) + * HASH_CODE_C2 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 1)); case 4: - return hash * 31 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset)); + return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset)); case 3: - return (hash * 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1))) - * 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset)); + return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, 
baseOffset))) + * HASH_CODE_C2 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1)); case 2: - return hash * 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset)); + return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset)); case 1: - return hash * 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset)); + return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset)); default: return hash; } } - /** - * This method assumes that {@code bytes} is equivalent to a {@code byte[]} but just using {@code char[]} - * for storage. The MSB of each {@code char} from {@code bytes} is ignored. - *

- * This must remain consistent with {@link #hashCodeAscii(byte[], int, int)}. - */ - static int hashCodeAscii(char[] bytes) { - int hash = HASH_CODE_ASCII_SEED; - final int remainingBytes = bytes.length & 7; - for (int i = bytes.length - 8; i >= remainingBytes; i -= 8) { - hash = hashCodeAsciiComputeFromChar( - UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + i * CHAR_ARRAY_INDEX_SCALE), - UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + (i + 4) * CHAR_ARRAY_INDEX_SCALE), - hash); - } - switch(remainingBytes) { - case 7: - return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitizeFromChar( - UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + 3 * CHAR_ARRAY_INDEX_SCALE)), 13)) - * 31 + hashCodeAsciiSanitizeFromChar( - UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET + CHAR_ARRAY_INDEX_SCALE))) - * 31 + hashCodeAsciiSanitizeFromChar( - UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET)); - case 6: - return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitizeFromChar( - UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + 2 * CHAR_ARRAY_INDEX_SCALE)), 13)) - * 31 + hashCodeAsciiSanitizeFromChar( - UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET)); - case 5: - return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitizeFromChar( - UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + CHAR_ARRAY_INDEX_SCALE)), 13)) - * 31 + hashCodeAsciiSanitizeFromChar( - UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET)); - case 4: - return hash * 31 + hashCodeAsciiSanitizeFromChar( - UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET)); - case 3: - return (hash * 31 + hashCodeAsciiSanitizeFromChar( - UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET + CHAR_ARRAY_INDEX_SCALE))) - * 31 + hashCodeAsciiSanitizeFromChar( - UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET)); - case 2: - return hash * 31 + hashCodeAsciiSanitizeFromChar( - UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET)); - case 1: - return hash * 31 + hashCodeAsciiSanitizeFromChar( - UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET)); - default: - return hash; - } - } - - 
static boolean hasCharArray(CharSequence data) { - return STRING_CHAR_VALUE_FIELD_OFFSET != -1 && data.getClass() == String.class; - } - - static boolean hasByteArray(CharSequence data) { - return STRING_BYTE_VALUE_FIELD_OFFSET != -1 && data.getClass() == String.class; - } - - /** - * Callers are expected to call {@link #hasCharArray(CharSequence)} before calling this method. - */ - static char[] charArray(CharSequence data) { - return (char[]) UNSAFE.getObject(data, STRING_CHAR_VALUE_FIELD_OFFSET); - } - - /** - * Callers are expected to call {@link #hasByteArray(CharSequence)} before calling this method. - */ - static byte[] byteArray(CharSequence data) { - return (byte[]) UNSAFE.getObject(data, STRING_BYTE_VALUE_FIELD_OFFSET); - } - static int hashCodeAsciiCompute(long value, int hash) { // masking with 0x1f reduces the number of overall bits that impact the hash code but makes the hash // code the same regardless of character case (upper case or lower case hash is the same). - return (hash * 31 + - // High order int - (int) ((value & 0x1f1f1f1f00000000L) >>> 32)) * 31 + + return hash * HASH_CODE_C1 + // Low order int - hashCodeAsciiSanitize((int) value); - } - - static int hashCodeAsciiComputeFromChar(long high, long low, int hash) { - // masking with 0x1f reduces the number of overall bits that impact the hash code but makes the hash - // code the same regardless of character case (upper case or lower case hash is the same). 
- return (hash * 31 + - // High order int (which is low order for char) - hashCodeAsciiSanitizeFromChar(low)) * 31 + - // Low order int (which is high order for char) - hashCodeAsciiSanitizeFromChar(high); + hashCodeAsciiSanitize((int) value) * HASH_CODE_C2 + + // High order int + (int) ((value & 0x1f1f1f1f00000000L) >>> 32); } static int hashCodeAsciiSanitize(int value) { return value & 0x1f1f1f1f; } - private static int hashCodeAsciiSanitizeFromChar(long value) { - return (int) (((value & 0x1f000000000000L) >>> 24) | - ((value & 0x1f00000000L) >>> 16) | - ((value & 0x1f0000) >>> 8) | - (value & 0x1f)); - } - static int hashCodeAsciiSanitize(short value) { return value & 0x1f1f; } - private static int hashCodeAsciiSanitizeFromChar(int value) { - return ((value & 0x1f0000) >>> 8) | (value & 0x1f); - } - - static int hashCodeAsciiSanitizeAsByte(char value) { - return value & 0x1f; - } - static int hashCodeAsciiSanitize(byte value) { return value & 0x1f; } - private static int hashCodeAsciiSanitizeFromChar(short value) { - return value & 0x1f; - } - static AtomicReferenceFieldUpdater newAtomicReferenceFieldUpdater( Class tclass, String fieldName) throws Exception { return new UnsafeAtomicReferenceFieldUpdater(UNSAFE, tclass, fieldName); diff --git a/common/src/test/java/io/netty/util/internal/PlatformDependentTest.java b/common/src/test/java/io/netty/util/internal/PlatformDependentTest.java index 53ea6a846f..0c38f85a56 100644 --- a/common/src/test/java/io/netty/util/internal/PlatformDependentTest.java +++ b/common/src/test/java/io/netty/util/internal/PlatformDependentTest.java @@ -124,9 +124,6 @@ public class PlatformDependentTest { assertEquals("length=" + i, hashCodeAsciiSafe(bytes, 0, bytes.length), hashCodeAscii(bytes, 0, bytes.length)); - assertEquals("length=" + i, - hashCodeAsciiSafe(string), - hashCodeAscii(string)); assertEquals("length=" + i, hashCodeAscii(bytes, 0, bytes.length), hashCodeAscii(string)); diff --git 
a/microbench/src/main/java/io/netty/microbenchmark/common/AsciiStringBenchmark.java b/microbench/src/main/java/io/netty/microbenchmark/common/AsciiStringBenchmark.java index 1dce6ad15c..5e498fd79f 100644 --- a/microbench/src/main/java/io/netty/microbenchmark/common/AsciiStringBenchmark.java +++ b/microbench/src/main/java/io/netty/microbenchmark/common/AsciiStringBenchmark.java @@ -34,7 +34,7 @@ import java.util.Random; @Warmup(iterations = 5) public class AsciiStringBenchmark extends AbstractMicrobenchmark { - @Param({ "3", "5", "7", "8", "10", "20", "50" }) + @Param({ "3", "5", "7", "8", "10", "20", "50", "100", "1000" }) public int size; private AsciiString asciiString;