Remove unsafe char[] access in PlatformDependent

Motivation:
PlatformDependent attempts to use reflection to get the underlying char[] (or byte[]) from String objects. This is fragile: if the String implementation does not use the full array and instead uses only a subset of it, the optimization is invalid. The String implementations in OpenJDK 6 and some earlier versions of OpenJDK 7 are able to use a subsection of the underlying char[].

Modifications:
- PlatformDependent should not attempt to use the underlying array from String (or other data types) via reflection

Result:
PlatformDependent hash code generation for CharSequence does not depend upon specific JDK implementation details.
This commit is contained in:
Scott Mitchell 2016-06-22 18:07:15 -07:00
parent f3dc483c99
commit a7f7d9c8e0
6 changed files with 343 additions and 284 deletions

View File

@ -0,0 +1,177 @@
/*
* Copyright 2016 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License, version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a
* copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package io.netty.handler.codec.http2;
import io.netty.handler.codec.http.HttpHeaderNames;
import io.netty.handler.codec.http.HttpHeaderValues;
import io.netty.util.AsciiString;
import io.netty.util.internal.PlatformDependent;
import org.junit.Ignore;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.lang.reflect.Field;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
 * Stand-alone program that counts hash collisions over a corpus of HTTP header names, HTTP header values, HTTP/2
 * pseudo-header names and dictionary words. The corpus is hashed once with a simple case-insensitive
 * {@code 31}-multiplier hash and once with {@link PlatformDependent#hashCodeAscii(CharSequence)}; both results are
 * printed to {@code System.out}.
 */
@Ignore
public final class HashCollisionTest {
    private HashCollisionTest() { }

    /**
     * Builds the corpus, computes the colliding values for both hash functions and prints them.
     *
     * @param args unused
     * @throws IllegalAccessException if a header constant cannot be read reflectively
     * @throws IOException if the word list cannot be read
     * @throws URISyntaxException kept in the signature for compatibility; nothing in this method throws it
     */
    public static void main(String[] args) throws IllegalAccessException, IOException, URISyntaxException {
        // Big initial size for when all name sources are pulled in.
        List<CharSequence> strings = new ArrayList<CharSequence>(350000);
        addHttpHeaderNames(strings);
        addHttpHeaderValues(strings);
        addHttp2HeaderNames(strings);
        addWordsFromFile(new File("/usr/share/dict/words"), strings);
        // More "english words" can be found here:
        // https://gist.github.com/Scottmitch/de2f03912778016ecee3c140478f07e0#file-englishwords-txt

        Map<Integer, List<CharSequence>> dups = calculateDuplicates(strings, new Function<CharSequence, Integer>() {
            @Override
            public Integer apply(CharSequence string) {
                int h = 0;
                for (int i = 0; i < string.length(); ++i) {
                    // masking with 0x1F reduces the number of overall bits that impact the hash code but makes the
                    // hash code the same regardless of character case (upper case or lower case hash is the same).
                    h = h * 31 + (string.charAt(i) & 0x1F);
                }
                return h;
            }
        });
        PrintStream writer = System.out;
        writer.println("==Old Duplicates==");
        printResults(writer, dups);

        dups = calculateDuplicates(strings, new Function<CharSequence, Integer>() {
            @Override
            public Integer apply(CharSequence string) {
                return PlatformDependent.hashCodeAscii(string);
            }
        });
        writer.println();
        writer.println("==New Duplicates==");
        printResults(writer, dups);
    }

    /** Adds every public {@link AsciiString} constant of {@link HttpHeaderNames} to {@code values}. */
    private static void addHttpHeaderNames(List<CharSequence> values) throws IllegalAccessException {
        addAsciiStringConstants(HttpHeaderNames.class, values);
    }

    /** Adds every public {@link AsciiString} constant of {@link HttpHeaderValues} to {@code values}. */
    private static void addHttpHeaderValues(List<CharSequence> values) throws IllegalAccessException {
        addAsciiStringConstants(HttpHeaderValues.class, values);
    }

    /**
     * Reflectively reads every public field of {@code holder} whose declared type is {@link AsciiString} and appends
     * its value to {@code values}. Fields are read with a {@code null} receiver, i.e. as static fields.
     */
    private static void addAsciiStringConstants(Class<?> holder, List<CharSequence> values)
            throws IllegalAccessException {
        for (Field f : holder.getFields()) {
            if (f.getType() == AsciiString.class) {
                values.add((AsciiString) f.get(null));
            }
        }
    }

    /** Adds the value of every {@link Http2Headers.PseudoHeaderName} to {@code values}. */
    private static void addHttp2HeaderNames(List<CharSequence> values) {
        for (Http2Headers.PseudoHeaderName name : Http2Headers.PseudoHeaderName.values()) {
            values.add(name.value());
        }
    }

    /**
     * Appends each line of {@code file} to {@code values}, skipping lines that contain an apostrophe.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private static void addWordsFromFile(File file, List<CharSequence> values) throws IOException {
        BufferedReader br = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                // Make a "best effort" to prune input which contains characters that are not valid in HTTP header
                // names
                if (line.indexOf('\'') < 0) {
                    values.add(line);
                }
            }
        } finally {
            br.close();
        }
    }

    /**
     * Groups {@code strings} by the hash code produced by {@code hasher} and returns the groups that contain real
     * collisions.
     *
     * @param strings the values to hash
     * @param hasher the hash function under test
     * @return a map from hash code to the values sharing that hash code which differ from each other when compared
     *         ignoring case; values that are equal ignoring case are listed once. Empty if there are no collisions.
     */
    private static Map<Integer, List<CharSequence>> calculateDuplicates(List<CharSequence> strings,
                                                                        Function<CharSequence, Integer> hasher) {
        Map<Integer, List<CharSequence>> hashResults = new HashMap<Integer, List<CharSequence>>();
        Set<Integer> duplicateHashCodes = new HashSet<Integer>();

        for (CharSequence str : strings) {
            Integer hash = hasher.apply(str);
            List<CharSequence> results = hashResults.get(hash);
            if (results == null) {
                results = new ArrayList<CharSequence>(1);
                hashResults.put(hash, results);
            } else {
                duplicateHashCodes.add(hash);
            }
            results.add(str);
        }

        if (duplicateHashCodes.isEmpty()) {
            return Collections.emptyMap();
        }
        Map<Integer, List<CharSequence>> duplicates =
                new HashMap<Integer, List<CharSequence>>(duplicateHashCodes.size());
        for (Integer duplicateHashCode : duplicateHashCodes) {
            // The same word may appear several times (or in different case) in the input. Those entries are expected
            // to share a hash code, so keep only one representative per case-insensitive value and examine every
            // entry of the bucket rather than stopping at the first mismatch.
            List<CharSequence> realDups = new ArrayList<CharSequence>(2);
            for (CharSequence candidate : hashResults.get(duplicateHashCode)) {
                if (!containsIgnoreCase(realDups, candidate)) {
                    realDups.add(candidate);
                }
            }
            if (realDups.size() > 1) {
                duplicates.put(duplicateHashCode, realDups);
            }
        }
        return duplicates;
    }

    /** Returns {@code true} if {@code values} holds an element equal to {@code candidate} ignoring case. */
    private static boolean containsIgnoreCase(List<CharSequence> values, CharSequence candidate) {
        for (CharSequence existing : values) {
            if (AsciiString.contentEqualsIgnoreCase(candidate, existing)) {
                return true;
            }
        }
        return false;
    }

    /** Prints the number of colliding hash codes followed by one line per hash code listing its values. */
    private static void printResults(PrintStream stream, Map<Integer, List<CharSequence>> dups) {
        stream.println("Number duplicates: " + dups.size());
        for (Entry<Integer, List<CharSequence>> entry : dups.entrySet()) {
            stream.print(entry.getValue().size() + " duplicates for hash: " + entry.getKey() + " values: ");
            for (CharSequence str : entry.getValue()) {
                stream.print("[" + str + "] ");
            }
            stream.println();
        }
    }

    /** Minimal single-argument function abstraction used to plug in the hash function under test. */
    private interface Function<P, R> {
        R apply(P param);
    }
}

View File

@ -211,7 +211,7 @@ public final class HeadersUtils {
@Override
public Object[] toArray() {
Object[] arr = new String[size()];
Object[] arr = new Object[size()];
fillArray(arr);
return arr;
}

View File

@ -56,9 +56,9 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static io.netty.util.internal.PlatformDependent0.HASH_CODE_ASCII_SEED;
import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiCompute;
import static io.netty.util.internal.PlatformDependent0.HASH_CODE_C1;
import static io.netty.util.internal.PlatformDependent0.HASH_CODE_C2;
import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiSanitize;
import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiSanitizeAsByte;
import static io.netty.util.internal.PlatformDependent0.unalignedAccess;
/**
@ -431,27 +431,6 @@ public final class PlatformDependent {
((long) bytes[offset + 7] & 0xff) << 56;
}
private static long getLongFromBytesSafe(CharSequence bytes, int offset) {
if (BIG_ENDIAN_NATIVE_ORDER) {
return (long) bytes.charAt(offset) << 56 |
((long) bytes.charAt(offset + 1) & 0xff) << 48 |
((long) bytes.charAt(offset + 2) & 0xff) << 40 |
((long) bytes.charAt(offset + 3) & 0xff) << 32 |
((long) bytes.charAt(offset + 4) & 0xff) << 24 |
((long) bytes.charAt(offset + 5) & 0xff) << 16 |
((long) bytes.charAt(offset + 6) & 0xff) << 8 |
(long) bytes.charAt(offset + 7) & 0xff;
}
return (long) bytes.charAt(offset) & 0xff |
((long) bytes.charAt(offset + 1) & 0xff) << 8 |
((long) bytes.charAt(offset + 2) & 0xff) << 16 |
((long) bytes.charAt(offset + 3) & 0xff) << 24 |
((long) bytes.charAt(offset + 4) & 0xff) << 32 |
((long) bytes.charAt(offset + 5) & 0xff) << 40 |
((long) bytes.charAt(offset + 6) & 0xff) << 48 |
((long) bytes.charAt(offset + 7) & 0xff) << 56;
}
private static int getIntSafe(byte[] bytes, int offset) {
if (BIG_ENDIAN_NATIVE_ORDER) {
return bytes[offset] << 24 |
@ -465,19 +444,6 @@ public final class PlatformDependent {
bytes[offset + 3] << 24;
}
private static int getIntFromBytesSafe(CharSequence bytes, int offset) {
if (BIG_ENDIAN_NATIVE_ORDER) {
return bytes.charAt(offset) << 24 |
(bytes.charAt(offset + 1) & 0xff) << 16 |
(bytes.charAt(offset + 2) & 0xff) << 8 |
bytes.charAt(offset + 3) & 0xff;
}
return bytes.charAt(offset) & 0xff |
(bytes.charAt(offset + 1) & 0xff) << 8 |
(bytes.charAt(offset + 2) & 0xff) << 16 |
bytes.charAt(offset + 3) << 24;
}
private static short getShortSafe(byte[] bytes, int offset) {
if (BIG_ENDIAN_NATIVE_ORDER) {
return (short) (bytes[offset] << 8 | (bytes[offset + 1] & 0xff));
@ -485,11 +451,54 @@ public final class PlatformDependent {
return (short) (bytes[offset] & 0xff | (bytes[offset + 1] << 8));
}
private static short getShortFromBytesSafe(CharSequence bytes, int offset) {
/**
 * {@link CharSequence} counterpart of {@link PlatformDependent0#hashCodeAsciiCompute(long, int)}: folds the eight
 * characters starting at {@code offset} into {@code hash}.
 */
private static int hashCodeAsciiCompute(CharSequence value, int offset, int hash) {
    // Every character is masked with 0x1f before it is mixed in. That reduces the number of bits which influence the
    // hash code but makes upper case and lower case input hash to the same value.
    final int lowOrderInt = hashCodeAsciiSanitizeInt(value, offset);
    final int highOrderInt = hashCodeAsciiSanitizeInt(value, offset + 4);
    return hash * HASH_CODE_C1 + lowOrderInt * HASH_CODE_C2 + highOrderInt;
}
/**
 * Identical to {@link PlatformDependent0#hashCodeAsciiSanitize(int)} but for {@link CharSequence}.
 * <p>
 * Packs the low five bits of the four characters starting at {@code offset} into an {@code int}, one character per
 * byte, laid out the way a native-order 4-byte read of the equivalent {@code byte[]} would produce them.
 *
 * @param value the sequence to read from
 * @param offset index of the first of the four characters
 * @return the four masked characters combined into a single {@code int}
 */
private static int hashCodeAsciiSanitizeInt(CharSequence value, int offset) {
    if (BIG_ENDIAN_NATIVE_ORDER) {
        // mimic a unsafe.getInt call on a big endian machine: the first character ends up in the most significant
        // byte and the fourth character in the least significant byte.
        return (value.charAt(offset + 3) & 0x1f) |
               (value.charAt(offset + 2) & 0x1f) << 8 |
               (value.charAt(offset + 1) & 0x1f) << 16 |
               (value.charAt(offset) & 0x1f) << 24;
    }
    // little endian: the first character ends up in the least significant byte.
    return (value.charAt(offset + 3) & 0x1f) << 24 |
           (value.charAt(offset + 2) & 0x1f) << 16 |
           (value.charAt(offset + 1) & 0x1f) << 8 |
           (value.charAt(offset) & 0x1f);
}
/**
 * {@link CharSequence} counterpart of {@link PlatformDependent0#hashCodeAsciiSanitize(short)}: combines the low
 * five bits of the two characters starting at {@code offset} into one value, ordered by native endianness.
 */
private static int hashCodeAsciiSanitizeShort(CharSequence value, int offset) {
    final int second = value.charAt(offset + 1) & 0x1f;
    final int first = value.charAt(offset) & 0x1f;
    // mimic a unsafe.getShort call: on a big endian machine the first character is the high byte, otherwise the
    // second character is.
    return BIG_ENDIAN_NATIVE_ORDER ? (second | (first << 8)) : ((second << 8) | first);
}
/**
* Identical to {@link PlatformDependent0#hashCodeAsciiSanitize(byte)} but for a single {@code char}.
*
* @param value the character to sanitize
* @return the low five bits of {@code value}
*/
private static int hashCodeAsciiSanitizsByte(char value) {
return value & 0x1f;
}
public static void putOrderedObject(Object object, long address, Object value) {
@ -676,10 +685,9 @@ public final class PlatformDependent {
* The resulting hash code will be case insensitive.
*/
public static int hashCodeAscii(byte[] bytes, int startPos, int length) {
if (!hasUnsafe() || !unalignedAccess()) {
return hashCodeAsciiSafe(bytes, startPos, length);
}
return PlatformDependent0.hashCodeAscii(bytes, startPos, length);
return !hasUnsafe() || !unalignedAccess() ?
hashCodeAsciiSafe(bytes, startPos, length) :
PlatformDependent0.hashCodeAscii(bytes, startPos, length);
}
/**
@ -693,14 +701,83 @@ public final class PlatformDependent {
* The resulting hash code will be case insensitive.
*/
public static int hashCodeAscii(CharSequence bytes) {
if (!hasUnsafe() || !unalignedAccess()) {
return hashCodeAsciiSafe(bytes);
} else if (PlatformDependent0.hasCharArray(bytes)) {
return PlatformDependent0.hashCodeAscii(PlatformDependent0.charArray(bytes));
} else if (PlatformDependent0.hasByteArray(bytes)) {
return PlatformDependent0.hashCodeAscii(PlatformDependent0.byteArray(bytes));
int hash = HASH_CODE_ASCII_SEED;
final int remainingBytes = bytes.length() & 7;
// Benchmarking shows that by just naively looping for inputs 8~31 bytes long we incur a relatively large
// performance penalty (only achieve about 60% performance of loop which iterates over each char). So because
// of this we take special provisions to unroll the looping for these conditions.
switch (bytes.length()) {
case 31:
case 30:
case 29:
case 28:
case 27:
case 26:
case 25:
case 24:
hash = hashCodeAsciiCompute(bytes, bytes.length() - 24,
hashCodeAsciiCompute(bytes, bytes.length() - 16,
hashCodeAsciiCompute(bytes, bytes.length() - 8, hash)));
break;
case 23:
case 22:
case 21:
case 20:
case 19:
case 18:
case 17:
case 16:
hash = hashCodeAsciiCompute(bytes, bytes.length() - 16,
hashCodeAsciiCompute(bytes, bytes.length() - 8, hash));
break;
case 15:
case 14:
case 13:
case 12:
case 11:
case 10:
case 9:
case 8:
hash = hashCodeAsciiCompute(bytes, bytes.length() - 8, hash);
break;
case 7:
case 6:
case 5:
case 4:
case 3:
case 2:
case 1:
case 0:
break;
default:
for (int i = bytes.length() - 8; i >= remainingBytes; i -= 8) {
hash = hashCodeAsciiCompute(bytes, i, hash);
}
break;
}
switch(remainingBytes) {
case 7:
return ((hash * HASH_CODE_C1 + hashCodeAsciiSanitizsByte(bytes.charAt(0)))
* HASH_CODE_C2 + hashCodeAsciiSanitizeShort(bytes, 1))
* HASH_CODE_C1 + hashCodeAsciiSanitizeInt(bytes, 3);
case 6:
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitizeShort(bytes, 0))
* HASH_CODE_C2 + hashCodeAsciiSanitizeInt(bytes, 2);
case 5:
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitizsByte(bytes.charAt(0)))
* HASH_CODE_C2 + hashCodeAsciiSanitizeInt(bytes, 1);
case 4:
return hash * HASH_CODE_C1 + hashCodeAsciiSanitizeInt(bytes, 0);
case 3:
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitizsByte(bytes.charAt(0)))
* HASH_CODE_C2 + hashCodeAsciiSanitizeShort(bytes, 1);
case 2:
return hash * HASH_CODE_C1 + hashCodeAsciiSanitizeShort(bytes, 0);
case 1:
return hash * HASH_CODE_C1 + hashCodeAsciiSanitizsByte(bytes.charAt(0));
default:
return hash;
}
return hashCodeAsciiSafe(bytes);
}
/**
@ -1251,62 +1328,28 @@ public final class PlatformDependent {
final int remainingBytes = length & 7;
final int end = startPos + remainingBytes;
for (int i = startPos - 8 + length; i >= end; i -= 8) {
hash = hashCodeAsciiCompute(getLongSafe(bytes, i), hash);
hash = PlatformDependent0.hashCodeAsciiCompute(getLongSafe(bytes, i), hash);
}
switch(remainingBytes) {
case 7:
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 3)), 13))
* 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1)))
* 31 + hashCodeAsciiSanitize(bytes[startPos]);
return ((hash * HASH_CODE_C1 + hashCodeAsciiSanitize(bytes[startPos]))
* HASH_CODE_C2 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1)))
* HASH_CODE_C1 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 3));
case 6:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 2)), 13))
* 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos));
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos)))
* HASH_CODE_C2 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 2));
case 5:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 1)), 13))
* 31 + hashCodeAsciiSanitize(bytes[startPos]);
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(bytes[startPos]))
* HASH_CODE_C2 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 1));
case 4:
return hash * 31 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos));
return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos));
case 3:
return (hash * 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1)))
* 31 + hashCodeAsciiSanitize(bytes[startPos]);
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(bytes[startPos]))
* HASH_CODE_C2 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1));
case 2:
return hash * 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos));
return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos));
case 1:
return hash * 31 + hashCodeAsciiSanitize(bytes[startPos]);
default:
return hash;
}
}
/**
* Package private for testing purposes only!
*/
static int hashCodeAsciiSafe(CharSequence bytes) {
int hash = HASH_CODE_ASCII_SEED;
final int remainingBytes = bytes.length() & 7;
for (int i = bytes.length() - 8; i >= remainingBytes; i -= 8) {
hash = hashCodeAsciiCompute(getLongFromBytesSafe(bytes, i), hash);
}
switch(remainingBytes) {
case 7:
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 3)), 13))
* 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 1)))
* 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
case 6:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 2)), 13))
* 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 0));
case 5:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 1)), 13))
* 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
case 4:
return hash * 31 + hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 0));
case 3:
return (hash * 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 1)))
* 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
case 2:
return hash * 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 0));
case 1:
return hash * 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(bytes[startPos]);
default:
return hash;
}

View File

@ -41,13 +41,12 @@ final class PlatformDependent0 {
static final Unsafe UNSAFE;
private static final long ADDRESS_FIELD_OFFSET;
private static final long BYTE_ARRAY_BASE_OFFSET;
private static final long CHAR_ARRAY_BASE_OFFSET;
private static final long CHAR_ARRAY_INDEX_SCALE;
private static final long STRING_CHAR_VALUE_FIELD_OFFSET;
private static final long STRING_BYTE_VALUE_FIELD_OFFSET;
private static final Constructor<?> DIRECT_BUFFER_CONSTRUCTOR;
static final int HASH_CODE_ASCII_SEED = 0xc2b2ae35; // constant borrowed from murmur3
// constants borrowed from murmur3
static final int HASH_CODE_ASCII_SEED = 0xc2b2ae35;
static final int HASH_CODE_C1 = 0x1b873593;
static final int HASH_CODE_C2 = 0x1b873593;
/**
* Limits the number of bytes to copy per {@link Unsafe#copyMemory(long, long, long)} to allow safepoint polling
@ -117,9 +116,8 @@ final class PlatformDependent0 {
if (unsafe == null) {
ADDRESS_FIELD_OFFSET = -1;
BYTE_ARRAY_BASE_OFFSET = CHAR_ARRAY_BASE_OFFSET = CHAR_ARRAY_INDEX_SCALE = -1;
BYTE_ARRAY_BASE_OFFSET = -1;
UNALIGNED = false;
STRING_CHAR_VALUE_FIELD_OFFSET = STRING_BYTE_VALUE_FIELD_OFFSET = -1;
DIRECT_BUFFER_CONSTRUCTOR = null;
} else {
Constructor<?> directBufferConstructor;
@ -142,8 +140,6 @@ final class PlatformDependent0 {
ADDRESS_FIELD_OFFSET = objectFieldOffset(addressField);
BYTE_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(byte[].class);
CHAR_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(char[].class);
CHAR_ARRAY_INDEX_SCALE = UNSAFE.arrayIndexScale(char[].class);
boolean unaligned;
try {
Class<?> bitsClass = Class.forName("java.nio.Bits", false, ClassLoader.getSystemClassLoader());
@ -159,48 +155,6 @@ final class PlatformDependent0 {
UNALIGNED = unaligned;
logger.debug("java.nio.Bits.unaligned: {}", UNALIGNED);
Field stringValueField = null;
try {
stringValueField = AccessController.doPrivileged(new PrivilegedAction<Field>() {
@Override
public Field run() {
try {
Field f = String.class.getDeclaredField("value");
f.setAccessible(true);
return f;
} catch (NoSuchFieldException e) {
logger.info("Failed to find String value array (please report an issue)." +
"String hash code optimizations are disabled.", e);
} catch (SecurityException e) {
logger.debug("No permissions to get String value array." +
"String hash code optimizations are disabled.", e);
}
return null;
}
});
} catch (Throwable t) {
logger.debug("AccessController.doPrivileged failed to get String value array." +
"String hash code optimizations are disabled.", t);
}
if (stringValueField == null) {
STRING_CHAR_VALUE_FIELD_OFFSET = STRING_BYTE_VALUE_FIELD_OFFSET = -1;
} else {
long stringValueFieldOffset = UNSAFE.objectFieldOffset(stringValueField);
Object o = UNSAFE.getObject("", stringValueFieldOffset);
if (char[].class.isInstance(o)) {
STRING_CHAR_VALUE_FIELD_OFFSET = stringValueFieldOffset;
STRING_BYTE_VALUE_FIELD_OFFSET = -1;
} else if (byte[].class.isInstance(o)) {
STRING_CHAR_VALUE_FIELD_OFFSET = -1;
STRING_BYTE_VALUE_FIELD_OFFSET = stringValueFieldOffset;
} else {
STRING_CHAR_VALUE_FIELD_OFFSET = STRING_BYTE_VALUE_FIELD_OFFSET = -1;
logger.info("Unexpected type [" + o.getClass() + "] for String value array." +
"String hash code optimizations are disabled.");
}
}
}
logger.debug("java.nio.DirectByteBuffer.<init>(long, int): {}",
@ -456,173 +410,61 @@ final class PlatformDependent0 {
}
}
static int hashCodeAscii(byte[] bytes) {
return hashCodeAscii(bytes, 0, bytes.length);
}
/**
* This must remain consistent with {@link #hashCodeAscii(char[])}.
*/
static int hashCodeAscii(byte[] bytes, int startPos, int length) {
int hash = HASH_CODE_ASCII_SEED;
final long baseOffset = BYTE_ARRAY_BASE_OFFSET + startPos;
final int remainingBytes = length & 7;
if (length > 7) { // Fast path for small sized inputs. Benchmarking shows this is beneficial.
final long end = baseOffset + remainingBytes;
for (long i = baseOffset - 8 + length; i >= end; i -= 8) {
hash = hashCodeAsciiCompute(UNSAFE.getLong(bytes, i), hash);
}
final long end = baseOffset + remainingBytes;
for (long i = baseOffset - 8 + length; i >= end; i -= 8) {
hash = hashCodeAsciiCompute(UNSAFE.getLong(bytes, i), hash);
}
switch(remainingBytes) {
case 7:
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 3)), 13))
* 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1)))
* 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
return ((hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset)))
* HASH_CODE_C2 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1)))
* HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 3));
case 6:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 2)), 13))
* 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset));
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset)))
* HASH_CODE_C2 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 2));
case 5:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 1)), 13))
* 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset)))
* HASH_CODE_C2 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 1));
case 4:
return hash * 31 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset));
return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset));
case 3:
return (hash * 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1)))
* 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset)))
* HASH_CODE_C2 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1));
case 2:
return hash * 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset));
return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset));
case 1:
return hash * 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
default:
return hash;
}
}
/**
* This method assumes that {@code bytes} is equivalent to a {@code byte[]} but just using {@code char[]}
* for storage. The MSB of each {@code char} from {@code bytes} is ignored.
* <p>
* This must remain consistent with {@link #hashCodeAscii(byte[], int, int)}.
*/
static int hashCodeAscii(char[] bytes) {
int hash = HASH_CODE_ASCII_SEED;
final int remainingBytes = bytes.length & 7;
for (int i = bytes.length - 8; i >= remainingBytes; i -= 8) {
hash = hashCodeAsciiComputeFromChar(
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + i * CHAR_ARRAY_INDEX_SCALE),
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + (i + 4) * CHAR_ARRAY_INDEX_SCALE),
hash);
}
switch(remainingBytes) {
case 7:
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitizeFromChar(
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + 3 * CHAR_ARRAY_INDEX_SCALE)), 13))
* 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET + CHAR_ARRAY_INDEX_SCALE)))
* 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET));
case 6:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitizeFromChar(
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + 2 * CHAR_ARRAY_INDEX_SCALE)), 13))
* 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET));
case 5:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitizeFromChar(
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + CHAR_ARRAY_INDEX_SCALE)), 13))
* 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET));
case 4:
return hash * 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET));
case 3:
return (hash * 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET + CHAR_ARRAY_INDEX_SCALE)))
* 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET));
case 2:
return hash * 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET));
case 1:
return hash * 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET));
default:
return hash;
}
}
static boolean hasCharArray(CharSequence data) {
return STRING_CHAR_VALUE_FIELD_OFFSET != -1 && data.getClass() == String.class;
}
static boolean hasByteArray(CharSequence data) {
return STRING_BYTE_VALUE_FIELD_OFFSET != -1 && data.getClass() == String.class;
}
/**
* Callers are expected to call {@link #hasCharArray(CharSequence)} before calling this method.
*/
static char[] charArray(CharSequence data) {
return (char[]) UNSAFE.getObject(data, STRING_CHAR_VALUE_FIELD_OFFSET);
}
/**
* Callers are expected to call {@link #hasByteArray(CharSequence)} before calling this method.
*/
static byte[] byteArray(CharSequence data) {
return (byte[]) UNSAFE.getObject(data, STRING_BYTE_VALUE_FIELD_OFFSET);
}
static int hashCodeAsciiCompute(long value, int hash) {
// masking with 0x1f reduces the number of overall bits that impact the hash code but makes the hash
// code the same regardless of character case (upper case or lower case hash is the same).
return (hash * 31 +
// High order int
(int) ((value & 0x1f1f1f1f00000000L) >>> 32)) * 31 +
return hash * HASH_CODE_C1 +
// Low order int
hashCodeAsciiSanitize((int) value);
}
static int hashCodeAsciiComputeFromChar(long high, long low, int hash) {
// masking with 0x1f reduces the number of overall bits that impact the hash code but makes the hash
// code the same regardless of character case (upper case or lower case hash is the same).
return (hash * 31 +
// High order int (which is low order for char)
hashCodeAsciiSanitizeFromChar(low)) * 31 +
// Low order int (which is high order for char)
hashCodeAsciiSanitizeFromChar(high);
hashCodeAsciiSanitize((int) value) * HASH_CODE_C2 +
// High order int
(int) ((value & 0x1f1f1f1f00000000L) >>> 32);
}
/**
* Keeps only the low five bits of each of the four bytes of {@code value}. Masking with {@code 0x1f} makes the
* resulting hash code the same regardless of character case.
*/
static int hashCodeAsciiSanitize(int value) {
return value & 0x1f1f1f1f;
}
private static int hashCodeAsciiSanitizeFromChar(long value) {
return (int) (((value & 0x1f000000000000L) >>> 24) |
((value & 0x1f00000000L) >>> 16) |
((value & 0x1f0000) >>> 8) |
(value & 0x1f));
}
/**
* Keeps only the low five bits of each of the two bytes of {@code value} and returns the result as an
* {@code int}.
*/
static int hashCodeAsciiSanitize(short value) {
return value & 0x1f1f;
}
private static int hashCodeAsciiSanitizeFromChar(int value) {
return ((value & 0x1f0000) >>> 8) | (value & 0x1f);
}
static int hashCodeAsciiSanitizeAsByte(char value) {
return value & 0x1f;
}
/**
* Keeps only the low five bits of {@code value} and returns the result as an {@code int}.
*/
static int hashCodeAsciiSanitize(byte value) {
return value & 0x1f;
}
private static int hashCodeAsciiSanitizeFromChar(short value) {
return value & 0x1f;
}
static <U, W> AtomicReferenceFieldUpdater<U, W> newAtomicReferenceFieldUpdater(
Class<? super U> tclass, String fieldName) throws Exception {
return new UnsafeAtomicReferenceFieldUpdater<U, W>(UNSAFE, tclass, fieldName);

View File

@ -124,9 +124,6 @@ public class PlatformDependentTest {
assertEquals("length=" + i,
hashCodeAsciiSafe(bytes, 0, bytes.length),
hashCodeAscii(bytes, 0, bytes.length));
assertEquals("length=" + i,
hashCodeAsciiSafe(string),
hashCodeAscii(string));
assertEquals("length=" + i,
hashCodeAscii(bytes, 0, bytes.length),
hashCodeAscii(string));

View File

@ -34,7 +34,7 @@ import java.util.Random;
@Warmup(iterations = 5)
public class AsciiStringBenchmark extends AbstractMicrobenchmark {
@Param({ "3", "5", "7", "8", "10", "20", "50" })
@Param({ "3", "5", "7", "8", "10", "20", "50", "100", "1000" })
public int size;
private AsciiString asciiString;