Remove unsafe char[] access in PlatformDependent
Motivation: PlatformDependent attempts to use reflection to get the underlying char[] (or byte[]) from String objects. This is fragile: if the String implementation uses only a subsection of the array rather than the full array, the optimization is invalid. String in OpenJDK 6 and in some earlier versions of OpenJDK 7 is able to use a subsection of the underlying char[]. Modifications: - PlatformDependent should not attempt to use the underlying array from String (or other data types) via reflection. Result: PlatformDependent hash code generation for CharSequence does not depend upon specific JDK implementation details.
This commit is contained in:
parent
f3dc483c99
commit
a7f7d9c8e0
@ -0,0 +1,177 @@
|
||||
/*
|
||||
* Copyright 2016 The Netty Project
|
||||
*
|
||||
* The Netty Project licenses this file to you under the Apache License, version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the License. You may obtain a
|
||||
* copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
* or implied. See the License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
package io.netty.handler.codec.http2;
|
||||
|
||||
import io.netty.handler.codec.http.HttpHeaderNames;
|
||||
import io.netty.handler.codec.http.HttpHeaderValues;
|
||||
import io.netty.util.AsciiString;
|
||||
import io.netty.util.internal.PlatformDependent;
|
||||
import org.junit.Ignore;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.lang.reflect.Field;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
|
||||
@Ignore
|
||||
public final class HashCollisionTest {
|
||||
private HashCollisionTest() { }
|
||||
|
||||
public static void main(String[] args) throws IllegalAccessException, IOException, URISyntaxException {
|
||||
// Big initial size for when all name sources are pulled in.
|
||||
List<CharSequence> strings = new ArrayList<CharSequence>(350000);
|
||||
addHttpHeaderNames(strings);
|
||||
addHttpHeaderValues(strings);
|
||||
addHttp2HeaderNames(strings);
|
||||
addWordsFromFile(new File("/usr/share/dict/words"), strings);
|
||||
// More "english words" can be found here:
|
||||
// https://gist.github.com/Scottmitch/de2f03912778016ecee3c140478f07e0#file-englishwords-txt
|
||||
|
||||
Map<Integer, List<CharSequence>> dups = calculateDuplicates(strings, new Function<CharSequence, Integer>() {
|
||||
@Override
|
||||
public Integer apply(CharSequence string) {
|
||||
int h = 0;
|
||||
for (int i = 0; i < string.length(); ++i) {
|
||||
// masking with 0x1F reduces the number of overall bits that impact the hash code but makes the hash
|
||||
// code the same regardless of character case (upper case or lower case hash is the same).
|
||||
h = h * 31 + (string.charAt(i) & 0x1F);
|
||||
}
|
||||
return h;
|
||||
}
|
||||
});
|
||||
PrintStream writer = System.out;
|
||||
writer.println("==Old Duplicates==");
|
||||
printResults(writer, dups);
|
||||
|
||||
dups = calculateDuplicates(strings, new Function<CharSequence, Integer>() {
|
||||
@Override
|
||||
public Integer apply(CharSequence string) {
|
||||
return PlatformDependent.hashCodeAscii(string);
|
||||
}
|
||||
});
|
||||
writer.println();
|
||||
writer.println("==New Duplicates==");
|
||||
printResults(writer, dups);
|
||||
}
|
||||
|
||||
private static void addHttpHeaderNames(List<CharSequence> values) throws IllegalAccessException {
|
||||
for (Field f : HttpHeaderNames.class.getFields()) {
|
||||
if (f.getType() == AsciiString.class) {
|
||||
values.add((AsciiString) f.get(null));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void addHttpHeaderValues(List<CharSequence> values) throws IllegalAccessException {
|
||||
for (Field f : HttpHeaderValues.class.getFields()) {
|
||||
if (f.getType() == AsciiString.class) {
|
||||
values.add((AsciiString) f.get(null));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void addHttp2HeaderNames(List<CharSequence> values) throws IllegalAccessException {
|
||||
for (Http2Headers.PseudoHeaderName name : Http2Headers.PseudoHeaderName.values()) {
|
||||
values.add(name.value());
|
||||
}
|
||||
}
|
||||
|
||||
private static void addWordsFromFile(File file, List<CharSequence> values)
|
||||
throws IllegalAccessException, IOException {
|
||||
BufferedReader br = new BufferedReader(new FileReader(file));
|
||||
try {
|
||||
String line;
|
||||
while ((line = br.readLine()) != null) {
|
||||
// Make a "best effort" to prune input which contains characters that are not valid in HTTP header names
|
||||
if (line.indexOf('\'') < 0) {
|
||||
values.add(line);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
br.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<Integer, List<CharSequence>> calculateDuplicates(List<CharSequence> strings,
|
||||
Function<CharSequence, Integer> hasher) {
|
||||
Map<Integer, List<CharSequence>> hashResults = new HashMap<Integer, List<CharSequence>>();
|
||||
Set<Integer> duplicateHashCodes = new HashSet<Integer>();
|
||||
|
||||
for (CharSequence str : strings) {
|
||||
Integer hash = hasher.apply(str);
|
||||
List<CharSequence> results = hashResults.get(hash);
|
||||
if (results == null) {
|
||||
results = new ArrayList<CharSequence>(1);
|
||||
hashResults.put(hash, results);
|
||||
} else {
|
||||
duplicateHashCodes.add(hash);
|
||||
}
|
||||
results.add(str);
|
||||
}
|
||||
|
||||
if (duplicateHashCodes.isEmpty()) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
Map<Integer, List<CharSequence>> duplicates =
|
||||
new HashMap<Integer, List<CharSequence>>(duplicateHashCodes.size());
|
||||
for (Integer duplicateHashCode : duplicateHashCodes) {
|
||||
List<CharSequence> realDups = new ArrayList<CharSequence>(2);
|
||||
Iterator<CharSequence> itr = hashResults.get(duplicateHashCode).iterator();
|
||||
// there should be at least 2 elements in the list ... bcz there may be duplicates
|
||||
realDups.add(itr.next());
|
||||
checknext: do {
|
||||
CharSequence next = itr.next();
|
||||
for (CharSequence potentialDup : realDups) {
|
||||
if (!AsciiString.contentEqualsIgnoreCase(next, potentialDup)) {
|
||||
realDups.add(next);
|
||||
break checknext;
|
||||
}
|
||||
}
|
||||
} while (itr.hasNext());
|
||||
|
||||
if (realDups.size() > 1) {
|
||||
duplicates.put(duplicateHashCode, realDups);
|
||||
}
|
||||
}
|
||||
return duplicates;
|
||||
}
|
||||
|
||||
private static void printResults(PrintStream stream, Map<Integer, List<CharSequence>> dups) {
|
||||
stream.println("Number duplicates: " + dups.size());
|
||||
for (Entry<Integer, List<CharSequence>> entry : dups.entrySet()) {
|
||||
stream.print(entry.getValue().size() + " duplicates for hash: " + entry.getKey() + " values: ");
|
||||
for (CharSequence str : entry.getValue()) {
|
||||
stream.print("[" + str + "] ");
|
||||
}
|
||||
stream.println();
|
||||
}
|
||||
}
|
||||
|
||||
private interface Function<P, R> {
|
||||
R apply(P param);
|
||||
}
|
||||
}
|
@ -211,7 +211,7 @@ public final class HeadersUtils {
|
||||
|
||||
@Override
|
||||
public Object[] toArray() {
|
||||
Object[] arr = new String[size()];
|
||||
Object[] arr = new Object[size()];
|
||||
fillArray(arr);
|
||||
return arr;
|
||||
}
|
||||
|
@ -56,9 +56,9 @@ import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static io.netty.util.internal.PlatformDependent0.HASH_CODE_ASCII_SEED;
|
||||
import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiCompute;
|
||||
import static io.netty.util.internal.PlatformDependent0.HASH_CODE_C1;
|
||||
import static io.netty.util.internal.PlatformDependent0.HASH_CODE_C2;
|
||||
import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiSanitize;
|
||||
import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiSanitizeAsByte;
|
||||
import static io.netty.util.internal.PlatformDependent0.unalignedAccess;
|
||||
|
||||
/**
|
||||
@ -431,27 +431,6 @@ public final class PlatformDependent {
|
||||
((long) bytes[offset + 7] & 0xff) << 56;
|
||||
}
|
||||
|
||||
private static long getLongFromBytesSafe(CharSequence bytes, int offset) {
|
||||
if (BIG_ENDIAN_NATIVE_ORDER) {
|
||||
return (long) bytes.charAt(offset) << 56 |
|
||||
((long) bytes.charAt(offset + 1) & 0xff) << 48 |
|
||||
((long) bytes.charAt(offset + 2) & 0xff) << 40 |
|
||||
((long) bytes.charAt(offset + 3) & 0xff) << 32 |
|
||||
((long) bytes.charAt(offset + 4) & 0xff) << 24 |
|
||||
((long) bytes.charAt(offset + 5) & 0xff) << 16 |
|
||||
((long) bytes.charAt(offset + 6) & 0xff) << 8 |
|
||||
(long) bytes.charAt(offset + 7) & 0xff;
|
||||
}
|
||||
return (long) bytes.charAt(offset) & 0xff |
|
||||
((long) bytes.charAt(offset + 1) & 0xff) << 8 |
|
||||
((long) bytes.charAt(offset + 2) & 0xff) << 16 |
|
||||
((long) bytes.charAt(offset + 3) & 0xff) << 24 |
|
||||
((long) bytes.charAt(offset + 4) & 0xff) << 32 |
|
||||
((long) bytes.charAt(offset + 5) & 0xff) << 40 |
|
||||
((long) bytes.charAt(offset + 6) & 0xff) << 48 |
|
||||
((long) bytes.charAt(offset + 7) & 0xff) << 56;
|
||||
}
|
||||
|
||||
private static int getIntSafe(byte[] bytes, int offset) {
|
||||
if (BIG_ENDIAN_NATIVE_ORDER) {
|
||||
return bytes[offset] << 24 |
|
||||
@ -465,19 +444,6 @@ public final class PlatformDependent {
|
||||
bytes[offset + 3] << 24;
|
||||
}
|
||||
|
||||
private static int getIntFromBytesSafe(CharSequence bytes, int offset) {
|
||||
if (BIG_ENDIAN_NATIVE_ORDER) {
|
||||
return bytes.charAt(offset) << 24 |
|
||||
(bytes.charAt(offset + 1) & 0xff) << 16 |
|
||||
(bytes.charAt(offset + 2) & 0xff) << 8 |
|
||||
bytes.charAt(offset + 3) & 0xff;
|
||||
}
|
||||
return bytes.charAt(offset) & 0xff |
|
||||
(bytes.charAt(offset + 1) & 0xff) << 8 |
|
||||
(bytes.charAt(offset + 2) & 0xff) << 16 |
|
||||
bytes.charAt(offset + 3) << 24;
|
||||
}
|
||||
|
||||
private static short getShortSafe(byte[] bytes, int offset) {
|
||||
if (BIG_ENDIAN_NATIVE_ORDER) {
|
||||
return (short) (bytes[offset] << 8 | (bytes[offset + 1] & 0xff));
|
||||
@ -485,11 +451,54 @@ public final class PlatformDependent {
|
||||
return (short) (bytes[offset] & 0xff | (bytes[offset + 1] << 8));
|
||||
}
|
||||
|
||||
private static short getShortFromBytesSafe(CharSequence bytes, int offset) {
|
||||
/**
|
||||
* Identical to {@link PlatformDependent0#hashCodeAsciiCompute(long, int)} but for {@link CharSequence}.
|
||||
*/
|
||||
private static int hashCodeAsciiCompute(CharSequence value, int offset, int hash) {
|
||||
// masking with 0x1f reduces the number of overall bits that impact the hash code but makes the hash
|
||||
// code the same regardless of character case (upper case or lower case hash is the same).
|
||||
return hash * HASH_CODE_C1 +
|
||||
// Low order int
|
||||
hashCodeAsciiSanitizeInt(value, offset) * HASH_CODE_C2 +
|
||||
// High order int
|
||||
hashCodeAsciiSanitizeInt(value, offset + 4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to {@link PlatformDependent0#hashCodeAsciiSanitize(int)} but for {@link CharSequence}.
|
||||
*/
|
||||
private static int hashCodeAsciiSanitizeInt(CharSequence value, int offset) {
|
||||
if (BIG_ENDIAN_NATIVE_ORDER) {
|
||||
return (short) (bytes.charAt(offset) << 8 | (bytes.charAt(offset + 1) & 0xff));
|
||||
// mimic a unsafe.getInt call on a big endian machine
|
||||
return (value.charAt(offset) & 0x1f) |
|
||||
(value.charAt(offset + 2) & 0x1f) << 8 |
|
||||
(value.charAt(offset + 1) & 0x1f) << 16 |
|
||||
(value.charAt(offset) & 0x1f) << 24;
|
||||
}
|
||||
return (short) (bytes.charAt(offset) & 0xff | (bytes.charAt(offset + 1) << 8));
|
||||
return (value.charAt(offset + 3) & 0x1f) << 24 |
|
||||
(value.charAt(offset + 2) & 0x1f) << 16 |
|
||||
(value.charAt(offset + 1) & 0x1f) << 8 |
|
||||
(value.charAt(offset) & 0x1f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to {@link PlatformDependent0#hashCodeAsciiSanitize(short)} but for {@link CharSequence}.
|
||||
*/
|
||||
private static int hashCodeAsciiSanitizeShort(CharSequence value, int offset) {
|
||||
if (BIG_ENDIAN_NATIVE_ORDER) {
|
||||
// mimic a unsafe.getShort call on a big endian machine
|
||||
return (value.charAt(offset + 1) & 0x1f) |
|
||||
(value.charAt(offset) & 0x1f) << 8;
|
||||
}
|
||||
return (value.charAt(offset + 1) & 0x1f) << 8 |
|
||||
(value.charAt(offset) & 0x1f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to {@link PlatformDependent0#hashCodeAsciiSanitize(byte)} but for {@link CharSequence}.
|
||||
*/
|
||||
private static int hashCodeAsciiSanitizsByte(char value) {
|
||||
return value & 0x1f;
|
||||
}
|
||||
|
||||
public static void putOrderedObject(Object object, long address, Object value) {
|
||||
@ -676,10 +685,9 @@ public final class PlatformDependent {
|
||||
* The resulting hash code will be case insensitive.
|
||||
*/
|
||||
public static int hashCodeAscii(byte[] bytes, int startPos, int length) {
|
||||
if (!hasUnsafe() || !unalignedAccess()) {
|
||||
return hashCodeAsciiSafe(bytes, startPos, length);
|
||||
}
|
||||
return PlatformDependent0.hashCodeAscii(bytes, startPos, length);
|
||||
return !hasUnsafe() || !unalignedAccess() ?
|
||||
hashCodeAsciiSafe(bytes, startPos, length) :
|
||||
PlatformDependent0.hashCodeAscii(bytes, startPos, length);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -693,14 +701,83 @@ public final class PlatformDependent {
|
||||
* The resulting hash code will be case insensitive.
|
||||
*/
|
||||
public static int hashCodeAscii(CharSequence bytes) {
|
||||
if (!hasUnsafe() || !unalignedAccess()) {
|
||||
return hashCodeAsciiSafe(bytes);
|
||||
} else if (PlatformDependent0.hasCharArray(bytes)) {
|
||||
return PlatformDependent0.hashCodeAscii(PlatformDependent0.charArray(bytes));
|
||||
} else if (PlatformDependent0.hasByteArray(bytes)) {
|
||||
return PlatformDependent0.hashCodeAscii(PlatformDependent0.byteArray(bytes));
|
||||
int hash = HASH_CODE_ASCII_SEED;
|
||||
final int remainingBytes = bytes.length() & 7;
|
||||
// Benchmarking shows that by just naively looping for inputs 8~31 bytes long we incur a relatively large
|
||||
// performance penalty (only achieve about 60% performance of loop which iterates over each char). So because
|
||||
// of this we take special provisions to unroll the looping for these conditions.
|
||||
switch (bytes.length()) {
|
||||
case 31:
|
||||
case 30:
|
||||
case 29:
|
||||
case 28:
|
||||
case 27:
|
||||
case 26:
|
||||
case 25:
|
||||
case 24:
|
||||
hash = hashCodeAsciiCompute(bytes, bytes.length() - 24,
|
||||
hashCodeAsciiCompute(bytes, bytes.length() - 16,
|
||||
hashCodeAsciiCompute(bytes, bytes.length() - 8, hash)));
|
||||
break;
|
||||
case 23:
|
||||
case 22:
|
||||
case 21:
|
||||
case 20:
|
||||
case 19:
|
||||
case 18:
|
||||
case 17:
|
||||
case 16:
|
||||
hash = hashCodeAsciiCompute(bytes, bytes.length() - 16,
|
||||
hashCodeAsciiCompute(bytes, bytes.length() - 8, hash));
|
||||
break;
|
||||
case 15:
|
||||
case 14:
|
||||
case 13:
|
||||
case 12:
|
||||
case 11:
|
||||
case 10:
|
||||
case 9:
|
||||
case 8:
|
||||
hash = hashCodeAsciiCompute(bytes, bytes.length() - 8, hash);
|
||||
break;
|
||||
case 7:
|
||||
case 6:
|
||||
case 5:
|
||||
case 4:
|
||||
case 3:
|
||||
case 2:
|
||||
case 1:
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
for (int i = bytes.length() - 8; i >= remainingBytes; i -= 8) {
|
||||
hash = hashCodeAsciiCompute(bytes, i, hash);
|
||||
}
|
||||
break;
|
||||
}
|
||||
switch(remainingBytes) {
|
||||
case 7:
|
||||
return ((hash * HASH_CODE_C1 + hashCodeAsciiSanitizsByte(bytes.charAt(0)))
|
||||
* HASH_CODE_C2 + hashCodeAsciiSanitizeShort(bytes, 1))
|
||||
* HASH_CODE_C1 + hashCodeAsciiSanitizeInt(bytes, 3);
|
||||
case 6:
|
||||
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitizeShort(bytes, 0))
|
||||
* HASH_CODE_C2 + hashCodeAsciiSanitizeInt(bytes, 2);
|
||||
case 5:
|
||||
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitizsByte(bytes.charAt(0)))
|
||||
* HASH_CODE_C2 + hashCodeAsciiSanitizeInt(bytes, 1);
|
||||
case 4:
|
||||
return hash * HASH_CODE_C1 + hashCodeAsciiSanitizeInt(bytes, 0);
|
||||
case 3:
|
||||
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitizsByte(bytes.charAt(0)))
|
||||
* HASH_CODE_C2 + hashCodeAsciiSanitizeShort(bytes, 1);
|
||||
case 2:
|
||||
return hash * HASH_CODE_C1 + hashCodeAsciiSanitizeShort(bytes, 0);
|
||||
case 1:
|
||||
return hash * HASH_CODE_C1 + hashCodeAsciiSanitizsByte(bytes.charAt(0));
|
||||
default:
|
||||
return hash;
|
||||
}
|
||||
return hashCodeAsciiSafe(bytes);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1251,62 +1328,28 @@ public final class PlatformDependent {
|
||||
final int remainingBytes = length & 7;
|
||||
final int end = startPos + remainingBytes;
|
||||
for (int i = startPos - 8 + length; i >= end; i -= 8) {
|
||||
hash = hashCodeAsciiCompute(getLongSafe(bytes, i), hash);
|
||||
hash = PlatformDependent0.hashCodeAsciiCompute(getLongSafe(bytes, i), hash);
|
||||
}
|
||||
switch(remainingBytes) {
|
||||
case 7:
|
||||
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 3)), 13))
|
||||
* 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1)))
|
||||
* 31 + hashCodeAsciiSanitize(bytes[startPos]);
|
||||
return ((hash * HASH_CODE_C1 + hashCodeAsciiSanitize(bytes[startPos]))
|
||||
* HASH_CODE_C2 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1)))
|
||||
* HASH_CODE_C1 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 3));
|
||||
case 6:
|
||||
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 2)), 13))
|
||||
* 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos));
|
||||
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos)))
|
||||
* HASH_CODE_C2 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 2));
|
||||
case 5:
|
||||
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 1)), 13))
|
||||
* 31 + hashCodeAsciiSanitize(bytes[startPos]);
|
||||
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(bytes[startPos]))
|
||||
* HASH_CODE_C2 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 1));
|
||||
case 4:
|
||||
return hash * 31 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos));
|
||||
return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos));
|
||||
case 3:
|
||||
return (hash * 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1)))
|
||||
* 31 + hashCodeAsciiSanitize(bytes[startPos]);
|
||||
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(bytes[startPos]))
|
||||
* HASH_CODE_C2 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1));
|
||||
case 2:
|
||||
return hash * 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos));
|
||||
return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos));
|
||||
case 1:
|
||||
return hash * 31 + hashCodeAsciiSanitize(bytes[startPos]);
|
||||
default:
|
||||
return hash;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Package private for testing purposes only!
|
||||
*/
|
||||
static int hashCodeAsciiSafe(CharSequence bytes) {
|
||||
int hash = HASH_CODE_ASCII_SEED;
|
||||
final int remainingBytes = bytes.length() & 7;
|
||||
for (int i = bytes.length() - 8; i >= remainingBytes; i -= 8) {
|
||||
hash = hashCodeAsciiCompute(getLongFromBytesSafe(bytes, i), hash);
|
||||
}
|
||||
switch(remainingBytes) {
|
||||
case 7:
|
||||
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 3)), 13))
|
||||
* 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 1)))
|
||||
* 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
|
||||
case 6:
|
||||
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 2)), 13))
|
||||
* 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 0));
|
||||
case 5:
|
||||
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 1)), 13))
|
||||
* 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
|
||||
case 4:
|
||||
return hash * 31 + hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 0));
|
||||
case 3:
|
||||
return (hash * 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 1)))
|
||||
* 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
|
||||
case 2:
|
||||
return hash * 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 0));
|
||||
case 1:
|
||||
return hash * 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
|
||||
return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(bytes[startPos]);
|
||||
default:
|
||||
return hash;
|
||||
}
|
||||
|
@ -41,13 +41,12 @@ final class PlatformDependent0 {
|
||||
static final Unsafe UNSAFE;
|
||||
private static final long ADDRESS_FIELD_OFFSET;
|
||||
private static final long BYTE_ARRAY_BASE_OFFSET;
|
||||
private static final long CHAR_ARRAY_BASE_OFFSET;
|
||||
private static final long CHAR_ARRAY_INDEX_SCALE;
|
||||
private static final long STRING_CHAR_VALUE_FIELD_OFFSET;
|
||||
private static final long STRING_BYTE_VALUE_FIELD_OFFSET;
|
||||
private static final Constructor<?> DIRECT_BUFFER_CONSTRUCTOR;
|
||||
|
||||
static final int HASH_CODE_ASCII_SEED = 0xc2b2ae35; // constant borrowed from murmur3
|
||||
// constants borrowed from murmur3
|
||||
static final int HASH_CODE_ASCII_SEED = 0xc2b2ae35;
|
||||
static final int HASH_CODE_C1 = 0x1b873593;
|
||||
static final int HASH_CODE_C2 = 0x1b873593;
|
||||
|
||||
/**
|
||||
* Limits the number of bytes to copy per {@link Unsafe#copyMemory(long, long, long)} to allow safepoint polling
|
||||
@ -117,9 +116,8 @@ final class PlatformDependent0 {
|
||||
|
||||
if (unsafe == null) {
|
||||
ADDRESS_FIELD_OFFSET = -1;
|
||||
BYTE_ARRAY_BASE_OFFSET = CHAR_ARRAY_BASE_OFFSET = CHAR_ARRAY_INDEX_SCALE = -1;
|
||||
BYTE_ARRAY_BASE_OFFSET = -1;
|
||||
UNALIGNED = false;
|
||||
STRING_CHAR_VALUE_FIELD_OFFSET = STRING_BYTE_VALUE_FIELD_OFFSET = -1;
|
||||
DIRECT_BUFFER_CONSTRUCTOR = null;
|
||||
} else {
|
||||
Constructor<?> directBufferConstructor;
|
||||
@ -142,8 +140,6 @@ final class PlatformDependent0 {
|
||||
|
||||
ADDRESS_FIELD_OFFSET = objectFieldOffset(addressField);
|
||||
BYTE_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(byte[].class);
|
||||
CHAR_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(char[].class);
|
||||
CHAR_ARRAY_INDEX_SCALE = UNSAFE.arrayIndexScale(char[].class);
|
||||
boolean unaligned;
|
||||
try {
|
||||
Class<?> bitsClass = Class.forName("java.nio.Bits", false, ClassLoader.getSystemClassLoader());
|
||||
@ -159,48 +155,6 @@ final class PlatformDependent0 {
|
||||
|
||||
UNALIGNED = unaligned;
|
||||
logger.debug("java.nio.Bits.unaligned: {}", UNALIGNED);
|
||||
|
||||
Field stringValueField = null;
|
||||
try {
|
||||
stringValueField = AccessController.doPrivileged(new PrivilegedAction<Field>() {
|
||||
@Override
|
||||
public Field run() {
|
||||
try {
|
||||
Field f = String.class.getDeclaredField("value");
|
||||
f.setAccessible(true);
|
||||
return f;
|
||||
} catch (NoSuchFieldException e) {
|
||||
logger.info("Failed to find String value array (please report an issue)." +
|
||||
"String hash code optimizations are disabled.", e);
|
||||
} catch (SecurityException e) {
|
||||
logger.debug("No permissions to get String value array." +
|
||||
"String hash code optimizations are disabled.", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
});
|
||||
} catch (Throwable t) {
|
||||
logger.debug("AccessController.doPrivileged failed to get String value array." +
|
||||
"String hash code optimizations are disabled.", t);
|
||||
}
|
||||
|
||||
if (stringValueField == null) {
|
||||
STRING_CHAR_VALUE_FIELD_OFFSET = STRING_BYTE_VALUE_FIELD_OFFSET = -1;
|
||||
} else {
|
||||
long stringValueFieldOffset = UNSAFE.objectFieldOffset(stringValueField);
|
||||
Object o = UNSAFE.getObject("", stringValueFieldOffset);
|
||||
if (char[].class.isInstance(o)) {
|
||||
STRING_CHAR_VALUE_FIELD_OFFSET = stringValueFieldOffset;
|
||||
STRING_BYTE_VALUE_FIELD_OFFSET = -1;
|
||||
} else if (byte[].class.isInstance(o)) {
|
||||
STRING_CHAR_VALUE_FIELD_OFFSET = -1;
|
||||
STRING_BYTE_VALUE_FIELD_OFFSET = stringValueFieldOffset;
|
||||
} else {
|
||||
STRING_CHAR_VALUE_FIELD_OFFSET = STRING_BYTE_VALUE_FIELD_OFFSET = -1;
|
||||
logger.info("Unexpected type [" + o.getClass() + "] for String value array." +
|
||||
"String hash code optimizations are disabled.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logger.debug("java.nio.DirectByteBuffer.<init>(long, int): {}",
|
||||
@ -456,173 +410,61 @@ final class PlatformDependent0 {
|
||||
}
|
||||
}
|
||||
|
||||
static int hashCodeAscii(byte[] bytes) {
|
||||
return hashCodeAscii(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* This must remain consistent with {@link #hashCodeAscii(char[])}.
|
||||
*/
|
||||
static int hashCodeAscii(byte[] bytes, int startPos, int length) {
|
||||
int hash = HASH_CODE_ASCII_SEED;
|
||||
final long baseOffset = BYTE_ARRAY_BASE_OFFSET + startPos;
|
||||
final int remainingBytes = length & 7;
|
||||
if (length > 7) { // Fast path for small sized inputs. Benchmarking shows this is beneficial.
|
||||
final long end = baseOffset + remainingBytes;
|
||||
for (long i = baseOffset - 8 + length; i >= end; i -= 8) {
|
||||
hash = hashCodeAsciiCompute(UNSAFE.getLong(bytes, i), hash);
|
||||
}
|
||||
final long end = baseOffset + remainingBytes;
|
||||
for (long i = baseOffset - 8 + length; i >= end; i -= 8) {
|
||||
hash = hashCodeAsciiCompute(UNSAFE.getLong(bytes, i), hash);
|
||||
}
|
||||
switch(remainingBytes) {
|
||||
case 7:
|
||||
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 3)), 13))
|
||||
* 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1)))
|
||||
* 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
|
||||
return ((hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset)))
|
||||
* HASH_CODE_C2 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1)))
|
||||
* HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 3));
|
||||
case 6:
|
||||
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 2)), 13))
|
||||
* 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset));
|
||||
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset)))
|
||||
* HASH_CODE_C2 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 2));
|
||||
case 5:
|
||||
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 1)), 13))
|
||||
* 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
|
||||
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset)))
|
||||
* HASH_CODE_C2 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 1));
|
||||
case 4:
|
||||
return hash * 31 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset));
|
||||
return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset));
|
||||
case 3:
|
||||
return (hash * 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1)))
|
||||
* 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
|
||||
return (hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset)))
|
||||
* HASH_CODE_C2 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1));
|
||||
case 2:
|
||||
return hash * 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset));
|
||||
return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset));
|
||||
case 1:
|
||||
return hash * 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
|
||||
return hash * HASH_CODE_C1 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
|
||||
default:
|
||||
return hash;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method assumes that {@code bytes} is equivalent to a {@code byte[]} but just using {@code char[]}
|
||||
* for storage. The MSB of each {@code char} from {@code bytes} is ignored.
|
||||
* <p>
|
||||
* This must remain consistent with {@link #hashCodeAscii(byte[], int, int)}.
|
||||
*/
|
||||
static int hashCodeAscii(char[] bytes) {
|
||||
int hash = HASH_CODE_ASCII_SEED;
|
||||
final int remainingBytes = bytes.length & 7;
|
||||
for (int i = bytes.length - 8; i >= remainingBytes; i -= 8) {
|
||||
hash = hashCodeAsciiComputeFromChar(
|
||||
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + i * CHAR_ARRAY_INDEX_SCALE),
|
||||
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + (i + 4) * CHAR_ARRAY_INDEX_SCALE),
|
||||
hash);
|
||||
}
|
||||
switch(remainingBytes) {
|
||||
case 7:
|
||||
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitizeFromChar(
|
||||
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + 3 * CHAR_ARRAY_INDEX_SCALE)), 13))
|
||||
* 31 + hashCodeAsciiSanitizeFromChar(
|
||||
UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET + CHAR_ARRAY_INDEX_SCALE)))
|
||||
* 31 + hashCodeAsciiSanitizeFromChar(
|
||||
UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET));
|
||||
case 6:
|
||||
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitizeFromChar(
|
||||
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + 2 * CHAR_ARRAY_INDEX_SCALE)), 13))
|
||||
* 31 + hashCodeAsciiSanitizeFromChar(
|
||||
UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET));
|
||||
case 5:
|
||||
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitizeFromChar(
|
||||
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + CHAR_ARRAY_INDEX_SCALE)), 13))
|
||||
* 31 + hashCodeAsciiSanitizeFromChar(
|
||||
UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET));
|
||||
case 4:
|
||||
return hash * 31 + hashCodeAsciiSanitizeFromChar(
|
||||
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET));
|
||||
case 3:
|
||||
return (hash * 31 + hashCodeAsciiSanitizeFromChar(
|
||||
UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET + CHAR_ARRAY_INDEX_SCALE)))
|
||||
* 31 + hashCodeAsciiSanitizeFromChar(
|
||||
UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET));
|
||||
case 2:
|
||||
return hash * 31 + hashCodeAsciiSanitizeFromChar(
|
||||
UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET));
|
||||
case 1:
|
||||
return hash * 31 + hashCodeAsciiSanitizeFromChar(
|
||||
UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET));
|
||||
default:
|
||||
return hash;
|
||||
}
|
||||
}
|
||||
|
||||
static boolean hasCharArray(CharSequence data) {
|
||||
return STRING_CHAR_VALUE_FIELD_OFFSET != -1 && data.getClass() == String.class;
|
||||
}
|
||||
|
||||
static boolean hasByteArray(CharSequence data) {
|
||||
return STRING_BYTE_VALUE_FIELD_OFFSET != -1 && data.getClass() == String.class;
|
||||
}
|
||||
|
||||
/**
|
||||
* Callers are expected to call {@link #hasCharArray(CharSequence)} before calling this method.
|
||||
*/
|
||||
static char[] charArray(CharSequence data) {
|
||||
return (char[]) UNSAFE.getObject(data, STRING_CHAR_VALUE_FIELD_OFFSET);
|
||||
}
|
||||
|
||||
/**
|
||||
* Callers are expected to call {@link #hasByteArray(CharSequence)} before calling this method.
|
||||
*/
|
||||
static byte[] byteArray(CharSequence data) {
|
||||
return (byte[]) UNSAFE.getObject(data, STRING_BYTE_VALUE_FIELD_OFFSET);
|
||||
}
|
||||
|
||||
static int hashCodeAsciiCompute(long value, int hash) {
|
||||
// masking with 0x1f reduces the number of overall bits that impact the hash code but makes the hash
|
||||
// code the same regardless of character case (upper case or lower case hash is the same).
|
||||
return (hash * 31 +
|
||||
// High order int
|
||||
(int) ((value & 0x1f1f1f1f00000000L) >>> 32)) * 31 +
|
||||
return hash * HASH_CODE_C1 +
|
||||
// Low order int
|
||||
hashCodeAsciiSanitize((int) value);
|
||||
}
|
||||
|
||||
static int hashCodeAsciiComputeFromChar(long high, long low, int hash) {
|
||||
// masking with 0x1f reduces the number of overall bits that impact the hash code but makes the hash
|
||||
// code the same regardless of character case (upper case or lower case hash is the same).
|
||||
return (hash * 31 +
|
||||
// High order int (which is low order for char)
|
||||
hashCodeAsciiSanitizeFromChar(low)) * 31 +
|
||||
// Low order int (which is high order for char)
|
||||
hashCodeAsciiSanitizeFromChar(high);
|
||||
hashCodeAsciiSanitize((int) value) * HASH_CODE_C2 +
|
||||
// High order int
|
||||
(int) ((value & 0x1f1f1f1f00000000L) >>> 32);
|
||||
}
|
||||
|
||||
static int hashCodeAsciiSanitize(int value) {
|
||||
return value & 0x1f1f1f1f;
|
||||
}
|
||||
|
||||
private static int hashCodeAsciiSanitizeFromChar(long value) {
|
||||
return (int) (((value & 0x1f000000000000L) >>> 24) |
|
||||
((value & 0x1f00000000L) >>> 16) |
|
||||
((value & 0x1f0000) >>> 8) |
|
||||
(value & 0x1f));
|
||||
}
|
||||
|
||||
static int hashCodeAsciiSanitize(short value) {
|
||||
return value & 0x1f1f;
|
||||
}
|
||||
|
||||
private static int hashCodeAsciiSanitizeFromChar(int value) {
|
||||
return ((value & 0x1f0000) >>> 8) | (value & 0x1f);
|
||||
}
|
||||
|
||||
static int hashCodeAsciiSanitizeAsByte(char value) {
|
||||
return value & 0x1f;
|
||||
}
|
||||
|
||||
static int hashCodeAsciiSanitize(byte value) {
|
||||
return value & 0x1f;
|
||||
}
|
||||
|
||||
private static int hashCodeAsciiSanitizeFromChar(short value) {
|
||||
return value & 0x1f;
|
||||
}
|
||||
|
||||
static <U, W> AtomicReferenceFieldUpdater<U, W> newAtomicReferenceFieldUpdater(
|
||||
Class<? super U> tclass, String fieldName) throws Exception {
|
||||
return new UnsafeAtomicReferenceFieldUpdater<U, W>(UNSAFE, tclass, fieldName);
|
||||
|
@ -124,9 +124,6 @@ public class PlatformDependentTest {
|
||||
assertEquals("length=" + i,
|
||||
hashCodeAsciiSafe(bytes, 0, bytes.length),
|
||||
hashCodeAscii(bytes, 0, bytes.length));
|
||||
assertEquals("length=" + i,
|
||||
hashCodeAsciiSafe(string),
|
||||
hashCodeAscii(string));
|
||||
assertEquals("length=" + i,
|
||||
hashCodeAscii(bytes, 0, bytes.length),
|
||||
hashCodeAscii(string));
|
||||
|
@ -34,7 +34,7 @@ import java.util.Random;
|
||||
@Warmup(iterations = 5)
|
||||
public class AsciiStringBenchmark extends AbstractMicrobenchmark {
|
||||
|
||||
@Param({ "3", "5", "7", "8", "10", "20", "50" })
|
||||
@Param({ "3", "5", "7", "8", "10", "20", "50", "100", "1000" })
|
||||
public int size;
|
||||
|
||||
private AsciiString asciiString;
|
||||
|
Loading…
x
Reference in New Issue
Block a user