AsciiString optimized hashCode

Motivation:
The AsciiString.hashCode() method can be optimized. This method is frequently used while building the DefaultHeaders data structure.

Modification:
- Add a PlatformDependent hashCode algorithm which utilizes UNSAFE if available

Result:
AsciiString hashCode is faster.
This commit is contained in:
Scott Mitchell 2015-10-28 19:36:17 -07:00
parent 80ce2bf3e4
commit 33b74e3f87
7 changed files with 564 additions and 113 deletions

View File

@ -30,14 +30,14 @@ import java.util.List;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import static io.netty.util.internal.ObjectUtil.checkNotNull;
import static io.netty.util.internal.MathUtil.isOutOfBounds;
import static io.netty.util.internal.ObjectUtil.checkNotNull;
/**
* A string which has been encoded into a character encoding whose character always takes a single byte, similarly to
* ASCII. It internally keeps its content in a byte array unlike {@link String}, which uses a character array, for
* reduced memory footprint and faster data transfer from/to byte-based data structures such as a byte array and
* {@link ByteBuffer}. It is often used in conjunction with {@link TextHeaders}.
* {@link ByteBuffer}. It is often used in conjunction with {@link Headers} that require a {@link CharSequence}.
* <p>
* This class was designed to provide an immutable array of bytes, and caches some internal state based upon the value
* of this array. However underlying access to this byte array is provided via not copying the array on construction or
@ -47,7 +47,6 @@ import static io.netty.util.internal.MathUtil.isOutOfBounds;
public final class AsciiString implements CharSequence, Comparable<CharSequence> {
public static final AsciiString EMPTY_STRING = new AsciiString("");
private static final char MAX_CHAR_VALUE = 255;
private static final int HASH_CODE_PRIME = 31;
/**
* If this value is modified outside the constructor then call {@link #arrayChanged()}.
@ -1085,29 +1084,19 @@ public final class AsciiString implements CharSequence, Comparable<CharSequence>
return res.toArray(new AsciiString[res.size()]);
}
/**
* {@inheritDoc}
* <p>
* Provides a case-insensitive hash code for Ascii like byte strings.
*/
@Override
public int hashCode() {
int h = hash;
if (h == 0) {
final int end = arrayOffset() + length();
for (int i = arrayOffset(); i < end; ++i) {
// masking with 0x1F reduces the number of overall bits that impact the hash code but makes the hash
// code the same regardless of character case (upper case or lower case hash is the same).
h = h * HASH_CODE_PRIME + (value[i] & 0x1F);
}
hash = h;
if (hash == 0) {
hash = PlatformDependent.hashCodeAscii(value, offset, length);
}
return hash;
}
/**
* Generate a hash code that will be consistent regardless of ASCII character casing.
*/
public int hashCodeCaseInsensitive() {
return hashCode();
}
@Override
public boolean equals(Object obj) {
if (obj == null || obj.getClass() != AsciiString.class) {
@ -1118,9 +1107,9 @@ public final class AsciiString implements CharSequence, Comparable<CharSequence>
}
AsciiString other = (AsciiString) obj;
return hashCode() == other.hashCode() &&
PlatformDependent.equals(array(), arrayOffset(), arrayOffset() + length(),
other.array(), other.arrayOffset(), other.arrayOffset() + other.length());
return length() == other.length() &&
hashCode() == other.hashCode() &&
PlatformDependent.equals(array(), arrayOffset(), other.array(), other.arrayOffset(), length());
}
/**
@ -1336,7 +1325,7 @@ public final class AsciiString implements CharSequence, Comparable<CharSequence>
new HashingStrategy<CharSequence>() {
@Override
public int hashCode(CharSequence o) {
return AsciiString.caseInsensitiveHashCode(o);
return AsciiString.hashCode(o);
}
@Override
@ -1344,6 +1333,7 @@ public final class AsciiString implements CharSequence, Comparable<CharSequence>
return AsciiString.contentEqualsIgnoreCase(a, b);
}
};
public static final HashingStrategy<CharSequence> CASE_SENSITIVE_HASHER =
new HashingStrategy<CharSequence>() {
@Override
@ -1368,27 +1358,7 @@ public final class AsciiString implements CharSequence, Comparable<CharSequence>
/**
* Returns the case-insensitive hash code of the specified string. Note that this method uses the same hashing
* algorithm with {@link #hashCode()} so that you can put both {@link AsciiString}s and arbitrary
* {@link CharSequence}s into the same {@link TextHeaders}.
*/
public static int caseInsensitiveHashCode(CharSequence value) {
if (value == null) {
return 0;
}
if (value.getClass() == AsciiString.class) {
return ((AsciiString) value).hashCodeCaseInsensitive();
}
int hash = 0;
for (int i = 0; i < value.length(); ++i) {
hash = hash * HASH_CODE_PRIME + (value.charAt(i) & 0x1F);
}
return hash;
}
/**
* A case-sensitive version of {@link caseInsensitiveHashCode(CharSequence)}.
* @param value
* @return
* {@link CharSequence}s into the same headers.
*/
public static int hashCode(CharSequence value) {
if (value == null) {
@ -1398,11 +1368,7 @@ public final class AsciiString implements CharSequence, Comparable<CharSequence>
return ((AsciiString) value).hashCode();
}
int hash = 0;
for (int i = 0; i < value.length(); ++i) {
hash = hash * HASH_CODE_PRIME + (value.charAt(i) & 0x1F);
}
return hash;
return PlatformDependent.hashCodeAscii(value);
}
/**

View File

@ -49,6 +49,11 @@ import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static io.netty.util.internal.PlatformDependent0.HASH_CODE_ASCII_SEED;
import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiCompute;
import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiSanitize;
import static io.netty.util.internal.PlatformDependent0.hashCodeAsciiSanitizeAsByte;
/**
* Utility that detects various properties specific to the current runtime
* environment, such as Java version and the availability of the
@ -361,6 +366,88 @@ public final class PlatformDependent {
return PlatformDependent0.getLong(data, index);
}
/**
 * Packs the 8 bytes starting at {@code offset} into a {@code long} using plain array reads.
 * The first byte ends up as the most significant byte when {@code BIG_ENDIAN_NATIVE_ORDER} is set,
 * and as the least significant byte otherwise.
 *
 * @param bytes the array to read from.
 * @param offset the index of the first of the 8 bytes.
 * @return the assembled {@code long}.
 */
private static long getLongSafe(byte[] bytes, int offset) {
    // Read in ascending index order and widen every byte to an unsigned 0..255 value.
    final long b0 = bytes[offset] & 0xffL;
    final long b1 = bytes[offset + 1] & 0xffL;
    final long b2 = bytes[offset + 2] & 0xffL;
    final long b3 = bytes[offset + 3] & 0xffL;
    final long b4 = bytes[offset + 4] & 0xffL;
    final long b5 = bytes[offset + 5] & 0xffL;
    final long b6 = bytes[offset + 6] & 0xffL;
    final long b7 = bytes[offset + 7] & 0xffL;
    if (BIG_ENDIAN_NATIVE_ORDER) {
        return b0 << 56 | b1 << 48 | b2 << 40 | b3 << 32 | b4 << 24 | b5 << 16 | b6 << 8 | b7;
    }
    return b7 << 56 | b6 << 48 | b5 << 40 | b4 << 32 | b3 << 24 | b2 << 16 | b1 << 8 | b0;
}
/**
 * Packs the low byte of each of the 8 chars starting at {@code offset} into a {@code long}.
 * The upper byte of every {@code char} is discarded. The first char ends up as the most significant
 * byte when {@code BIG_ENDIAN_NATIVE_ORDER} is set, and as the least significant byte otherwise.
 *
 * @param bytes the sequence to read from (each char is treated as a single byte).
 * @param offset the index of the first of the 8 chars.
 * @return the assembled {@code long}.
 */
private static long getLongFromBytesSafe(CharSequence bytes, int offset) {
    // Read in ascending index order, keeping only the low 8 bits of each char.
    final long c0 = bytes.charAt(offset) & 0xffL;
    final long c1 = bytes.charAt(offset + 1) & 0xffL;
    final long c2 = bytes.charAt(offset + 2) & 0xffL;
    final long c3 = bytes.charAt(offset + 3) & 0xffL;
    final long c4 = bytes.charAt(offset + 4) & 0xffL;
    final long c5 = bytes.charAt(offset + 5) & 0xffL;
    final long c6 = bytes.charAt(offset + 6) & 0xffL;
    final long c7 = bytes.charAt(offset + 7) & 0xffL;
    if (BIG_ENDIAN_NATIVE_ORDER) {
        return c0 << 56 | c1 << 48 | c2 << 40 | c3 << 32 | c4 << 24 | c5 << 16 | c6 << 8 | c7;
    }
    return c7 << 56 | c6 << 48 | c5 << 40 | c4 << 32 | c3 << 24 | c2 << 16 | c1 << 8 | c0;
}
/**
 * Packs the 4 bytes starting at {@code offset} into an {@code int} using plain array reads.
 * The first byte ends up as the most significant byte when {@code BIG_ENDIAN_NATIVE_ORDER} is set,
 * and as the least significant byte otherwise.
 *
 * @param bytes the array to read from.
 * @param offset the index of the first of the 4 bytes.
 * @return the assembled {@code int}.
 */
private static int getIntSafe(byte[] bytes, int offset) {
    // Read in ascending index order and widen every byte to an unsigned 0..255 value.
    final int b0 = bytes[offset] & 0xff;
    final int b1 = bytes[offset + 1] & 0xff;
    final int b2 = bytes[offset + 2] & 0xff;
    final int b3 = bytes[offset + 3] & 0xff;
    if (BIG_ENDIAN_NATIVE_ORDER) {
        return b0 << 24 | b1 << 16 | b2 << 8 | b3;
    }
    return b3 << 24 | b2 << 16 | b1 << 8 | b0;
}
/**
 * Packs the low byte of each of the 4 chars starting at {@code offset} into an {@code int}.
 * The upper byte of every {@code char} is discarded. The first char ends up as the most significant
 * byte when {@code BIG_ENDIAN_NATIVE_ORDER} is set, and as the least significant byte otherwise.
 *
 * @param bytes the sequence to read from (each char is treated as a single byte).
 * @param offset the index of the first of the 4 chars.
 * @return the assembled {@code int}.
 */
private static int getIntFromBytesSafe(CharSequence bytes, int offset) {
    // Read in ascending index order, keeping only the low 8 bits of each char.
    final int c0 = bytes.charAt(offset) & 0xff;
    final int c1 = bytes.charAt(offset + 1) & 0xff;
    final int c2 = bytes.charAt(offset + 2) & 0xff;
    final int c3 = bytes.charAt(offset + 3) & 0xff;
    if (BIG_ENDIAN_NATIVE_ORDER) {
        return c0 << 24 | c1 << 16 | c2 << 8 | c3;
    }
    return c3 << 24 | c2 << 16 | c1 << 8 | c0;
}
/**
 * Packs the 2 bytes starting at {@code offset} into a {@code short} using plain array reads.
 * The first byte is the high byte when {@code BIG_ENDIAN_NATIVE_ORDER} is set, the low byte otherwise.
 *
 * @param bytes the array to read from.
 * @param offset the index of the first of the 2 bytes.
 * @return the assembled {@code short}.
 */
private static short getShortSafe(byte[] bytes, int offset) {
    final int first = bytes[offset] & 0xff;
    final int second = bytes[offset + 1] & 0xff;
    final int combined = BIG_ENDIAN_NATIVE_ORDER ? first << 8 | second : second << 8 | first;
    return (short) combined;
}
/**
 * Packs the low byte of each of the 2 chars starting at {@code offset} into a {@code short}.
 * The upper byte of every {@code char} is discarded. The first char is the high byte when
 * {@code BIG_ENDIAN_NATIVE_ORDER} is set, the low byte otherwise.
 *
 * @param bytes the sequence to read from (each char is treated as a single byte).
 * @param offset the index of the first of the 2 chars.
 * @return the assembled {@code short}.
 */
private static short getShortFromBytesSafe(CharSequence bytes, int offset) {
    final int first = bytes.charAt(offset) & 0xff;
    final int second = bytes.charAt(offset + 1) & 0xff;
    final int combined = BIG_ENDIAN_NATIVE_ORDER ? first << 8 | second : second << 8 | first;
    return (short) combined;
}
/**
* Forwards the call unchanged to {@code PlatformDependent0.putOrderedObject(object, address, value)}.
*
* @param object passed through as the first argument.
* @param address passed through as the second argument.
* @param value passed through as the third argument.
*/
public static void putOrderedObject(Object object, long address, Object value) {
PlatformDependent0.putOrderedObject(object, address, value);
}
@ -415,16 +502,51 @@ public final class PlatformDependent {
*
* @param bytes1 the first byte array.
* @param startPos1 the position (inclusive) to start comparing in {@code bytes1}.
* @param endPos1 the position (exclusive) to stop comparing in {@code bytes1}.
* @param bytes2 the second byte array.
* @param startPos2 the position (inclusive) to start comparing in {@code bytes2}.
* @param endPos2 the position (exclusive) to stop comparing in {@code bytes2}.
* @param length the amount of bytes to compare. This is assumed to be validated as not going out of bounds
* by the caller.
*/
public static boolean equals(byte[] bytes1, int startPos1, int endPos1, byte[] bytes2, int startPos2, int endPos2) {
public static boolean equals(byte[] bytes1, int startPos1, byte[] bytes2, int startPos2, int length) {
if (!hasUnsafe() || !PlatformDependent0.unalignedAccess()) {
return safeEquals(bytes1, startPos1, endPos1, bytes2, startPos2, endPos2);
return equalsSafe(bytes1, startPos1, bytes2, startPos2, length);
}
return PlatformDependent0.equals(bytes1, startPos1, endPos1, bytes2, startPos2, endPos2);
return PlatformDependent0.equals(bytes1, startPos1, bytes2, startPos2, length);
}
/**
 * Computes a case insensitive hash code for a range of a byte array, treating the content as ASCII.
 * <p>
 * The {@code Unsafe} based implementation is used only when {@code Unsafe} is available and unaligned
 * access is supported; otherwise the plain array implementation is used.
 *
 * @param bytes The array which contains the data to hash.
 * @param startPos The index in {@code bytes} at which hashing starts.
 * @param length The number of bytes to include in the hash.
 * @return The case insensitive hash code of the given range of {@code bytes}.
 */
public static int hashCodeAscii(byte[] bytes, int startPos, int length) {
    final boolean useUnsafe = hasUnsafe() && PlatformDependent0.unalignedAccess();
    return useUnsafe
            ? PlatformDependent0.hashCodeAscii(bytes, startPos, length)
            : hashCodeAsciiSafe(bytes, startPos, length);
}
/**
 * Computes a case insensitive hash code for a {@link CharSequence}, treating the content as ASCII.
 * <p>
 * {@code bytes} is treated as if it were a {@code byte[]} that merely uses {@link CharSequence} for
 * storage: the upper byte of every {@code char} is ignored.
 * <p>
 * The {@code Unsafe} based implementation is used only when {@code Unsafe} is available, unaligned
 * access is supported and a backing {@code char[]} can be obtained for {@code bytes}; otherwise the
 * {@code charAt} based implementation is used.
 *
 * @param bytes The data to hash (assumed to be equivalent to a {@code byte[]}).
 * @return The case insensitive hash code of {@code bytes}.
 */
public static int hashCodeAscii(CharSequence bytes) {
    if (hasUnsafe() && PlatformDependent0.unalignedAccess()) {
        final char[] backingArray = PlatformDependent0.array(bytes);
        if (backingArray != null) {
            return PlatformDependent0.hashCodeAscii(backingArray);
        }
    }
    return hashCodeAsciiSafe(bytes);
}
/**
@ -939,14 +1061,8 @@ public final class PlatformDependent {
return PlatformDependent0.addressSize();
}
private static boolean safeEquals(byte[] bytes1, int startPos1, int endPos1,
byte[] bytes2, int startPos2, int endPos2) {
final int len1 = endPos1 - startPos1;
final int len2 = endPos2 - startPos2;
if (len1 != len2) {
return false;
}
final int end = startPos1 + len1;
private static boolean equalsSafe(byte[] bytes1, int startPos1, byte[] bytes2, int startPos2, int length) {
final int end = startPos1 + length;
for (int i = startPos1, j = startPos2; i < end; ++i, ++j) {
if (bytes1[i] != bytes2[j]) {
return false;
@ -955,7 +1071,78 @@ public final class PlatformDependent {
return true;
}
/**
* Computes the case insensitive ASCII hash code of a byte range using only plain array reads
* (via {@code getLongSafe}, {@code getIntSafe} and {@code getShortSafe}).
* <p>
* Package private for testing purposes only!
*
* @param bytes the array which contains the data to hash.
* @param startPos the index in {@code bytes} at which hashing starts.
* @param length the number of bytes to include in the hash.
* @return the hash code of the given range.
*/
static int hashCodeAsciiSafe(byte[] bytes, int startPos, int length) {
int hash = HASH_CODE_ASCII_SEED;
// Number of bytes which do not fill a whole 8 byte group.
final int remainingBytes = length & 7;
final int end = startPos + remainingBytes;
// Fold in whole 8 byte groups, walking backwards from the end of the range down to "end".
for (int i = startPos - 8 + length; i >= end; i -= 8) {
hash = hashCodeAsciiCompute(getLongSafe(bytes, i), hash);
}
// The leading "remainingBytes" bytes (starting at startPos) are folded in last, split into
// int / short / byte sized reads. Whenever an int is followed by another read it is rotated by 13 bits.
switch(remainingBytes) {
case 7:
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 3)), 13))
* 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1)))
* 31 + hashCodeAsciiSanitize(bytes[startPos]);
case 6:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 2)), 13))
* 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos));
case 5:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntSafe(bytes, startPos + 1)), 13))
* 31 + hashCodeAsciiSanitize(bytes[startPos]);
case 4:
return hash * 31 + hashCodeAsciiSanitize(getIntSafe(bytes, startPos));
case 3:
return (hash * 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos + 1)))
* 31 + hashCodeAsciiSanitize(bytes[startPos]);
case 2:
return hash * 31 + hashCodeAsciiSanitize(getShortSafe(bytes, startPos));
case 1:
return hash * 31 + hashCodeAsciiSanitize(bytes[startPos]);
default:
// No leading bytes left: the length is a multiple of 8 (possibly 0).
return hash;
}
}
/**
* Computes the case insensitive ASCII hash code of a {@link CharSequence} using only {@code charAt}
* based reads (via {@code getLongFromBytesSafe}, {@code getIntFromBytesSafe} and
* {@code getShortFromBytesSafe}).
* <p>
* Package private for testing purposes only!
*
* @param bytes the data to hash.
* @return the hash code of {@code bytes}.
*/
static int hashCodeAsciiSafe(CharSequence bytes) {
int hash = HASH_CODE_ASCII_SEED;
// Number of chars which do not fill a whole group of 8.
final int remainingBytes = bytes.length() & 7;
// Fold in whole groups of 8 chars, walking backwards from the end down to index "remainingBytes".
for (int i = bytes.length() - 8; i >= remainingBytes; i -= 8) {
hash = hashCodeAsciiCompute(getLongFromBytesSafe(bytes, i), hash);
}
// The leading "remainingBytes" chars (starting at index 0) are folded in last, split into
// int / short / byte sized reads. Whenever an int is followed by another read it is rotated by 13 bits.
switch(remainingBytes) {
case 7:
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 3)), 13))
* 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 1)))
* 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
case 6:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 2)), 13))
* 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 0));
case 5:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 1)), 13))
* 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
case 4:
return hash * 31 + hashCodeAsciiSanitize(getIntFromBytesSafe(bytes, 0));
case 3:
return (hash * 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 1)))
* 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
case 2:
return hash * 31 + hashCodeAsciiSanitize(getShortFromBytesSafe(bytes, 0));
case 1:
return hash * 31 + hashCodeAsciiSanitizeAsByte(bytes.charAt(0));
default:
// No leading chars left: the length is a multiple of 8 (possibly 0).
return hash;
}
}
private static final class AtomicLongCounter extends AtomicLong implements LongCounter {
private static final long serialVersionUID = 4074772784610639305L;
@Override
public void add(long delta) {
addAndGet(delta);

View File

@ -40,6 +40,10 @@ final class PlatformDependent0 {
static final Unsafe UNSAFE;
private static final long ADDRESS_FIELD_OFFSET;
private static final long BYTE_ARRAY_BASE_OFFSET;
private static final long CHAR_ARRAY_BASE_OFFSET;
private static final long CHAR_ARRAY_INDEX_SCALE;
private static final long STRING_VALUE_FIELD_OFFSET;
static final int HASH_CODE_ASCII_SEED = 0xc2b2ae35; // constant borrowed from murmur3
/**
* Limits the number of bytes to copy per {@link Unsafe#copyMemory(long, long, long)} to allow safepoint polling
@ -108,11 +112,14 @@ final class PlatformDependent0 {
if (unsafe == null) {
ADDRESS_FIELD_OFFSET = -1;
BYTE_ARRAY_BASE_OFFSET = -1;
BYTE_ARRAY_BASE_OFFSET = CHAR_ARRAY_BASE_OFFSET = CHAR_ARRAY_INDEX_SCALE = -1;
UNALIGNED = false;
STRING_VALUE_FIELD_OFFSET = -1;
} else {
ADDRESS_FIELD_OFFSET = objectFieldOffset(addressField);
BYTE_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(byte[].class);
CHAR_ARRAY_BASE_OFFSET = UNSAFE.arrayBaseOffset(char[].class);
CHAR_ARRAY_INDEX_SCALE = UNSAFE.arrayIndexScale(char[].class);
boolean unaligned;
try {
Class<?> bitsClass = Class.forName("java.nio.Bits", false, ClassLoader.getSystemClassLoader());
@ -128,6 +135,26 @@ final class PlatformDependent0 {
UNALIGNED = unaligned;
logger.debug("java.nio.Bits.unaligned: {}", UNALIGNED);
// Look up the private "value" field of String inside a privileged action. A null result means the
// field could not be obtained, in which case the reason is logged.
Field stringValueField = AccessController.doPrivileged(new PrivilegedAction<Field>() {
    @Override
    public Field run() {
        try {
            Field f = String.class.getDeclaredField("value");
            f.setAccessible(true);
            return f;
        } catch (NoSuchFieldException e) {
            // The two sentences are concatenated, so the first one must end with a space.
            logger.warn("Failed to find String value array. " +
                    "String hash code optimizations are disabled.", e);
        } catch (SecurityException e) {
            logger.info("No permissions to get String value array. " +
                    "String hash code optimizations are disabled.", e);
        }
        return null;
    }
});
STRING_VALUE_FIELD_OFFSET = stringValueField == null ?
-1 : UNSAFE.objectFieldOffset(stringValueField);
}
}
@ -272,35 +299,188 @@ final class PlatformDependent0 {
}
}
static boolean equals(byte[] bytes1, int startPos1, int endPos1, byte[] bytes2, int startPos2, int endPos2) {
final int len1 = endPos1 - startPos1;
final int len2 = endPos2 - startPos2;
if (len1 != len2) {
return false;
}
if (len1 == 0) {
static boolean equals(byte[] bytes1, int startPos1, byte[] bytes2, int startPos2, int length) {
if (length == 0) {
return true;
}
final long baseOffset1 = BYTE_ARRAY_BASE_OFFSET + startPos1;
final long baseOffset2 = BYTE_ARRAY_BASE_OFFSET + startPos2;
int remainingBytes = len1 & 7;
for (int i = len1 - 8; i >= remainingBytes; i -= 8) {
if (UNSAFE.getLong(bytes1, baseOffset1 + i) != UNSAFE.getLong(bytes2, baseOffset2 + i)) {
final int remainingBytes = length & 7;
final long end = baseOffset1 + remainingBytes;
for (long i = baseOffset1 - 8 + length, j = baseOffset2 - 8 + length; i >= end; i -= 8, j -= 8) {
if (UNSAFE.getLong(bytes1, i) != UNSAFE.getLong(bytes2, j)) {
return false;
}
}
if (remainingBytes >= 4) {
remainingBytes -= 4;
if (UNSAFE.getInt(bytes1, baseOffset1 + remainingBytes) !=
UNSAFE.getInt(bytes2, baseOffset2 + remainingBytes)) {
return false;
switch (remainingBytes) {
case 7:
return UNSAFE.getInt(bytes1, baseOffset1 + 3) == UNSAFE.getInt(bytes2, baseOffset2 + 3) &&
UNSAFE.getChar(bytes1, baseOffset1 + 1) == UNSAFE.getChar(bytes2, baseOffset2 + 1) &&
UNSAFE.getByte(bytes1, baseOffset1) == UNSAFE.getByte(bytes2, baseOffset2);
case 6:
return UNSAFE.getInt(bytes1, baseOffset1 + 2) == UNSAFE.getInt(bytes2, baseOffset2 + 2) &&
UNSAFE.getChar(bytes1, baseOffset1) == UNSAFE.getChar(bytes2, baseOffset2);
case 5:
return UNSAFE.getInt(bytes1, baseOffset1 + 1) == UNSAFE.getInt(bytes2, baseOffset2 + 1) &&
UNSAFE.getByte(bytes1, baseOffset1) == UNSAFE.getByte(bytes2, baseOffset2);
case 4:
return UNSAFE.getInt(bytes1, baseOffset1) == UNSAFE.getInt(bytes2, baseOffset2);
case 3:
return UNSAFE.getChar(bytes1, baseOffset1 + 1) == UNSAFE.getChar(bytes2, baseOffset2 + 1) &&
UNSAFE.getByte(bytes1, baseOffset1) == UNSAFE.getByte(bytes2, baseOffset2);
case 2:
return UNSAFE.getChar(bytes1, baseOffset1) == UNSAFE.getChar(bytes2, baseOffset2);
case 1:
return UNSAFE.getByte(bytes1, baseOffset1) == UNSAFE.getByte(bytes2, baseOffset2);
default:
return true;
}
}
/**
* This must remain consistent with {@link #hashCodeAscii(char[])}.
*/
static int hashCodeAscii(byte[] bytes, int startPos, int length) {
int hash = HASH_CODE_ASCII_SEED;
final long baseOffset = BYTE_ARRAY_BASE_OFFSET + startPos;
final int remainingBytes = length & 7;
if (length > 7) { // Fast path for small sized inputs. Benchmarking shows this is beneficial.
final long end = baseOffset + remainingBytes;
for (long i = baseOffset - 8 + length; i >= end; i -= 8) {
hash = hashCodeAsciiCompute(UNSAFE.getLong(bytes, i), hash);
}
}
if (remainingBytes >= 2) {
return UNSAFE.getChar(bytes1, baseOffset1) == UNSAFE.getChar(bytes2, baseOffset2) &&
(remainingBytes == 2 || bytes1[startPos1 + 2] == bytes2[startPos2 + 2]);
switch(remainingBytes) {
case 7:
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 3)), 13))
* 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1)))
* 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
case 6:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 2)), 13))
* 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset));
case 5:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset + 1)), 13))
* 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
case 4:
return hash * 31 + hashCodeAsciiSanitize(UNSAFE.getInt(bytes, baseOffset));
case 3:
return (hash * 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset + 1)))
* 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
case 2:
return hash * 31 + hashCodeAsciiSanitize(UNSAFE.getShort(bytes, baseOffset));
case 1:
return hash * 31 + hashCodeAsciiSanitize(UNSAFE.getByte(bytes, baseOffset));
default:
return hash;
}
return bytes1[startPos1] == bytes2[startPos2];
}
/**
* Computes the case insensitive ASCII hash code of a {@code char[]} using {@code UNSAFE} reads.
* <p>
* This method assumes that {@code bytes} is equivalent to a {@code byte[]} but just using {@code char[]}
* for storage. The MSB of each {@code char} from {@code bytes} is ignored.
* <p>
* This must remain consistent with {@link #hashCodeAscii(byte[], int, int)}.
*
* @param bytes the chars to hash; only the low byte of each {@code char} contributes.
* @return the hash code of {@code bytes}.
*/
static int hashCodeAscii(char[] bytes) {
int hash = HASH_CODE_ASCII_SEED;
// Number of chars which do not fill a whole group of 8.
final int remainingBytes = bytes.length & 7;
// Fold in whole groups of 8 chars, walking backwards from the end of the array. Each group is read
// as two longs: chars [i, i + 4) and chars [i + 4, i + 8).
for (int i = bytes.length - 8; i >= remainingBytes; i -= 8) {
hash = hashCodeAsciiComputeFromChar(
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + i * CHAR_ARRAY_INDEX_SCALE),
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + (i + 4) * CHAR_ARRAY_INDEX_SCALE),
hash);
}
// The leading "remainingBytes" chars are folded in last. A long read covers 4 chars, an int read
// covers 2 chars and a short read covers 1 char.
switch(remainingBytes) {
case 7:
return ((hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitizeFromChar(
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + 3 * CHAR_ARRAY_INDEX_SCALE)), 13))
* 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET + CHAR_ARRAY_INDEX_SCALE)))
* 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET));
case 6:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitizeFromChar(
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + 2 * CHAR_ARRAY_INDEX_SCALE)), 13))
* 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET));
case 5:
return (hash * 31 + Integer.rotateLeft(hashCodeAsciiSanitizeFromChar(
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET + CHAR_ARRAY_INDEX_SCALE)), 13))
* 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET));
case 4:
return hash * 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getLong(bytes, CHAR_ARRAY_BASE_OFFSET));
case 3:
return (hash * 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET + CHAR_ARRAY_INDEX_SCALE)))
* 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET));
case 2:
return hash * 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getInt(bytes, CHAR_ARRAY_BASE_OFFSET));
case 1:
return hash * 31 + hashCodeAsciiSanitizeFromChar(
UNSAFE.getShort(bytes, CHAR_ARRAY_BASE_OFFSET));
default:
// No leading chars left: the length is a multiple of 8 (possibly 0).
return hash;
}
}
/**
 * Returns the object stored in the {@code value} field of {@code data}, cast to {@code char[]},
 * when {@code data} is exactly a {@link String} and the field offset was resolved.
 *
 * @param data the sequence whose backing array is requested.
 * @return the array read via {@code UNSAFE}, or {@code null} if the field offset is unknown or
 *         {@code data} is not a {@link String}.
 */
static char[] array(CharSequence data) {
    if (STRING_VALUE_FIELD_OFFSET == -1 || data.getClass() != String.class) {
        return null;
    }
    return (char[]) UNSAFE.getObject(data, STRING_VALUE_FIELD_OFFSET);
}
/**
 * Folds 8 bytes, packed into {@code value}, into {@code hash}.
 * <p>
 * Every byte is masked with {@code 0x1f}. This reduces the number of bits that influence the hash code but
 * makes the result independent of ASCII character case (upper and lower case hash to the same value).
 *
 * @param value 8 packed bytes.
 * @param hash the hash accumulated so far.
 * @return the updated hash.
 */
static int hashCodeAsciiCompute(long value, int hash) {
    final int highOrderInt = (int) ((value & 0x1f1f1f1f00000000L) >>> 32);
    final int lowOrderInt = hashCodeAsciiSanitize((int) value);
    return (hash * 31 + highOrderInt) * 31 + lowOrderInt;
}

/**
 * Folds 8 chars, supplied as two longs of 4 chars each, into {@code hash}. Only the low 5 bits of the low
 * byte of each char contribute, which makes the result independent of ASCII character case.
 * <p>
 * NOTE(review): {@code low} is folded in first and {@code high} second, which lines up with
 * {@link #hashCodeAsciiCompute(long, int)} when the longs were read in little-endian order - confirm the
 * behaviour on big-endian platforms.
 *
 * @param high the first argument; sanitized and folded in second.
 * @param low the second argument; sanitized and folded in first.
 * @param hash the hash accumulated so far.
 * @return the updated hash.
 */
static int hashCodeAsciiComputeFromChar(long high, long low, int hash) {
    final int foldedFirst = hashCodeAsciiSanitizeFromChar(low);
    final int foldedSecond = hashCodeAsciiSanitizeFromChar(high);
    return (hash * 31 + foldedFirst) * 31 + foldedSecond;
}

/**
 * Keeps the low 5 bits of each of the 4 bytes packed into {@code value}.
 */
static int hashCodeAsciiSanitize(int value) {
    final int mask = 0x1f1f1f1f;
    return value & mask;
}

/**
 * Compacts 4 chars packed into {@code value} (one per 16 bit lane) into an {@code int} with one byte per
 * char, keeping only the low 5 bits of each lane.
 */
private static int hashCodeAsciiSanitizeFromChar(long value) {
    final long lane3 = (value & 0x1f000000000000L) >>> 24;
    final long lane2 = (value & 0x1f00000000L) >>> 16;
    final long lane1 = (value & 0x1f0000) >>> 8;
    final long lane0 = value & 0x1f;
    return (int) (lane3 | lane2 | lane1 | lane0);
}

/**
 * Keeps the low 5 bits of each of the 2 bytes packed into {@code value}.
 */
static int hashCodeAsciiSanitize(short value) {
    final int mask = 0x1f1f;
    return value & mask;
}

/**
 * Compacts 2 chars packed into {@code value} (one per 16 bit lane) into the low 2 bytes of the result,
 * keeping only the low 5 bits of each lane.
 */
private static int hashCodeAsciiSanitizeFromChar(int value) {
    final int lane1 = (value & 0x1f0000) >>> 8;
    final int lane0 = value & 0x1f;
    return lane1 | lane0;
}

/**
 * Keeps the low 5 bits of {@code value}, ignoring everything above them.
 */
static int hashCodeAsciiSanitizeAsByte(char value) {
    final int mask = 0x1f;
    return value & mask;
}

/**
 * Keeps the low 5 bits of {@code value}.
 */
static int hashCodeAsciiSanitize(byte value) {
    final int mask = 0x1f;
    return value & mask;
}

/**
 * Keeps the low 5 bits of the single char stored in {@code value}.
 */
private static int hashCodeAsciiSanitizeFromChar(short value) {
    final int mask = 0x1f;
    return value & mask;
}
static <U, W> AtomicReferenceFieldUpdater<U, W> newAtomicReferenceFieldUpdater(
@ -371,5 +551,4 @@ final class PlatformDependent0 {
private PlatformDependent0() {
}
}

View File

@ -22,7 +22,6 @@ import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.Random;
import static io.netty.util.AsciiString.caseInsensitiveHashCode;
import static io.netty.util.AsciiString.contains;
import static io.netty.util.AsciiString.containsIgnoreCase;
import static org.junit.Assert.assertArrayEquals;
@ -97,7 +96,6 @@ public class AsciiStringCharacterTest {
AsciiString sub1 = ascii.subSequence(start, end, false);
AsciiString sub2 = ascii.subSequence(start, end, true);
assertEquals(sub1.hashCode(), sub2.hashCode());
assertEquals(sub1.hashCodeCaseInsensitive(), sub2.hashCode());
assertEquals(sub1, sub2);
for (int i = start; i < end; ++i) {
assertEquals(init[i], sub1.byteAt(i - start));
@ -199,14 +197,14 @@ public class AsciiStringCharacterTest {
assertEquals(errorString, lowerCaseExpected, lowerCaseAscii.hashCode());
// Test case insensitive hash codes are equal
final int expectedCaseInsensative = lowerCaseAscii.hashCodeCaseInsensitive();
assertEquals(errorString, expectedCaseInsensative, caseInsensitiveHashCode(upperCaseBuilder));
assertEquals(errorString, expectedCaseInsensative, caseInsensitiveHashCode(upperCaseString));
assertEquals(errorString, expectedCaseInsensative, caseInsensitiveHashCode(lowerCaseString));
assertEquals(errorString, expectedCaseInsensative, caseInsensitiveHashCode(lowerCaseAscii));
assertEquals(errorString, expectedCaseInsensative, caseInsensitiveHashCode(upperCaseAscii));
assertEquals(errorString, expectedCaseInsensative, lowerCaseAscii.hashCodeCaseInsensitive());
assertEquals(errorString, expectedCaseInsensative, upperCaseAscii.hashCodeCaseInsensitive());
final int expectedCaseInsensative = lowerCaseAscii.hashCode();
assertEquals(errorString, expectedCaseInsensative, AsciiString.hashCode(upperCaseBuilder));
assertEquals(errorString, expectedCaseInsensative, AsciiString.hashCode(upperCaseString));
assertEquals(errorString, expectedCaseInsensative, AsciiString.hashCode(lowerCaseString));
assertEquals(errorString, expectedCaseInsensative, AsciiString.hashCode(lowerCaseAscii));
assertEquals(errorString, expectedCaseInsensative, AsciiString.hashCode(upperCaseAscii));
assertEquals(errorString, expectedCaseInsensative, lowerCaseAscii.hashCode());
assertEquals(errorString, expectedCaseInsensative, upperCaseAscii.hashCode());
// Test that opposite cases are equal
assertEquals(errorString, lowerCaseAscii.hashCode(), AsciiString.hashCode(upperCaseString));
@ -222,7 +220,7 @@ public class AsciiStringCharacterTest {
array[offset + i] = s1.charAt(i);
}
CharBuffer buffer = CharBuffer.wrap(array, offset, s1.length());
assertEquals(caseInsensitiveHashCode(s1), caseInsensitiveHashCode(buffer));
assertEquals(AsciiString.hashCode(s1), AsciiString.hashCode(buffer));
}
@Test

View File

@ -17,40 +17,43 @@ package io.netty.util.internal;
import org.junit.Test;
import java.util.Random;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertTrue;
public class PlatformDependentTest {
private static final Random r = new Random();
@Test
public void testEquals() {
byte[] bytes1 = {'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd'};
byte[] bytes2 = {'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd'};
assertNotSame(bytes1, bytes2);
assertTrue(PlatformDependent.equals(bytes1, 0, bytes1.length, bytes2, 0, bytes2.length));
assertTrue(PlatformDependent.equals(bytes1, 2, bytes1.length, bytes2, 2, bytes2.length));
assertTrue(PlatformDependent.equals(bytes1, 0, bytes2, 0, bytes1.length));
assertTrue(PlatformDependent.equals(bytes1, 2, bytes2, 2, bytes1.length - 2));
bytes1 = new byte[] {1, 2, 3, 4, 5, 6};
bytes2 = new byte[] {1, 2, 3, 4, 5, 6, 7};
assertNotSame(bytes1, bytes2);
assertFalse(PlatformDependent.equals(bytes1, 0, bytes1.length, bytes2, 0, bytes2.length));
assertTrue(PlatformDependent.equals(bytes2, 0, 6, bytes1, 0, 6));
assertFalse(PlatformDependent.equals(bytes1, 0, bytes2, 1, bytes1.length));
assertTrue(PlatformDependent.equals(bytes2, 0, bytes1, 0, bytes1.length));
bytes1 = new byte[] {1, 2, 3, 4};
bytes2 = new byte[] {1, 2, 3, 5};
assertFalse(PlatformDependent.equals(bytes1, 0, bytes1.length, bytes2, 0, bytes2.length));
assertTrue(PlatformDependent.equals(bytes1, 0, 3, bytes2, 0, 3));
assertFalse(PlatformDependent.equals(bytes1, 0, bytes2, 0, bytes1.length));
assertTrue(PlatformDependent.equals(bytes1, 0, bytes2, 0, 3));
bytes1 = new byte[] {1, 2, 3, 4};
bytes2 = new byte[] {1, 3, 3, 4};
assertFalse(PlatformDependent.equals(bytes1, 0, bytes1.length, bytes2, 0, bytes2.length));
assertTrue(PlatformDependent.equals(bytes1, 2, bytes1.length, bytes2, 2, bytes2.length));
assertFalse(PlatformDependent.equals(bytes1, 0, bytes2, 0, bytes1.length));
assertTrue(PlatformDependent.equals(bytes1, 2, bytes2, 2, bytes1.length - 2));
bytes1 = new byte[0];
bytes2 = new byte[0];
assertNotSame(bytes1, bytes2);
assertTrue(PlatformDependent.equals(bytes1, 0, 0, bytes2, 0, 0));
assertTrue(PlatformDependent.equals(bytes1, 0, bytes2, 0, 0));
bytes1 = new byte[100];
bytes2 = new byte[100];
@ -58,16 +61,50 @@ public class PlatformDependentTest {
bytes1[i] = (byte) i;
bytes2[i] = (byte) i;
}
assertTrue(PlatformDependent.equals(bytes1, 0, bytes1.length, bytes2, 0, bytes2.length));
assertTrue(PlatformDependent.equals(bytes1, 0, bytes2, 0, bytes1.length));
bytes1[50] = 0;
assertFalse(PlatformDependent.equals(bytes1, 0, bytes1.length, bytes2, 0, bytes2.length));
assertTrue(PlatformDependent.equals(bytes1, 51, bytes1.length, bytes2, 51, bytes2.length));
assertTrue(PlatformDependent.equals(bytes1, 0, 50, bytes2, 0, 50));
assertFalse(PlatformDependent.equals(bytes1, 0, bytes2, 0, bytes1.length));
assertTrue(PlatformDependent.equals(bytes1, 51, bytes2, 51, bytes1.length - 51));
assertTrue(PlatformDependent.equals(bytes1, 0, bytes2, 0, 50));
bytes1 = new byte[]{1, 2, 3, 4, 5};
bytes2 = new byte[]{3, 4, 5};
assertFalse(PlatformDependent.equals(bytes1, 0, bytes1.length, bytes2, 0, bytes2.length));
assertTrue(PlatformDependent.equals(bytes1, 2, bytes1.length, bytes2, 0, bytes2.length));
assertTrue(PlatformDependent.equals(bytes2, 0, bytes2.length, bytes1, 2, bytes1.length));
assertFalse(PlatformDependent.equals(bytes1, 0, bytes2, 0, bytes2.length));
assertTrue(PlatformDependent.equals(bytes1, 2, bytes2, 0, bytes2.length));
assertTrue(PlatformDependent.equals(bytes2, 0, bytes1, 2, bytes2.length));
for (int i = 0; i < 1000; ++i) {
bytes1 = new byte[i];
r.nextBytes(bytes1);
bytes2 = bytes1.clone();
assertTrue(PlatformDependent.equals(bytes1, 0, bytes2, 0, bytes1.length));
}
}
/**
 * Returns a random {@code char} in the range {@code [0, 255]}, i.e. a value which fits into a single byte.
 */
private static char randomCharInByteRange() {
    final int exclusiveUpperBound = 256;
    return (char) r.nextInt(exclusiveUpperBound);
}
/**
 * Verifies for every length in {@code [0, 1000)} that the safe and the platform dependent hash code
 * implementations agree, both for {@code byte[]} and for {@link String} input, and that the
 * {@code byte[]} and {@link String} variants produce the same hash code for equivalent content.
 */
@Test
public void testHashCodeAscii() {
    for (int length = 0; length < 1000; ++length) {
        final String message = "length=" + length;
        // Fill both arrays with the same random content; every char is within the single byte range.
        final char[] chars = new char[length];
        final byte[] bytes = new byte[length];
        for (int j = 0; j < length; ++j) {
            final char c = randomCharInByteRange();
            chars[j] = c;
            bytes[j] = (byte) (c & 0xff);
        }
        final String string = new String(chars);

        // Safe vs. platform dependent implementation for byte[].
        assertEquals(message,
                     PlatformDependent.hashCodeAsciiSafe(bytes, 0, bytes.length),
                     PlatformDependent.hashCodeAscii(bytes, 0, bytes.length));
        // Safe vs. platform dependent implementation for CharSequence.
        assertEquals(message,
                     PlatformDependent.hashCodeAsciiSafe(string),
                     PlatformDependent.hashCodeAscii(string));
        // byte[] vs. CharSequence.
        assertEquals(message,
                     PlatformDependent.hashCodeAscii(bytes, 0, bytes.length),
                     PlatformDependent.hashCodeAscii(string));
    }
}
}

View File

@ -51,7 +51,7 @@ public class PlatformDependentBenchmark extends AbstractMicrobenchmark {
@Benchmark
@BenchmarkMode(Mode.Throughput)
public boolean unsafeBytesEqual() {
return PlatformDependent.equals(bytes1, 0, bytes1.length, bytes2, 0, bytes2.length);
return PlatformDependent.equals(bytes1, 0, bytes2, 0, bytes1.length);
}
@Benchmark

View File

@ -0,0 +1,84 @@
/*
* Copyright 2015 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.microbenchmark.common;
import io.netty.microbench.util.AbstractMicrobenchmark;
import io.netty.util.AsciiString;
import io.netty.util.CharsetUtil;
import io.netty.util.internal.PlatformDependent;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import java.util.Random;
/**
* Benchmarks the previous loop based hash code computation against
* {@code PlatformDependent.hashCodeAscii}, both for the bytes of an {@link AsciiString} and for a
* {@link String}, across several input sizes.
*/
@Threads(1)
@Measurement(iterations = 5)
@Warmup(iterations = 5)
public class AsciiStringBenchmark extends AbstractMicrobenchmark {
// Number of random bytes generated for each trial.
@Param({ "3", "5", "7", "8", "10", "20", "50" })
public int size;
private AsciiString asciiString;
private String string;
private static final Random random = new Random();
/**
* Creates the inputs once per trial from {@code size} random bytes.
*/
@Setup(Level.Trial)
public void setup() {
byte[] bytes = new byte[size];
random.nextBytes(bytes);
// NOTE(review): the second argument presumably disables copying of the array - confirm.
asciiString = new AsciiString(bytes, false);
// NOTE(review): assuming CharsetUtil.US_ASCII is the standard US-ASCII charset, random bytes outside
// 0..127 cannot be decoded, so the String content may not mirror the byte array - confirm.
string = new String(bytes, CharsetUtil.US_ASCII);
}
/**
* Baseline: the loop based hash over the bytes of the {@link AsciiString}.
*/
@Benchmark
public int hashCodeBenchBytesOld() {
int h = 0;
final int end = asciiString.arrayOffset() + asciiString.length();
for (int i = asciiString.arrayOffset(); i < end; ++i) {
// masking with 0x1F reduces the number of overall bits that impact the hash code but makes the hash
// code the same regardless of character case (upper case or lower case hash is the same).
h = h * 31 + (asciiString.array()[i] & 0x1F);
}
return h;
}
/**
* Candidate: {@code PlatformDependent.hashCodeAscii} over the bytes of the {@link AsciiString}.
*/
@Benchmark
public int hashCodeBenchBytesNew() {
return PlatformDependent.hashCodeAscii(asciiString.array(), asciiString.arrayOffset(), asciiString.length());
}
/**
* Baseline: the loop based hash over the chars of the {@link String}.
*/
@Benchmark
public int hashCodeBenchCharSequenceOld() {
int h = 0;
for (int i = 0; i < string.length(); ++i) {
// masking with 0x1F reduces the number of overall bits that impact the hash code but makes the hash
// code the same regardless of character case (upper case or lower case hash is the same).
h = h * 31 + (string.charAt(i) & 0x1F);
}
return h;
}
/**
* Candidate: {@code PlatformDependent.hashCodeAscii} over the {@link String}.
*/
@Benchmark
public int hashCodeBenchCharSequenceNew() {
return PlatformDependent.hashCodeAscii(string);
}
}