Use the Two-Way algorithm to optimize the ByteBufUtil.indexOf() method (#11367)
Use the Two-Way algorithm to optimize the ByteBufUtil.indexOf() method. Motivation: ByteBufUtil.indexOf() can be inefficient for substring search on a ByteBuf, because its time complexity is O(needle.readableBytes * haystack.readableBytes); the Two-Way string-matching algorithm can be used instead. Modification: Use the Two-Way algorithm to implement the ByteBufUtil.indexOf() method. Result: The ByteBufUtil.indexOf() method performs better than the original implementation.
This commit is contained in:
parent
39d08dbf0c
commit
3273679e5f
@ -226,20 +226,125 @@ public final class ByteBufUtil {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the reader index of needle in haystack, or -1 if needle is not in haystack.
|
* Returns the reader index of needle in haystack, or -1 if needle is not in haystack.
|
||||||
|
* This method uses the <a href="https://en.wikipedia.org/wiki/Two-way_string-matching_algorithm">Two-Way
|
||||||
|
* string matching algorithm</a>, which yields O(1) space complexity and excellent performance.
|
||||||
*/
|
*/
|
||||||
public static int indexOf(ByteBuf needle, ByteBuf haystack) {
|
public static int indexOf(ByteBuf needle, ByteBuf haystack) {
|
||||||
// TODO: maybe use Boyer Moore for efficiency.
|
if (haystack == null || needle == null) {
|
||||||
int attempts = haystack.readableBytes() - needle.readableBytes() + 1;
|
return -1;
|
||||||
for (int i = 0; i < attempts; i++) {
|
}
|
||||||
if (equals(needle, needle.readerIndex(),
|
|
||||||
haystack, haystack.readerIndex() + i,
|
if (needle.readableBytes() > haystack.readableBytes()) {
|
||||||
needle.readableBytes())) {
|
return -1;
|
||||||
return haystack.readerIndex() + i;
|
}
|
||||||
|
|
||||||
|
int n = haystack.readableBytes();
|
||||||
|
int m = needle.readableBytes();
|
||||||
|
if (m == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// When the needle has only one byte that can be read,
|
||||||
|
// the firstIndexOf method needs to be called
|
||||||
|
if (m == 1) {
|
||||||
|
return firstIndexOf((AbstractByteBuf) haystack, haystack.readerIndex(),
|
||||||
|
haystack.writerIndex(), needle.getByte(needle.readerIndex()));
|
||||||
|
}
|
||||||
|
|
||||||
|
int i;
|
||||||
|
int j = 0;
|
||||||
|
int aStartIndex = needle.readerIndex();
|
||||||
|
int bStartIndex = haystack.readerIndex();
|
||||||
|
long suffixes = maxSuf(needle, m, aStartIndex, true);
|
||||||
|
long prefixes = maxSuf(needle, m, aStartIndex, false);
|
||||||
|
int ell = Math.max((int) (suffixes >> 32), (int) (prefixes >> 32));
|
||||||
|
int per = Math.max((int) suffixes, (int) prefixes);
|
||||||
|
int memory;
|
||||||
|
int length = Math.min(m - per, ell + 1);
|
||||||
|
|
||||||
|
if (equals(needle, aStartIndex, needle, aStartIndex + per, length)) {
|
||||||
|
memory = -1;
|
||||||
|
while (j <= n - m) {
|
||||||
|
i = Math.max(ell, memory) + 1;
|
||||||
|
while (i < m && needle.getByte(i + aStartIndex) == haystack.getByte(i + j + bStartIndex)) {
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
if (i > n) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (i >= m) {
|
||||||
|
i = ell;
|
||||||
|
while (i > memory && needle.getByte(i + aStartIndex) == haystack.getByte(i + j + bStartIndex)) {
|
||||||
|
--i;
|
||||||
|
}
|
||||||
|
if (i <= memory) {
|
||||||
|
return j;
|
||||||
|
}
|
||||||
|
j += per;
|
||||||
|
memory = m - per - 1;
|
||||||
|
} else {
|
||||||
|
j += i - ell;
|
||||||
|
memory = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
per = Math.max(ell + 1, m - ell - 1) + 1;
|
||||||
|
while (j <= n - m) {
|
||||||
|
i = ell + 1;
|
||||||
|
while (i < m && needle.getByte(i + aStartIndex) == haystack.getByte(i + j + bStartIndex)) {
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
if (i > n) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (i >= m) {
|
||||||
|
i = ell;
|
||||||
|
while (i >= 0 && needle.getByte(i + aStartIndex) == haystack.getByte(i + j + bStartIndex)) {
|
||||||
|
--i;
|
||||||
|
}
|
||||||
|
if (i < 0) {
|
||||||
|
return j;
|
||||||
|
}
|
||||||
|
j += per;
|
||||||
|
} else {
|
||||||
|
j += i - ell;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static long maxSuf(ByteBuf x, int m, int start, boolean isSuffix) {
|
||||||
|
int p = 1;
|
||||||
|
int ms = -1;
|
||||||
|
int j = start;
|
||||||
|
int k = 1;
|
||||||
|
byte a;
|
||||||
|
byte b;
|
||||||
|
while (j + k < m) {
|
||||||
|
a = x.getByte(j + k);
|
||||||
|
b = x.getByte(ms + k);
|
||||||
|
boolean suffix = isSuffix ? a < b : a > b;
|
||||||
|
if (suffix) {
|
||||||
|
j += k;
|
||||||
|
k = 1;
|
||||||
|
p = j - ms;
|
||||||
|
} else if (a == b) {
|
||||||
|
if (k != p) {
|
||||||
|
++k;
|
||||||
|
} else {
|
||||||
|
j += p;
|
||||||
|
k = 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ms = j;
|
||||||
|
j = ms + 1;
|
||||||
|
k = p = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ((long) ms << 32) + p;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns {@code true} if and only if the two specified buffers are
|
* Returns {@code true} if and only if the two specified buffers are
|
||||||
* identical to each other for {@code length} bytes starting at {@code aStartIndex}
|
* identical to each other for {@code length} bytes starting at {@code aStartIndex}
|
||||||
|
@ -107,6 +107,35 @@ public class ByteBufUtilTest {
|
|||||||
assertThrows(IllegalArgumentException.class, () -> ByteBufUtil.decodeHexDump("fg"));
|
assertThrows(IllegalArgumentException.class, () -> ByteBufUtil.decodeHexDump("fg"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIndexOf() {
|
||||||
|
ByteBuf haystack = Unpooled.copiedBuffer("abc123", CharsetUtil.UTF_8);
|
||||||
|
assertEquals(0, ByteBufUtil.indexOf(Unpooled.copiedBuffer("a", CharsetUtil.UTF_8), haystack));
|
||||||
|
assertEquals(1, ByteBufUtil.indexOf(Unpooled.copiedBuffer("bc".getBytes(CharsetUtil.UTF_8)), haystack));
|
||||||
|
assertEquals(2, ByteBufUtil.indexOf(Unpooled.copiedBuffer("c".getBytes(CharsetUtil.UTF_8)), haystack));
|
||||||
|
assertEquals(0, ByteBufUtil.indexOf(Unpooled.copiedBuffer("abc12".getBytes(CharsetUtil.UTF_8)), haystack));
|
||||||
|
assertEquals(-1, ByteBufUtil.indexOf(Unpooled.copiedBuffer("abcdef".getBytes(CharsetUtil.UTF_8)), haystack));
|
||||||
|
assertEquals(-1, ByteBufUtil.indexOf(Unpooled.copiedBuffer("abc12x".getBytes(CharsetUtil.UTF_8)), haystack));
|
||||||
|
assertEquals(-1, ByteBufUtil.indexOf(Unpooled.copiedBuffer("abc123def".getBytes(CharsetUtil.UTF_8)), haystack));
|
||||||
|
|
||||||
|
final ByteBuf needle = Unpooled.copiedBuffer("abc12", CharsetUtil.UTF_8);
|
||||||
|
haystack.readerIndex(1);
|
||||||
|
needle.readerIndex(1);
|
||||||
|
assertEquals(0, ByteBufUtil.indexOf(needle, haystack));
|
||||||
|
haystack.readerIndex(2);
|
||||||
|
needle.readerIndex(3);
|
||||||
|
assertEquals(1, ByteBufUtil.indexOf(needle, haystack));
|
||||||
|
haystack.readerIndex(1);
|
||||||
|
needle.readerIndex(2);
|
||||||
|
assertEquals(1, ByteBufUtil.indexOf(needle, haystack));
|
||||||
|
haystack.release();
|
||||||
|
|
||||||
|
haystack = Unpooled.copiedBuffer("123aab123", CharsetUtil.UTF_8);
|
||||||
|
assertEquals(3, ByteBufUtil.indexOf(Unpooled.copiedBuffer("aab", CharsetUtil.UTF_8), haystack));
|
||||||
|
haystack.release();
|
||||||
|
needle.release();
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void equalsBufferSubsections() {
|
public void equalsBufferSubsections() {
|
||||||
byte[] b1 = new byte[128];
|
byte[] b1 = new byte[128];
|
||||||
|
Loading…
x
Reference in New Issue
Block a user