From 3273679e5fab314ece7abfe81f26b81812f58cfd Mon Sep 17 00:00:00 2001
From: skyguard1
Date: Mon, 28 Jun 2021 17:07:17 +0800
Subject: [PATCH] Use Two way algorithm to optimize ByteBufUtil.indexOf() method (#11367)

Use Two way algorithm to optimize ByteBufUtil.indexOf() method

Motivation:

ByteBufUtil.indexOf can be inefficient for substring search on ByteBuf, in terms of algorithm complexity (O(needle.readableBytes * haystack.readableBytes)), consider using the Two Way algorithm to optimize the ByteBufUtil.indexOf() method

Modification:

Use the Two Way algorithm to optimize ByteBufUtil.indexOf() method.

Result:

The performance of the ByteBufUtil.indexOf() method is higher than the original implementation
---
Review changes applied on top of the original submission:
 - indexOf() returns an absolute index (offset + haystack.readerIndex()) again, as its Javadoc states.
 - The single-byte path calls ByteBuf.indexOf() instead of casting the haystack to AbstractByteBuf.
 - maxSuf() now indexes the needle relative to its reader index.
 - Removed the unreachable "i > n" checks (i never exceeds m, and m <= n is checked up front).
 - Test expectations updated and a regression case added for a needle with a non-zero reader index.

 .../java/io/netty/buffer/ByteBufUtil.java     | 126 ++++++++++++++++--
 .../java/io/netty/buffer/ByteBufUtilTest.java |  37 +++++
 2 files changed, 156 insertions(+), 7 deletions(-)

diff --git a/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java b/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java
index 12da3301b2..68f1737b12 100644
--- a/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java
+++ b/buffer/src/main/java/io/netty/buffer/ByteBufUtil.java
@@ -226,20 +226,132 @@ public final class ByteBufUtil {
 
     /**
      * Returns the reader index of needle in haystack, or -1 if needle is not in haystack.
+     * This method uses the Two-Way string matching algorithm, which yields O(1) space complexity and
+     * excellent performance. The returned value is an absolute index into {@code haystack}, not an
+     * offset from its reader index; an empty needle matches at the reader index of {@code haystack}.
      */
     public static int indexOf(ByteBuf needle, ByteBuf haystack) {
-        // TODO: maybe use Boyer Moore for efficiency.
-        int attempts = haystack.readableBytes() - needle.readableBytes() + 1;
-        for (int i = 0; i < attempts; i++) {
-            if (equals(needle, needle.readerIndex(),
-                       haystack, haystack.readerIndex() + i,
-                       needle.readableBytes())) {
-                return haystack.readerIndex() + i;
+        if (haystack == null || needle == null) {
+            return -1;
+        }
+
+        if (needle.readableBytes() > haystack.readableBytes()) {
+            return -1;
+        }
+
+        int n = haystack.readableBytes();
+        int m = needle.readableBytes();
+        int aStartIndex = needle.readerIndex();
+        int bStartIndex = haystack.readerIndex();
+        if (m == 0) {
+            // An empty needle matches at the first readable byte of the haystack.
+            return bStartIndex;
+        }
+
+        // A single-byte needle is a plain byte search. Use the public ByteBuf.indexOf() rather than
+        // casting to AbstractByteBuf, so that every ByteBuf implementation is accepted.
+        if (m == 1) {
+            return haystack.indexOf(bStartIndex, haystack.writerIndex(), needle.getByte(aStartIndex));
+        }
+
+        int i;
+        int j = 0;
+        long suffixes = maxSuf(needle, m, aStartIndex, true);
+        long prefixes = maxSuf(needle, m, aStartIndex, false);
+        int ell = Math.max((int) (suffixes >> 32), (int) (prefixes >> 32));
+        int per = Math.max((int) suffixes, (int) prefixes);
+        int memory;
+        int length = Math.min(m - per, ell + 1);
+
+        if (equals(needle, aStartIndex, needle, aStartIndex + per, length)) {
+            memory = -1;
+            while (j <= n - m) {
+                i = Math.max(ell, memory) + 1;
+                while (i < m && needle.getByte(i + aStartIndex) == haystack.getByte(i + j + bStartIndex)) {
+                    ++i;
+                }
+                if (i >= m) {
+                    i = ell;
+                    while (i > memory && needle.getByte(i + aStartIndex) == haystack.getByte(i + j + bStartIndex)) {
+                        --i;
+                    }
+                    if (i <= memory) {
+                        // j is an offset from the reader index; report the absolute index.
+                        return j + bStartIndex;
+                    }
+                    j += per;
+                    memory = m - per - 1;
+                } else {
+                    j += i - ell;
+                    memory = -1;
+                }
+            }
+        } else {
+            per = Math.max(ell + 1, m - ell - 1) + 1;
+            while (j <= n - m) {
+                i = ell + 1;
+                while (i < m && needle.getByte(i + aStartIndex) == haystack.getByte(i + j + bStartIndex)) {
+                    ++i;
+                }
+                if (i >= m) {
+                    i = ell;
+                    while (i >= 0 && needle.getByte(i + aStartIndex) == haystack.getByte(i + j + bStartIndex)) {
+                        --i;
+                    }
+                    if (i < 0) {
+                        return j + bStartIndex;
+                    }
+                    j += per;
+                } else {
+                    j += i - ell;
+                }
             }
         }
         return -1;
     }
 
+    /**
+     * Computes the maximal suffix of the readable bytes of {@code x}, as required by the
+     * Two-Way algorithm.
+     *
+     * @param x the needle
+     * @param m the number of readable bytes of {@code x}
+     * @param start the reader index of {@code x}
+     * @param isSuffix {@code true} to order bytes with {@code <}, {@code false} to order them with {@code >}
+     * @return the offset preceding the maximal suffix in the upper 32 bits and its period in the lower 32 bits
+     */
+    private static long maxSuf(ByteBuf x, int m, int start, boolean isSuffix) {
+        int p = 1;
+        int ms = -1;
+        // j, k and ms are offsets relative to start, so the loop is bounded by the readable length m.
+        int j = 0;
+        int k = 1;
+        byte a;
+        byte b;
+        while (j + k < m) {
+            a = x.getByte(start + j + k);
+            b = x.getByte(start + ms + k);
+            boolean suffix = isSuffix ? a < b : a > b;
+            if (suffix) {
+                j += k;
+                k = 1;
+                p = j - ms;
+            } else if (a == b) {
+                if (k != p) {
+                    ++k;
+                } else {
+                    j += p;
+                    k = 1;
+                }
+            } else {
+                ms = j;
+                j = ms + 1;
+                k = p = 1;
+            }
+        }
+        return ((long) ms << 32) + p;
+    }
+
     /**
      * Returns {@code true} if and only if the two specified buffers are
      * identical to each other for {@code length} bytes starting at {@code aStartIndex}
diff --git a/buffer/src/test/java/io/netty/buffer/ByteBufUtilTest.java b/buffer/src/test/java/io/netty/buffer/ByteBufUtilTest.java
index cf0230419b..42796a21a8 100644
--- a/buffer/src/test/java/io/netty/buffer/ByteBufUtilTest.java
+++ b/buffer/src/test/java/io/netty/buffer/ByteBufUtilTest.java
@@ -107,6 +107,43 @@ public class ByteBufUtilTest {
         assertThrows(IllegalArgumentException.class, () -> ByteBufUtil.decodeHexDump("fg"));
     }
 
+    @Test
+    public void testIndexOf() {
+        ByteBuf haystack = Unpooled.copiedBuffer("abc123", CharsetUtil.UTF_8);
+        assertEquals(0, ByteBufUtil.indexOf(Unpooled.copiedBuffer("a", CharsetUtil.UTF_8), haystack));
+        assertEquals(1, ByteBufUtil.indexOf(Unpooled.copiedBuffer("bc".getBytes(CharsetUtil.UTF_8)), haystack));
+        assertEquals(2, ByteBufUtil.indexOf(Unpooled.copiedBuffer("c".getBytes(CharsetUtil.UTF_8)), haystack));
+        assertEquals(0, ByteBufUtil.indexOf(Unpooled.copiedBuffer("abc12".getBytes(CharsetUtil.UTF_8)), haystack));
+        assertEquals(-1, ByteBufUtil.indexOf(Unpooled.copiedBuffer("abcdef".getBytes(CharsetUtil.UTF_8)), haystack));
+        assertEquals(-1, ByteBufUtil.indexOf(Unpooled.copiedBuffer("abc12x".getBytes(CharsetUtil.UTF_8)), haystack));
+        assertEquals(-1, ByteBufUtil.indexOf(Unpooled.copiedBuffer("abc123def".getBytes(CharsetUtil.UTF_8)), haystack));
+
+        final ByteBuf needle = Unpooled.copiedBuffer("abc12", CharsetUtil.UTF_8);
+        haystack.readerIndex(1);
+        needle.readerIndex(1);
+        assertEquals(1, ByteBufUtil.indexOf(needle, haystack));
+        haystack.readerIndex(2);
+        needle.readerIndex(3);
+        assertEquals(3, ByteBufUtil.indexOf(needle, haystack));
+        haystack.readerIndex(1);
+        needle.readerIndex(2);
+        assertEquals(2, ByteBufUtil.indexOf(needle, haystack));
+        haystack.release();
+
+        haystack = Unpooled.copiedBuffer("123aab123", CharsetUtil.UTF_8);
+        assertEquals(3, ByteBufUtil.indexOf(Unpooled.copiedBuffer("aab", CharsetUtil.UTF_8), haystack));
+        haystack.release();
+        needle.release();
+
+        // The reader index of the needle must not disturb the preprocessing of the needle.
+        haystack = Unpooled.copiedBuffer("aaab", CharsetUtil.UTF_8);
+        final ByteBuf offsetNeedle = Unpooled.copiedBuffer("xxxaab", CharsetUtil.UTF_8);
+        offsetNeedle.readerIndex(3);
+        assertEquals(1, ByteBufUtil.indexOf(offsetNeedle, haystack));
+        haystack.release();
+        offsetNeedle.release();
+    }
+
     @Test
     public void equalsBufferSubsections() {
         byte[] b1 = new byte[128];