Fix behavior of HttpPostMultipartRequestDecoder for Memory based Factory (#11145)

Motivation: When Memory based Factory is used, if the first chunk starts with Line Break, the HttpData is not filled with the current available buffer if the delimiter is not found yet, while it may add some. Fix JavaDoc to note potential wrong usage of content() or getByteBuf() if HttpDatais has a huge content with the risk of Out Of Memory Exception. Fix JavaDoc to explain how to release properly the Factory, whatever it is in Memory, Disk or Mixed mode. Fix issue #11143 Modifications: First, when the delimiter is not found, instead of searching Line Break from readerIndex(), we should search from readerIndex() + readableBytes() - delimiter size, since this is the only part where usefull Line Break could be searched for, except if readableBytes is less than delimiter size (then we search from readerIndex). Second, when a Memory HttpData is created, it should be assigned an empty buffer to be consistent with the other implementations (Disk or Mixed mode). We cannot change the default behavior of the content() or getByteBuf() of the Memory based HttpData since the ByteBuf is supposed to be null when released, but not empty. When a new ByteBuf is added, one more check verifies if the current ByteBuf is empty, and if so, it is released and replaced by the new one, without creating a new CompositeByteBuf. Result: In the tests testBIgFileUploadDelimiterInMiddleChunkDecoderMemoryFactory and related for other modes, the buffers are starting with a CRLF. When we offer only the prefix part of the multipart (no data at all), the current Partial HttpData has an empty buffer. The first time we offer the data starting with CRLF to the decoder, it now has a correct current Partial HttpData with a buffer not empty. The Benchmark was re-run against this new version. Old Benchmark Mode Cnt Score Error Units HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderBigAdvancedLevel thrpt 6 4,037 ± 0,358 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderBigDisabledLevel thrpt 6 4,226 ± 0,471 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderBigParanoidLevel thrpt 6 0,875 ± 0,029 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderBigSimpleLevel thrpt 6 4,346 ± 0,275 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderHighAdvancedLevel thrpt 6 2,044 ± 0,020 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderHighDisabledLevel thrpt 6 2,278 ± 0,159 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderHighParanoidLevel thrpt 6 0,174 ± 0,004 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderHighSimpleLevel thrpt 6 2,370 ± 0,065 ops/ms New Benchmark Mode Cnt Score Error Units HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderBigAdvancedLevel thrpt 6 5,604 ± 0,415 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderBigDisabledLevel thrpt 6 6,058 ± 0,111 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderBigParanoidLevel thrpt 6 0,914 ± 0,031 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderBigSimpleLevel thrpt 6 6,053 ± 0,051 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderHighAdvancedLevel thrpt 6 2,636 ± 0,141 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderHighDisabledLevel thrpt 6 3,033 ± 0,181 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderHighParanoidLevel thrpt 6 0,178 ± 0,006 ops/ms HttpPostMultipartRequestDecoderBenchmark.multipartRequestDecoderHighSimpleLevel thrpt 6 2,859 ± 0,189 ops/ms So +20 to +40% improvement due to not searching for CRLF/LF into the full buffer when no delimiter is found, but only from the end and delimiter size + 2 (CRLF).
2021-04-16 11:01:15 +02:00 · 2021-04-16 11:01:15 +02:00 · 42dc696c6c
commit 42dc696c6c
parent 3049eacc45
6 changed files with 99 additions and 22 deletions
--- a/codec-http/src/main/java/io/netty/handler/codec/http/multipart/AbstractMemoryHttpData.java
+++ b/codec-http/src/main/java/io/netty/handler/codec/http/multipart/AbstractMemoryHttpData.java
@ -43,6 +43,7 @@ public abstract class AbstractMemoryHttpData extends AbstractHttpData {

    protected AbstractMemoryHttpData(String name, Charset charset, long size) {
        super(name, charset, size);
+        byteBuf = EMPTY_BUFFER;
    }

    @Override
@ -108,6 +109,10 @@ public abstract class AbstractMemoryHttpData extends AbstractHttpData {
            } else if (localsize == 0) {
                // Nothing to add and byteBuf already exists
                buffer.release();
+            } else if (byteBuf.readableBytes() == 0) {
+                // Previous buffer is empty, so just replace it
+                byteBuf.release();
+                byteBuf = buffer;
            } else if (byteBuf instanceof CompositeByteBuf) {
                CompositeByteBuf cbb = (CompositeByteBuf) byteBuf;
                cbb.addComponent(true, buffer);
--- a/codec-http/src/main/java/io/netty/handler/codec/http/multipart/DefaultHttpDataFactory.java
+++ b/codec-http/src/main/java/io/netty/handler/codec/http/multipart/DefaultHttpDataFactory.java
@ -19,7 +19,6 @@ import io.netty.handler.codec.http.DefaultHttpRequest;
 import io.netty.handler.codec.http.HttpConstants;
 import io.netty.handler.codec.http.HttpRequest;

-import java.io.File;
 import java.io.IOException;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
@ -38,6 +37,15 @@ import java.util.Map.Entry;
 * <li>MemoryAttribute, DiskAttribute or MixedAttribute</li>
 * <li>MemoryFileUpload, DiskFileUpload or MixedFileUpload</li>
 * </ul>
+ * A good example of releasing HttpData once all work is done is as follow:<br>
+ * <pre>{@code
+ *   for (InterfaceHttpData httpData: decoder.getBodyHttpDatas()) {
+ *     httpData.release();
+ *     factory.removeHttpDataFromClean(request, httpData);
+ *   }
+ *   factory.cleanAllHttpData();
+ *   decoder.destroy();
+ *  }</pre>
 */
 public class DefaultHttpDataFactory implements HttpDataFactory {

--- a/codec-http/src/main/java/io/netty/handler/codec/http/multipart/HttpData.java
+++ b/codec-http/src/main/java/io/netty/handler/codec/http/multipart/HttpData.java
@ -121,7 +121,8 @@ public interface HttpData extends InterfaceHttpData, ByteBufHolder {
    void delete();

    /**
-     * Returns the contents of the file item as an array of bytes.
+     * Returns the contents of the file item as an array of bytes.<br>
+     * Note: this method will allocate a lot of memory, if the data is currently stored on the file system.
     *
     * @return the contents of the file item as an array of bytes.
     * @throws IOException
@ -129,7 +130,8 @@ public interface HttpData extends InterfaceHttpData, ByteBufHolder {
    byte[] get() throws IOException;

    /**
-     * Returns the content of the file item as a ByteBuf
+     * Returns the content of the file item as a ByteBuf.<br>
+     * Note: this method will allocate a lot of memory, if the data is currently stored on the file system.
     *
     * @return the content of the file item as a ByteBuf
     * @throws IOException
--- a/codec-http/src/main/java/io/netty/handler/codec/http/multipart/HttpPostBodyUtil.java
+++ b/codec-http/src/main/java/io/netty/handler/codec/http/multipart/HttpPostBodyUtil.java
@ -158,7 +158,7 @@ final class HttpPostBodyUtil {
    }

    /**
-     * Try to find LF or CRLF as Line Breaking
+     * Try to find first LF or CRLF as Line Breaking
     *
     * @param buffer the buffer to search in
     * @param index the index to start from in the buffer
@ -170,7 +170,7 @@ final class HttpPostBodyUtil {
        int posFirstChar = buffer.bytesBefore(index, toRead, HttpConstants.LF);
        if (posFirstChar == -1) {
            // No LF, so neither CRLF
-            return  -1;
+            return -1;
        }
        if (posFirstChar > 0 && buffer.getByte(index + posFirstChar - 1) == HttpConstants.CR) {
            posFirstChar--;
@ -178,6 +178,38 @@ final class HttpPostBodyUtil {
        return posFirstChar;
    }

+    /**
+     * Try to find last LF or CRLF as Line Breaking
+     *
+     * @param buffer the buffer to search in
+     * @param index the index to start from in the buffer
+     * @return a relative position from index > 0 if LF or CRLF is found
+     *         or < 0 if not found
+     */
+    static int findLastLineBreak(ByteBuf buffer, int index) {
+        int candidate = findLineBreak(buffer, index);
+        int findCRLF = 0;
+        if (candidate >= 0) {
+            if (buffer.getByte(index + candidate) == HttpConstants.CR) {
+                findCRLF = 2;
+            } else {
+                findCRLF = 1;
+            }
+            candidate += findCRLF;
+        }
+        int next;
+        while (candidate > 0 && (next = findLineBreak(buffer, index + candidate)) >= 0) {
+            candidate += next;
+            if (buffer.getByte(index + candidate) == HttpConstants.CR) {
+                findCRLF = 2;
+            } else {
+                findCRLF = 1;
+            }
+            candidate += findCRLF;
+        }
+        return candidate - findCRLF;
+    }
+
    /**
     * Try to find the delimiter, with LF or CRLF in front of it (added as delimiters) if needed
     *
--- a/codec-http/src/main/java/io/netty/handler/codec/http/multipart/HttpPostMultipartRequestDecoder.java
+++ b/codec-http/src/main/java/io/netty/handler/codec/http/multipart/HttpPostMultipartRequestDecoder.java
@ -41,6 +41,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;

+import static io.netty.buffer.Unpooled.EMPTY_BUFFER;
 import static io.netty.handler.codec.http.multipart.HttpPostBodyUtil.BINARY_STRING;
 import static io.netty.handler.codec.http.multipart.HttpPostBodyUtil.BIT_7_STRING;
 import static io.netty.handler.codec.http.multipart.HttpPostBodyUtil.BIT_8_STRING;
@ -1144,8 +1145,16 @@ public class HttpPostMultipartRequestDecoder implements InterfaceHttpPostRequest
        final byte[] bdelimiter = delimiter.getBytes(httpData.getCharset());
        int posDelimiter = HttpPostBodyUtil.findDelimiter(undecodedChunk, startReaderIndex, bdelimiter, true);
        if (posDelimiter < 0) {
-            // Not found but however perhaps because incomplete so search LF or CRLF
-            posDelimiter = HttpPostBodyUtil.findLineBreak(undecodedChunk, startReaderIndex);
+            // Not found but however perhaps because incomplete so search LF or CRLF from the end.
+            // Possible last bytes contain partially delimiter
+            // (delimiter is possibly partially there, at least 1 missing byte),
+            // therefore searching last delimiter.length +1 (+1 for CRLF instead of LF)
+            int lastPosition = undecodedChunk.readableBytes() - bdelimiter.length - 1;
+            if (lastPosition < 0) {
+                // Not enough bytes, but at most delimiter.length bytes available so can still try to find CRLF there
+                lastPosition = 0;
+            }
+            posDelimiter = HttpPostBodyUtil.findLastLineBreak(undecodedChunk, startReaderIndex  + lastPosition);
            if (posDelimiter < 0) {
                // not found so this chunk can be fully added
                ByteBuf content = undecodedChunk.copy();
@ -1157,18 +1166,21 @@ public class HttpPostMultipartRequestDecoder implements InterfaceHttpPostRequest
                undecodedChunk.readerIndex(startReaderIndex);
                undecodedChunk.writerIndex(startReaderIndex);
                return false;
-            } else if (posDelimiter > 0) {
-                // Not fully but still some bytes to provide: httpData is not yet finished since delimiter not found
-                ByteBuf content = undecodedChunk.copy(startReaderIndex, posDelimiter);
-                try {
-                    httpData.addContent(content, false);
-                } catch (IOException e) {
-                    throw new ErrorDataDecoderException(e);
-                }
-                rewriteCurrentBuffer(undecodedChunk, posDelimiter);
+            }
+            // posDelimiter is not from startReaderIndex but from startReaderIndex + lastPosition
+            posDelimiter += lastPosition;
+            if (posDelimiter == 0) {
+                // Nothing to add
                return false;
            }
-            // Empty chunk or so
+            // Not fully but still some bytes to provide: httpData is not yet finished since delimiter not found
+            ByteBuf content = undecodedChunk.copy(startReaderIndex, posDelimiter);
+            try {
+                httpData.addContent(content, false);
+            } catch (IOException e) {
+                throw new ErrorDataDecoderException(e);
+            }
+            rewriteCurrentBuffer(undecodedChunk, posDelimiter);
            return false;
        }
        // Delimiter found at posDelimiter, including LF or CRLF, so httpData has its last chunk
--- a/codec-http/src/test/java/io/netty/handler/codec/http/multipart/HttpPostRequestDecoderTest.java
+++ b/codec-http/src/test/java/io/netty/handler/codec/http/multipart/HttpPostRequestDecoderTest.java
@ -25,6 +25,7 @@ import io.netty.handler.codec.http.DefaultHttpContent;
 import io.netty.handler.codec.http.DefaultHttpRequest;
 import io.netty.handler.codec.http.DefaultLastHttpContent;
 import io.netty.handler.codec.http.FullHttpRequest;
+import io.netty.handler.codec.http.HttpConstants;
 import io.netty.handler.codec.http.HttpHeaderNames;
 import io.netty.handler.codec.http.HttpHeaderValues;
 import io.netty.handler.codec.http.HttpMethod;
@ -1003,27 +1004,44 @@ public class HttpPostRequestDecoderTest {

        HttpPostMultipartRequestDecoder decoder = new HttpPostMultipartRequestDecoder(factory, request);
        decoder.offer(new DefaultHttpContent(Unpooled.wrappedBuffer(prefix.getBytes(CharsetUtil.UTF_8))));
+        assertNotNull(((HttpData) decoder.currentPartialHttpData()).content());

        byte[] body = new byte[bytesPerChunk];
        Arrays.fill(body, (byte) 1);
+        // Set first bytes as CRLF to ensure it is correctly getting the last CRLF
+        body[0] = HttpConstants.CR;
+        body[1] = HttpConstants.LF;
        for (int i = 0; i < nbChunks; i++) {
            ByteBuf content = Unpooled.wrappedBuffer(body, 0, bytesPerChunk);
-            decoder.offer(new DefaultHttpContent(content)); // **OutOfMemory here**
+            decoder.offer(new DefaultHttpContent(content)); // **OutOfMemory previously here**
+            assertNotNull(((HttpData) decoder.currentPartialHttpData()).content());
            content.release();
        }

        byte[] bsuffix1 = suffix1.getBytes(CharsetUtil.UTF_8);
-        byte[] lastbody = new byte[bytesLastChunk + bsuffix1.length];
-        Arrays.fill(body, (byte) 1);
+        byte[] previousLastbody = new byte[bytesLastChunk - bsuffix1.length];
+        byte[] lastbody = new byte[2 * bsuffix1.length];
+        Arrays.fill(previousLastbody, (byte) 1);
+        previousLastbody[0] = HttpConstants.CR;
+        previousLastbody[1] = HttpConstants.LF;
+        Arrays.fill(lastbody, (byte) 1);
+        lastbody[0] = HttpConstants.CR;
+        lastbody[1] = HttpConstants.LF;
        for (int i = 0; i < bsuffix1.length; i++) {
-            lastbody[bytesLastChunk + i] = bsuffix1[i];
+            lastbody[bsuffix1.length + i] = bsuffix1[i];
        }

-        ByteBuf content2 = Unpooled.wrappedBuffer(lastbody, 0, lastbody.length);
+        ByteBuf content2 = Unpooled.wrappedBuffer(previousLastbody, 0, previousLastbody.length);
        decoder.offer(new DefaultHttpContent(content2));
+        assertNotNull(((HttpData) decoder.currentPartialHttpData()).content());
+        content2.release();
+        content2 = Unpooled.wrappedBuffer(lastbody, 0, lastbody.length);
+        decoder.offer(new DefaultHttpContent(content2));
+        assertNotNull(((HttpData) decoder.currentPartialHttpData()).content());
        content2.release();
        content2 = Unpooled.wrappedBuffer(suffix2.getBytes(CharsetUtil.UTF_8));
        decoder.offer(new DefaultHttpContent(content2));
+        assertNull(decoder.currentPartialHttpData());
        content2.release();
        decoder.offer(new DefaultLastHttpContent());