Improve WebSocket performance

Motivation:

WebSocket performance is to a large extent determined by the masking
and unmasking of frames. The current implementation of this in Netty can be
improved.

Modifications:

Perform the XOR operation not bytewise but in int blocks for as long as
possible. This reduces the number of necessary operations by a factor of 4.
Also don't read the writerIndex in each iteration.
Added a unit test for WebSocket decoding and encoding for verification.

Result:

A large performance gain (up to 50%) in websocket throughput.
This commit is contained in:
Matthias Einwag 2014-10-02 00:25:24 +02:00 committed by Norman Maurer
parent b828e823ef
commit 4eb1529d2c
3 changed files with 204 additions and 2 deletions

View File

@ -63,6 +63,7 @@ import io.netty.handler.codec.TooLongFrameException;
import io.netty.util.internal.logging.InternalLogger;
import io.netty.util.internal.logging.InternalLoggerFactory;
import java.nio.ByteOrder;
import java.util.List;
import static io.netty.buffer.ByteBufUtil.readBytes;
@ -382,7 +383,29 @@ public class WebSocket08FrameDecoder extends ByteToMessageDecoder
}
private void unmask(ByteBuf frame) {
for (int i = frame.readerIndex(); i < frame.writerIndex(); i++) {
int i = frame.readerIndex();
int end = frame.writerIndex();
ByteOrder order = frame.order();
// Remark: & 0xFF is necessary because Java will do signed expansion from
// byte to int which we don't want.
int intMask = ((maskingKey[0] & 0xFF) << 24)
| ((maskingKey[1] & 0xFF) << 16)
| ((maskingKey[2] & 0xFF) << 8)
| (maskingKey[3] & 0xFF);
// If the byte order of our buffers it little endian we have to bring our mask
// into the same format, because getInt() and writeInt() will use a reversed byte order
if (order == ByteOrder.LITTLE_ENDIAN) {
intMask = Integer.reverseBytes(intMask);
}
for (; i + 3 < end; i += 4) {
int unmasked = frame.getInt(i) ^ intMask;
frame.setInt(i, unmasked);
}
for (; i < end; i++) {
frame.setByte(i, frame.getByte(i) ^ maskingKey[i % 4]);
}
}

View File

@ -61,6 +61,7 @@ import io.netty.util.internal.logging.InternalLogger;
import io.netty.util.internal.logging.InternalLoggerFactory;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.List;
/**
@ -173,8 +174,34 @@ public class WebSocket08FrameEncoder extends MessageToMessageEncoder<WebSocketFr
mask = ByteBuffer.allocate(4).putInt(random).array();
buf.writeBytes(mask);
ByteOrder srcOrder = data.order();
ByteOrder dstOrder = buf.order();
int counter = 0;
for (int i = data.readerIndex(); i < data.writerIndex(); i ++) {
int i = data.readerIndex();
int end = data.writerIndex();
if (srcOrder == dstOrder) {
// Use the optimized path only when byte orders match
// Remark: & 0xFF is necessary because Java will do signed expansion from
// byte to int which we don't want.
int intMask = ((mask[0] & 0xFF) << 24)
| ((mask[1] & 0xFF) << 16)
| ((mask[2] & 0xFF) << 8)
| (mask[3] & 0xFF);
// If the byte order of our buffers is little endian we have to bring our mask
// into the same format, because getInt() and writeInt() will use a reversed byte order
if (srcOrder == ByteOrder.LITTLE_ENDIAN) {
intMask = Integer.reverseBytes(intMask);
}
for (; i + 3 < end; i += 4) {
int intData = data.getInt(i);
buf.writeInt(intData ^ intMask);
}
}
for (; i < end; i++) {
byte byteData = data.getByte(i);
buf.writeByte(byteData ^ mask[counter++ % 4]);
}

View File

@ -0,0 +1,152 @@
/*
* Copyright 2014 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.handler.codec.http.websocketx;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import io.netty.channel.embedded.EmbeddedChannel;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests the WebSocket08FrameEncoder and Decoder implementation.<br>
 * Checks whether the combination of encoding and decoding yields the original data.<br>
 * Thereby also the masking behavior is checked.
 */
public class WebSocket08EncoderDecoderTest {

    private static final int MAX_TESTDATA_LENGTH = 100 * 1024;

    /**
     * Payload lengths to test. They sit at the boundaries of each message type, because this
     * shifts the position of the mask field. At least 4 consecutive lengths are tested at each
     * boundary to check for problems related to an uneven frame length.
     */
    private static final int[] TEST_DATA_LENGTHS = {
        0, 1, 2, 3, 4, 5,
        125, 126, 127, 128, 129,
        65535, 65536, 65537, 65538, 65539
    };

    private ByteBuf binTestData;
    private String strTestData;

    /**
     * Creates the binary test buffer (byte values counting up and wrapping around) and the text
     * test string (the letters 'A' to 'Z' repeated), each {@link #MAX_TESTDATA_LENGTH} long.
     */
    private void initTestData() {
        binTestData = Unpooled.buffer(MAX_TESTDATA_LENGTH);
        for (int i = 0; i < MAX_TESTDATA_LENGTH; i++) {
            // The narrowing cast wraps around exactly like an incremented byte counter would.
            binTestData.setByte(i, (byte) i);
        }

        StringBuilder s = new StringBuilder(MAX_TESTDATA_LENGTH);
        char c = 'A';
        for (int i = 0; i < MAX_TESTDATA_LENGTH; i++) {
            s.append(c);
            c++;
            // Wrap around only after 'Z' has been appended, so the full alphabet is used.
            if (c > 'Z') {
                c = 'A';
            }
        }
        strTestData = s.toString();
    }

    @Test
    public void testWebSocketEncodingAndDecoding() {
        initTestData();
        try {
            // Test without masking
            executeTests(false);
            // Test with activated masking
            executeTests(true);
        } finally {
            // Release test data, also when an assertion failed
            binTestData.release();
        }
    }

    /**
     * Runs all round-trip tests through a fresh encoder/decoder channel pair and finishes both
     * channels afterwards.
     *
     * @param masked passed as the first constructor argument of both the encoder and the decoder
     */
    private void executeTests(boolean masked) {
        EmbeddedChannel outChannel = new EmbeddedChannel(new WebSocket08FrameEncoder(masked));
        EmbeddedChannel inChannel = new EmbeddedChannel(new WebSocket08FrameDecoder(masked, false, 1024 * 1024));
        try {
            executeTests(outChannel, inChannel);
        } finally {
            outChannel.finish();
            inChannel.finish();
        }
    }

    /** Runs the text and binary round-trip tests for every length in {@link #TEST_DATA_LENGTHS}. */
    private void executeTests(EmbeddedChannel outChannel, EmbeddedChannel inChannel) {
        for (int testDataLength : TEST_DATA_LENGTHS) {
            executeTests(outChannel, inChannel, testDataLength);
        }
    }

    /** Runs the text and the binary round-trip test for a single payload length. */
    private void executeTests(EmbeddedChannel outChannel, EmbeddedChannel inChannel, int testDataLength) {
        testTextWithLen(outChannel, inChannel, testDataLength);
        testBinaryWithLen(outChannel, inChannel, testDataLength);
    }

    /**
     * Sends the first {@code testDataLength} characters of the text test data as a text frame
     * through the encoder and the decoder and checks that the decoded text equals the input.
     */
    private void testTextWithLen(EmbeddedChannel outChannel, EmbeddedChannel inChannel, int testDataLength) {
        String testStr = strTestData.substring(0, testDataLength);
        outChannel.writeOutbound(new TextWebSocketFrame(testStr));

        transferEncodedData(outChannel, inChannel);

        Object decoded = inChannel.readInbound();
        Assert.assertNotNull(decoded);
        Assert.assertTrue(decoded instanceof TextWebSocketFrame);
        TextWebSocketFrame txt = (TextWebSocketFrame) decoded;
        try {
            Assert.assertEquals(testStr, txt.text());
        } finally {
            txt.release();
        }
    }

    /**
     * Sends the first {@code testDataLength} bytes of the binary test data as a binary frame
     * through the encoder and the decoder and checks that the decoded bytes equal the input.
     */
    private void testBinaryWithLen(EmbeddedChannel outChannel, EmbeddedChannel inChannel, int testDataLength) {
        binTestData.retain(); // need to retain for sending and still keeping it
        binTestData.setIndex(0, testDataLength); // Send only len bytes
        outChannel.writeOutbound(new BinaryWebSocketFrame(binTestData));

        transferEncodedData(outChannel, inChannel);

        Object decoded = inChannel.readInbound();
        Assert.assertNotNull(decoded);
        Assert.assertTrue(decoded instanceof BinaryWebSocketFrame);
        BinaryWebSocketFrame binFrame = (BinaryWebSocketFrame) decoded;
        try {
            ByteBuf content = binFrame.content();
            Assert.assertEquals(testDataLength, content.readableBytes());
            for (int i = 0; i < testDataLength; i++) {
                Assert.assertEquals(binTestData.getByte(i), content.getByte(i));
            }
        } finally {
            binFrame.release();
        }
    }

    /**
     * Transfers all encoded data from the encoder channel into the decoder channel.
     * Loops because there might be multiple buffers per frame (gathering write).
     */
    private static void transferEncodedData(EmbeddedChannel outChannel, EmbeddedChannel inChannel) {
        for (;;) {
            ByteBuf encoded = outChannel.readOutbound();
            if (encoded == null) {
                return;
            }
            inChannel.writeInbound(encoded);
        }
    }
}