Implement batching of reading and writing when using datagram with io_uring. (#10606)
Motivation: io_uring does not support recvmmsg / sendmmsg directly, so we need to "emulate" them by submitting multiple IORING_OP_RECVMSG / IORING_OP_SENDMSG operations. Modifications: - Allow issuing multiple write / read calls at once, no matter which concrete AbstractIOUringChannel subclass is used - Add support for batching recvmsg / sendmsg when using IOUringDatagramChannel Result: Better performance
This commit is contained in:
parent
d266af2778
commit
70b7621963
@ -73,7 +73,14 @@ abstract class AbstractIOUringChannel extends AbstractChannel implements UnixCha
|
|||||||
private static final int WRITE_SCHEDULED = 1 << 4;
|
private static final int WRITE_SCHEDULED = 1 << 4;
|
||||||
private static final int READ_SCHEDULED = 1 << 5;
|
private static final int READ_SCHEDULED = 1 << 5;
|
||||||
private static final int CONNECT_SCHEDULED = 1 << 6;
|
private static final int CONNECT_SCHEDULED = 1 << 6;
|
||||||
private int ioState;
|
// A byte is enough for now.
|
||||||
|
private byte ioState;
|
||||||
|
|
||||||
|
// It's possible that multiple read / writes are issued. We need to keep track of these.
|
||||||
|
// Let's limit the amount of pending writes and reads by Short.MAX_VALUE. Maybe Byte.MAX_VALUE would also be good
|
||||||
|
// enough but let's be a bit more flexible for now.
|
||||||
|
private short numOutstandingWrites;
|
||||||
|
private short numOutstandingReads;
|
||||||
|
|
||||||
private ChannelPromise delayedClose;
|
private ChannelPromise delayedClose;
|
||||||
private boolean inputClosedSeenErrorOnRead;
|
private boolean inputClosedSeenErrorOnRead;
|
||||||
@ -263,34 +270,37 @@ abstract class AbstractIOUringChannel extends AbstractChannel implements UnixCha
|
|||||||
if ((ioState & WRITE_SCHEDULED) != 0) {
|
if ((ioState & WRITE_SCHEDULED) != 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
scheduleWrite(in);
|
if (scheduleWrite(in) > 0) {
|
||||||
|
ioState |= WRITE_SCHEDULED;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void scheduleWrite(ChannelOutboundBuffer in) {
|
private int scheduleWrite(ChannelOutboundBuffer in) {
|
||||||
if (delayedClose != null) {
|
if (delayedClose != null || numOutstandingWrites == Short.MAX_VALUE) {
|
||||||
return;
|
return 0;
|
||||||
}
|
}
|
||||||
if (in == null) {
|
if (in == null) {
|
||||||
return;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int msgCount = in.size();
|
int msgCount = in.size();
|
||||||
if (msgCount == 0) {
|
if (msgCount == 0) {
|
||||||
return;
|
return 0;
|
||||||
}
|
}
|
||||||
Object msg = in.current();
|
Object msg = in.current();
|
||||||
|
|
||||||
assert (ioState & WRITE_SCHEDULED) == 0;
|
|
||||||
if (msgCount > 1) {
|
if (msgCount > 1) {
|
||||||
ioUringUnsafe().scheduleWriteMultiple(in);
|
numOutstandingWrites = (short) ioUringUnsafe().scheduleWriteMultiple(in);
|
||||||
} else if ((msg instanceof ByteBuf) && ((ByteBuf) msg).nioBufferCount() > 1 ||
|
} else if ((msg instanceof ByteBuf) && ((ByteBuf) msg).nioBufferCount() > 1 ||
|
||||||
((msg instanceof ByteBufHolder) && ((ByteBufHolder) msg).content().nioBufferCount() > 1)) {
|
((msg instanceof ByteBufHolder) && ((ByteBufHolder) msg).content().nioBufferCount() > 1)) {
|
||||||
// We also need some special handling for CompositeByteBuf
|
// We also need some special handling for CompositeByteBuf
|
||||||
ioUringUnsafe().scheduleWriteMultiple(in);
|
numOutstandingWrites = (short) ioUringUnsafe().scheduleWriteMultiple(in);
|
||||||
} else {
|
} else {
|
||||||
ioUringUnsafe().scheduleWriteSingle(msg);
|
numOutstandingWrites = (short) ioUringUnsafe().scheduleWriteSingle(msg);
|
||||||
}
|
}
|
||||||
ioState |= WRITE_SCHEDULED;
|
// Ensure we never overflow
|
||||||
|
assert numOutstandingWrites > 0;
|
||||||
|
return numOutstandingWrites;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void schedulePollOut() {
|
private void schedulePollOut() {
|
||||||
@ -316,14 +326,16 @@ abstract class AbstractIOUringChannel extends AbstractChannel implements UnixCha
|
|||||||
private IOUringRecvByteAllocatorHandle allocHandle;
|
private IOUringRecvByteAllocatorHandle allocHandle;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Schedule the write of multiple messages in the {@link ChannelOutboundBuffer}.
|
* Schedule the write of multiple messages in the {@link ChannelOutboundBuffer} and returns the number of
|
||||||
|
* {@link #writeComplete(int, int)} calls that are expected because of the scheduled write.
|
||||||
*/
|
*/
|
||||||
protected abstract void scheduleWriteMultiple(ChannelOutboundBuffer in);
|
protected abstract int scheduleWriteMultiple(ChannelOutboundBuffer in);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Schedule the write of a singe message.
|
* Schedule the write of a single message and returns the number of {@link #writeComplete(int, int)} calls
|
||||||
|
* that are expected because of the scheduled write
|
||||||
*/
|
*/
|
||||||
protected abstract void scheduleWriteSingle(Object msg);
|
protected abstract int scheduleWriteSingle(Object msg);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close(ChannelPromise promise) {
|
public void close(ChannelPromise promise) {
|
||||||
@ -463,16 +475,19 @@ abstract class AbstractIOUringChannel extends AbstractChannel implements UnixCha
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
final void readComplete(int res) {
|
final void readComplete(int res, int data) {
|
||||||
ioState &= ~READ_SCHEDULED;
|
assert numOutstandingReads > 0;
|
||||||
|
if (--numOutstandingReads == 0) {
|
||||||
|
ioState &= ~READ_SCHEDULED;
|
||||||
|
}
|
||||||
|
|
||||||
readComplete0(res);
|
readComplete0(res, data, numOutstandingReads);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called once a read was completed.
|
* Called once a read was completed.
|
||||||
*/
|
*/
|
||||||
protected abstract void readComplete0(int res);
|
protected abstract void readComplete0(int res, int data, int outstandingCompletes);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called once POLLRDHUP event is ready to be processed
|
* Called once POLLRDHUP event is ready to be processed
|
||||||
@ -486,9 +501,7 @@ abstract class AbstractIOUringChannel extends AbstractChannel implements UnixCha
|
|||||||
recvBufAllocHandle().rdHupReceived();
|
recvBufAllocHandle().rdHupReceived();
|
||||||
|
|
||||||
if (isActive()) {
|
if (isActive()) {
|
||||||
if ((ioState & READ_SCHEDULED) == 0) {
|
scheduleFirstReadIfNeeded();
|
||||||
scheduleFirstRead();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// Just to be safe make sure the input marked as closed.
|
// Just to be safe make sure the input marked as closed.
|
||||||
shutdownInput(true);
|
shutdownInput(true);
|
||||||
@ -505,6 +518,10 @@ abstract class AbstractIOUringChannel extends AbstractChannel implements UnixCha
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
scheduleFirstReadIfNeeded();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void scheduleFirstReadIfNeeded() {
|
||||||
if ((ioState & READ_SCHEDULED) == 0) {
|
if ((ioState & READ_SCHEDULED) == 0) {
|
||||||
scheduleFirstRead();
|
scheduleFirstRead();
|
||||||
}
|
}
|
||||||
@ -520,16 +537,19 @@ abstract class AbstractIOUringChannel extends AbstractChannel implements UnixCha
|
|||||||
|
|
||||||
protected final void scheduleRead() {
|
protected final void scheduleRead() {
|
||||||
// Only schedule another read if the fd is still open.
|
// Only schedule another read if the fd is still open.
|
||||||
if (delayedClose == null && fd().isOpen()) {
|
if (delayedClose == null && fd().isOpen() && (ioState & READ_SCHEDULED) == 0) {
|
||||||
ioState |= READ_SCHEDULED;
|
numOutstandingReads = (short) scheduleRead0();
|
||||||
scheduleRead0();
|
if (numOutstandingReads > 0) {
|
||||||
|
ioState |= READ_SCHEDULED;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A read should be scheduled.
|
* Schedule a read and returns the number of {@link #readComplete(int, int)} calls that are expected because of
|
||||||
|
* the scheduled read.
|
||||||
*/
|
*/
|
||||||
protected abstract void scheduleRead0();
|
protected abstract int scheduleRead0();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called once POLLOUT event is ready to be processed
|
* Called once POLLOUT event is ready to be processed
|
||||||
@ -578,33 +598,32 @@ abstract class AbstractIOUringChannel extends AbstractChannel implements UnixCha
|
|||||||
/**
|
/**
|
||||||
* Called once a write was completed.
|
* Called once a write was completed.
|
||||||
*/
|
*/
|
||||||
final void writeComplete(int res) {
|
final void writeComplete(int res, int data) {
|
||||||
ChannelOutboundBuffer channelOutboundBuffer = unsafe().outboundBuffer();
|
assert numOutstandingWrites > 0;
|
||||||
if (res >= 0) {
|
--numOutstandingWrites;
|
||||||
removeFromOutboundBuffer(channelOutboundBuffer, res);
|
|
||||||
// We only reset this once we are done with calling removeBytes(...) as otherwise we may trigger a write
|
boolean writtenAll = writeComplete0(res, data, numOutstandingWrites);
|
||||||
// while still removing messages internally in removeBytes(...) which then may corrupt state.
|
if (!writtenAll && (ioState & POLL_OUT_SCHEDULED) == 0) {
|
||||||
|
// We were not able to write everything, let's register for POLLOUT
|
||||||
|
schedulePollOut();
|
||||||
|
}
|
||||||
|
|
||||||
|
// We only reset this once we are done with calling removeBytes(...) as otherwise we may trigger a write
|
||||||
|
// while still removing messages internally in removeBytes(...) which then may corrupt state.
|
||||||
|
if (numOutstandingWrites == 0) {
|
||||||
ioState &= ~WRITE_SCHEDULED;
|
ioState &= ~WRITE_SCHEDULED;
|
||||||
doWrite(channelOutboundBuffer);
|
|
||||||
} else {
|
// If we could write all and we did not schedule a pollout yet let us try to write again
|
||||||
ioState &= ~WRITE_SCHEDULED;
|
if (writtenAll && (ioState & POLL_OUT_SCHEDULED) == 0) {
|
||||||
try {
|
doWrite(unsafe().outboundBuffer());
|
||||||
if (ioResult("io_uring write", res) == 0) {
|
|
||||||
// We were not able to write everything, let's register for POLLOUT
|
|
||||||
schedulePollOut();
|
|
||||||
}
|
|
||||||
} catch (Throwable cause) {
|
|
||||||
handleWriteError(cause);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called once a write completed and we should remove message(s) from the {@link ChannelOutboundBuffer}
|
* Called once a write was completed.
|
||||||
*/
|
*/
|
||||||
protected void removeFromOutboundBuffer(ChannelOutboundBuffer outboundBuffer, int bytes) {
|
abstract boolean writeComplete0(int res, int data, int outstanding);
|
||||||
outboundBuffer.removeBytes(bytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Connect was completed.
|
* Connect was completed.
|
||||||
|
@ -75,26 +75,33 @@ abstract class AbstractIOUringServerChannel extends AbstractIOUringChannel imple
|
|||||||
final class UringServerChannelUnsafe extends AbstractIOUringChannel.AbstractUringUnsafe {
|
final class UringServerChannelUnsafe extends AbstractIOUringChannel.AbstractUringUnsafe {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void scheduleWriteMultiple(ChannelOutboundBuffer in) {
|
protected int scheduleWriteMultiple(ChannelOutboundBuffer in) {
|
||||||
// Do nothing
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void scheduleWriteSingle(Object msg) {
|
protected int scheduleWriteSingle(Object msg) {
|
||||||
// Do nothing
|
throw new UnsupportedOperationException();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void scheduleRead0() {
|
boolean writeComplete0(int res, int data, int outstanding) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int scheduleRead0() {
|
||||||
final IOUringRecvByteAllocatorHandle allocHandle = recvBufAllocHandle();
|
final IOUringRecvByteAllocatorHandle allocHandle = recvBufAllocHandle();
|
||||||
allocHandle.attemptedBytesRead(1);
|
allocHandle.attemptedBytesRead(1);
|
||||||
|
|
||||||
IOUringSubmissionQueue submissionQueue = submissionQueue();
|
IOUringSubmissionQueue submissionQueue = submissionQueue();
|
||||||
submissionQueue.addAccept(fd().intValue(),
|
submissionQueue.addAccept(fd().intValue(),
|
||||||
acceptedAddressMemoryAddress, acceptedAddressLengthMemoryAddress, (short) 0);
|
acceptedAddressMemoryAddress, acceptedAddressLengthMemoryAddress, (short) 0);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void readComplete0(int res) {
|
@Override
|
||||||
|
protected void readComplete0(int res, int data, int outstanding) {
|
||||||
final IOUringRecvByteAllocatorHandle allocHandle =
|
final IOUringRecvByteAllocatorHandle allocHandle =
|
||||||
(IOUringRecvByteAllocatorHandle) unsafe()
|
(IOUringRecvByteAllocatorHandle) unsafe()
|
||||||
.recvBufAllocHandle();
|
.recvBufAllocHandle();
|
||||||
|
@ -211,7 +211,7 @@ abstract class AbstractIOUringStreamChannel extends AbstractIOUringChannel imple
|
|||||||
private ByteBuf readBuffer;
|
private ByteBuf readBuffer;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void scheduleWriteMultiple(ChannelOutboundBuffer in) {
|
protected int scheduleWriteMultiple(ChannelOutboundBuffer in) {
|
||||||
final IovArray iovecArray = ((IOUringEventLoop) eventLoop()).iovArray();
|
final IovArray iovecArray = ((IOUringEventLoop) eventLoop()).iovArray();
|
||||||
try {
|
try {
|
||||||
int offset = iovecArray.count();
|
int offset = iovecArray.count();
|
||||||
@ -222,18 +222,20 @@ abstract class AbstractIOUringStreamChannel extends AbstractIOUringChannel imple
|
|||||||
// This should never happen, anyway fallback to single write.
|
// This should never happen, anyway fallback to single write.
|
||||||
scheduleWriteSingle(in.current());
|
scheduleWriteSingle(in.current());
|
||||||
}
|
}
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void scheduleWriteSingle(Object msg) {
|
protected int scheduleWriteSingle(Object msg) {
|
||||||
ByteBuf buf = (ByteBuf) msg;
|
ByteBuf buf = (ByteBuf) msg;
|
||||||
IOUringSubmissionQueue submissionQueue = submissionQueue();
|
IOUringSubmissionQueue submissionQueue = submissionQueue();
|
||||||
submissionQueue.addWrite(socket.intValue(), buf.memoryAddress(), buf.readerIndex(),
|
submissionQueue.addWrite(socket.intValue(), buf.memoryAddress(), buf.readerIndex(),
|
||||||
buf.writerIndex(), (short) 0);
|
buf.writerIndex(), (short) 0);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void scheduleRead0() {
|
protected int scheduleRead0() {
|
||||||
final IOUringRecvByteAllocatorHandle allocHandle = recvBufAllocHandle();
|
final IOUringRecvByteAllocatorHandle allocHandle = recvBufAllocHandle();
|
||||||
ByteBuf byteBuf = allocHandle.allocate(alloc());
|
ByteBuf byteBuf = allocHandle.allocate(alloc());
|
||||||
IOUringSubmissionQueue submissionQueue = submissionQueue();
|
IOUringSubmissionQueue submissionQueue = submissionQueue();
|
||||||
@ -244,10 +246,11 @@ abstract class AbstractIOUringStreamChannel extends AbstractIOUringChannel imple
|
|||||||
|
|
||||||
submissionQueue.addRead(socket.intValue(), byteBuf.memoryAddress(),
|
submissionQueue.addRead(socket.intValue(), byteBuf.memoryAddress(),
|
||||||
byteBuf.writerIndex(), byteBuf.capacity(), (short) 0);
|
byteBuf.writerIndex(), byteBuf.capacity(), (short) 0);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void readComplete0(int res) {
|
protected void readComplete0(int res, int data, int outstanding) {
|
||||||
boolean close = false;
|
boolean close = false;
|
||||||
|
|
||||||
final IOUringRecvByteAllocatorHandle allocHandle = recvBufAllocHandle();
|
final IOUringRecvByteAllocatorHandle allocHandle = recvBufAllocHandle();
|
||||||
@ -315,5 +318,21 @@ abstract class AbstractIOUringStreamChannel extends AbstractIOUringChannel imple
|
|||||||
shutdownInput(false);
|
shutdownInput(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
boolean writeComplete0(int res, int data, int outstanding) {
|
||||||
|
if (res >= 0) {
|
||||||
|
unsafe().outboundBuffer().removeBytes(res);
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
if (ioResult("io_uring write", res) == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} catch (Throwable cause) {
|
||||||
|
handleWriteError(cause);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -41,4 +41,6 @@ public class IOUringChannelOption<T> extends UnixChannelOption<T> {
|
|||||||
ChannelOption.valueOf(IOUringChannelOption.class, "TCP_DEFER_ACCEPT");
|
ChannelOption.valueOf(IOUringChannelOption.class, "TCP_DEFER_ACCEPT");
|
||||||
public static final ChannelOption<Boolean> TCP_QUICKACK = valueOf(IOUringChannelOption.class, "TCP_QUICKACK");
|
public static final ChannelOption<Boolean> TCP_QUICKACK = valueOf(IOUringChannelOption.class, "TCP_QUICKACK");
|
||||||
public static final ChannelOption<Map<InetAddress, byte[]>> TCP_MD5SIG = valueOf("TCP_MD5SIG");
|
public static final ChannelOption<Map<InetAddress, byte[]>> TCP_MD5SIG = valueOf("TCP_MD5SIG");
|
||||||
|
|
||||||
|
public static final ChannelOption<Integer> MAX_DATAGRAM_PAYLOAD_SIZE = valueOf("MAX_DATAGRAM_PAYLOAD_SIZE");
|
||||||
}
|
}
|
||||||
|
@ -26,12 +26,10 @@ import io.netty.channel.DefaultAddressedEnvelope;
|
|||||||
import io.netty.channel.socket.DatagramChannel;
|
import io.netty.channel.socket.DatagramChannel;
|
||||||
import io.netty.channel.socket.DatagramPacket;
|
import io.netty.channel.socket.DatagramPacket;
|
||||||
import io.netty.channel.socket.InternetProtocolFamily;
|
import io.netty.channel.socket.InternetProtocolFamily;
|
||||||
import io.netty.channel.unix.Buffer;
|
|
||||||
import io.netty.channel.unix.Errors;
|
import io.netty.channel.unix.Errors;
|
||||||
import io.netty.channel.unix.Errors.NativeIoException;
|
import io.netty.channel.unix.Errors.NativeIoException;
|
||||||
import io.netty.channel.unix.Socket;
|
import io.netty.channel.unix.Socket;
|
||||||
import io.netty.util.internal.ObjectUtil;
|
import io.netty.util.internal.ObjectUtil;
|
||||||
import io.netty.util.internal.PlatformDependent;
|
|
||||||
import io.netty.util.internal.StringUtil;
|
import io.netty.util.internal.StringUtil;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
@ -41,7 +39,6 @@ import java.net.InetSocketAddress;
|
|||||||
import java.net.NetworkInterface;
|
import java.net.NetworkInterface;
|
||||||
import java.net.PortUnreachableException;
|
import java.net.PortUnreachableException;
|
||||||
import java.net.SocketAddress;
|
import java.net.SocketAddress;
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
|
|
||||||
import static io.netty.channel.unix.Errors.ioResult;
|
import static io.netty.channel.unix.Errors.ioResult;
|
||||||
|
|
||||||
@ -326,75 +323,78 @@ public final class IOUringDatagramChannel extends AbstractIOUringChannel impleme
|
|||||||
}
|
}
|
||||||
|
|
||||||
final class IOUringDatagramChannelUnsafe extends AbstractUringUnsafe {
|
final class IOUringDatagramChannelUnsafe extends AbstractUringUnsafe {
|
||||||
private ByteBuf readBuffer;
|
|
||||||
private boolean recvMsg;
|
|
||||||
|
|
||||||
// These buffers are used for msghdr, iov, sockaddr_in / sockaddr_in6 when doing recvmsg / sendmsg
|
// These buffers are used for msghdr, iov, sockaddr_in / sockaddr_in6 when doing recvmsg / sendmsg
|
||||||
//
|
//
|
||||||
// TODO: Alternative we could also allocate these everytime from the ByteBufAllocator or we could use
|
// TODO: Alternative we could also allocate these everytime from the ByteBufAllocator or we could use
|
||||||
// some sort of other pool. Let's keep it simple for now.
|
// some sort of other pool. Let's keep it simple for now.
|
||||||
private ByteBuffer recvmsgBuffer;
|
//
|
||||||
private long recvmsgBufferAddr = -1;
|
// Consider exposing some configuration for that.
|
||||||
private ByteBuffer sendmsgBuffer;
|
private final MsgHdrMemoryArray recvmsgHdrs = new MsgHdrMemoryArray(256);
|
||||||
private long sendmsgBufferAddr = -1;
|
private final MsgHdrMemoryArray sendmsgHdrs = new MsgHdrMemoryArray(256);
|
||||||
|
private final int[] sendmsgResArray = new int[sendmsgHdrs.capacity()];
|
||||||
|
private final WriteProcessor writeProcessor = new WriteProcessor();
|
||||||
|
|
||||||
private long sendmsgBufferAddr() {
|
private ByteBuf readBuffer;
|
||||||
long address = this.sendmsgBufferAddr;
|
|
||||||
if (address == -1) {
|
|
||||||
assert sendmsgBuffer == null;
|
|
||||||
int length = Native.SIZEOF_MSGHDR + Native.SIZEOF_SOCKADDR_STORAGE + Native.SIZEOF_IOVEC;
|
|
||||||
sendmsgBuffer = Buffer.allocateDirectWithNativeOrder(length);
|
|
||||||
sendmsgBufferAddr = address = Buffer.memoryAddress(sendmsgBuffer);
|
|
||||||
|
|
||||||
// memset once
|
private final class WriteProcessor implements ChannelOutboundBuffer.MessageProcessor {
|
||||||
PlatformDependent.setMemory(address, length, (byte) 0);
|
private int written;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean processMessage(Object msg) {
|
||||||
|
if (scheduleWrite(msg, true)) {
|
||||||
|
written++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
private long recvmsgBufferAddr() {
|
int write(ChannelOutboundBuffer in) {
|
||||||
long address = this.recvmsgBufferAddr;
|
written = 0;
|
||||||
if (address == -1) {
|
try {
|
||||||
assert recvmsgBuffer == null;
|
in.forEachFlushedMessage(this);
|
||||||
int length = Native.SIZEOF_MSGHDR + Native.SIZEOF_SOCKADDR_STORAGE + Native.SIZEOF_IOVEC;
|
} catch (Exception e) {
|
||||||
recvmsgBuffer = Buffer.allocateDirectWithNativeOrder(length);
|
// This should never happen as our processMessage(...) never throws.
|
||||||
recvmsgBufferAddr = address = Buffer.memoryAddress(recvmsgBuffer);
|
throw new IllegalStateException(e);
|
||||||
|
}
|
||||||
// memset once
|
return written;
|
||||||
PlatformDependent.setMemory(address, length, (byte) 0);
|
|
||||||
}
|
}
|
||||||
return address;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void releaseBuffers() {
|
void releaseBuffers() {
|
||||||
if (sendmsgBuffer != null) {
|
sendmsgHdrs.release();
|
||||||
Buffer.free(sendmsgBuffer);
|
recvmsgHdrs.release();
|
||||||
sendmsgBuffer = null;
|
|
||||||
sendmsgBufferAddr = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (recvmsgBuffer != null) {
|
|
||||||
Buffer.free(recvmsgBuffer);
|
|
||||||
recvmsgBuffer = null;
|
|
||||||
recvmsgBufferAddr = -1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void readComplete0(int res) {
|
protected void readComplete0(int res, int data, int outstanding) {
|
||||||
final IOUringRecvByteAllocatorHandle allocHandle = recvBufAllocHandle();
|
final IOUringRecvByteAllocatorHandle allocHandle = recvBufAllocHandle();
|
||||||
final ChannelPipeline pipeline = pipeline();
|
final ChannelPipeline pipeline = pipeline();
|
||||||
ByteBuf byteBuf = this.readBuffer;
|
ByteBuf byteBuf = this.readBuffer;
|
||||||
this.readBuffer = null;
|
|
||||||
assert byteBuf != null;
|
assert byteBuf != null;
|
||||||
boolean recvmsg = this.recvMsg;
|
|
||||||
this.recvMsg = false;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
if (data == -1) {
|
||||||
|
assert outstanding == 0;
|
||||||
|
// data == -1 means that we did a read(...) and not a recvmsg(...)
|
||||||
|
readComplete(pipeline, allocHandle, byteBuf, res);
|
||||||
|
} else {
|
||||||
|
recvmsgComplete(pipeline, allocHandle, byteBuf, res, data, outstanding);
|
||||||
|
}
|
||||||
|
} catch (Throwable t) {
|
||||||
|
if (connected && t instanceof NativeIoException) {
|
||||||
|
t = translateForConnected((NativeIoException) t);
|
||||||
|
}
|
||||||
|
pipeline.fireExceptionCaught(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void readComplete(ChannelPipeline pipeline, IOUringRecvByteAllocatorHandle allocHandle,
|
||||||
|
ByteBuf byteBuf, int res) throws IOException {
|
||||||
|
try {
|
||||||
|
this.readBuffer = null;
|
||||||
if (res < 0) {
|
if (res < 0) {
|
||||||
// If res is negative we should pass it to ioResult(...) which will either throw
|
// If res is negative we should pass it to ioResult(...) which will either throw
|
||||||
// or convert it to 0 if we could not read because the socket was not readable.
|
// or convert it to 0 if we could not read because the socket was not readable.
|
||||||
allocHandle.lastBytesRead(ioResult("io_uring read / recvmsg", res));
|
allocHandle.lastBytesRead(ioResult("io_uring read", res));
|
||||||
} else if (res > 0) {
|
} else if (res > 0) {
|
||||||
byteBuf.writerIndex(byteBuf.writerIndex() + res);
|
byteBuf.writerIndex(byteBuf.writerIndex() + res);
|
||||||
allocHandle.lastBytesRead(res);
|
allocHandle.lastBytesRead(res);
|
||||||
@ -410,26 +410,12 @@ public final class IOUringDatagramChannel extends AbstractIOUringChannel impleme
|
|||||||
pipeline.fireChannelReadComplete();
|
pipeline.fireChannelReadComplete();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DatagramPacket packet;
|
|
||||||
if (!recvmsg) {
|
|
||||||
packet = new DatagramPacket(byteBuf, IOUringDatagramChannel.this.localAddress(),
|
|
||||||
IOUringDatagramChannel.this.remoteAddress());
|
|
||||||
} else {
|
|
||||||
long sockaddrAddress = recvmsgBufferAddr() + Native.SIZEOF_MSGHDR;
|
|
||||||
final InetSocketAddress remote;
|
|
||||||
if (socket.isIpv6()) {
|
|
||||||
byte[] bytes = ((IOUringEventLoop) eventLoop()).inet6AddressArray();
|
|
||||||
remote = SockaddrIn.readIPv6(sockaddrAddress, bytes);
|
|
||||||
} else {
|
|
||||||
byte[] bytes = ((IOUringEventLoop) eventLoop()).inet4AddressArray();
|
|
||||||
remote = SockaddrIn.readIPv4(sockaddrAddress, bytes);
|
|
||||||
}
|
|
||||||
packet = new DatagramPacket(byteBuf,
|
|
||||||
IOUringDatagramChannel.this.localAddress(), remote);
|
|
||||||
}
|
|
||||||
allocHandle.incMessagesRead(1);
|
allocHandle.incMessagesRead(1);
|
||||||
pipeline.fireChannelRead(packet);
|
pipeline.fireChannelRead(new DatagramPacket(byteBuf, IOUringDatagramChannel.this.localAddress(),
|
||||||
|
IOUringDatagramChannel.this.remoteAddress()));
|
||||||
byteBuf = null;
|
byteBuf = null;
|
||||||
|
|
||||||
if (allocHandle.continueReading()) {
|
if (allocHandle.continueReading()) {
|
||||||
// Let's schedule another read.
|
// Let's schedule another read.
|
||||||
scheduleRead();
|
scheduleRead();
|
||||||
@ -438,11 +424,6 @@ public final class IOUringDatagramChannel extends AbstractIOUringChannel impleme
|
|||||||
allocHandle.readComplete();
|
allocHandle.readComplete();
|
||||||
pipeline.fireChannelReadComplete();
|
pipeline.fireChannelReadComplete();
|
||||||
}
|
}
|
||||||
} catch (Throwable t) {
|
|
||||||
if (connected && t instanceof NativeIoException) {
|
|
||||||
t = translateForConnected((NativeIoException) t);
|
|
||||||
}
|
|
||||||
pipeline.fireExceptionCaught(t);
|
|
||||||
} finally {
|
} finally {
|
||||||
if (byteBuf != null) {
|
if (byteBuf != null) {
|
||||||
byteBuf.release();
|
byteBuf.release();
|
||||||
@ -450,44 +431,134 @@ public final class IOUringDatagramChannel extends AbstractIOUringChannel impleme
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
private void recvmsgComplete(ChannelPipeline pipeline, IOUringRecvByteAllocatorHandle allocHandle,
|
||||||
protected void scheduleRead0() {
|
ByteBuf byteBuf, int res, int idx, int outstanding) throws IOException {
|
||||||
final IOUringRecvByteAllocatorHandle allocHandle = recvBufAllocHandle();
|
MsgHdrMemory hdr = recvmsgHdrs.hdr(idx);
|
||||||
ByteBuf byteBuf = allocHandle.allocate(alloc());
|
|
||||||
IOUringSubmissionQueue submissionQueue = submissionQueue();
|
|
||||||
|
|
||||||
assert readBuffer == null;
|
if (res < 0) {
|
||||||
readBuffer = byteBuf;
|
// If res is negative we should pass it to ioResult(...) which will either throw
|
||||||
|
// or convert it to 0 if we could not read because the socket was not readable.
|
||||||
recvMsg = !isConnected();
|
allocHandle.lastBytesRead(ioResult("io_uring recvmsg", res));
|
||||||
long bufferAddress = byteBuf.memoryAddress();
|
} else if (res > 0) {
|
||||||
allocHandle.attemptedBytesRead(byteBuf.writableBytes());
|
allocHandle.lastBytesRead(res);
|
||||||
|
allocHandle.incMessagesRead(1);
|
||||||
if (!recvMsg) {
|
DatagramPacket packet = hdr.read(IOUringDatagramChannel.this, byteBuf, res);
|
||||||
submissionQueue.addRead(socket.intValue(), bufferAddress,
|
pipeline.fireChannelRead(packet);
|
||||||
byteBuf.writerIndex(), byteBuf.capacity(), (short) 0);
|
|
||||||
} else {
|
} else {
|
||||||
int addrLen = addrLen();
|
allocHandle.lastBytesRead(0);
|
||||||
long recvmsgBufferAddr = recvmsgBufferAddr();
|
}
|
||||||
long sockaddrAddress = recvmsgBufferAddr + Native.SIZEOF_MSGHDR;
|
|
||||||
long iovecAddress = sockaddrAddress + addrLen;
|
|
||||||
|
|
||||||
Iov.write(iovecAddress, bufferAddress + byteBuf.writerIndex(), byteBuf.writableBytes());
|
if (outstanding == 0) {
|
||||||
MsgHdr.write(recvmsgBufferAddr, sockaddrAddress, addrLen, iovecAddress, 1);
|
// There are no outstanding completion events, release the readBuffer and see if we need to schedule
|
||||||
submissionQueue.addRecvmsg(socket.intValue(), recvmsgBufferAddr, (short) 0);
|
// another one or if the user will do it.
|
||||||
|
this.readBuffer.release();
|
||||||
|
this.readBuffer = null;
|
||||||
|
recvmsgHdrs.clear();
|
||||||
|
if (allocHandle.continueReading()) {
|
||||||
|
// Let's schedule another read.
|
||||||
|
scheduleRead();
|
||||||
|
} else {
|
||||||
|
allocHandle.readComplete();
|
||||||
|
pipeline.fireChannelReadComplete();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private int addrLen() {
|
@Override
|
||||||
return socket.isIpv6() ? Native.SIZEOF_SOCKADDR_IN6 :
|
protected int scheduleRead0() {
|
||||||
Native.SIZEOF_SOCKADDR_IN;
|
final IOUringRecvByteAllocatorHandle allocHandle = recvBufAllocHandle();
|
||||||
|
ByteBuf byteBuf = allocHandle.allocate(alloc());
|
||||||
|
assert readBuffer == null;
|
||||||
|
readBuffer = byteBuf;
|
||||||
|
|
||||||
|
int writable = byteBuf.writableBytes();
|
||||||
|
allocHandle.attemptedBytesRead(writable);
|
||||||
|
int datagramSize = config().getMaxDatagramPayloadSize();
|
||||||
|
|
||||||
|
int numDatagram = datagramSize == 0 ? 1 : Math.max(1, byteBuf.writableBytes() / datagramSize);
|
||||||
|
|
||||||
|
if (isConnected() && numDatagram <= 1) {
|
||||||
|
submissionQueue().addRead(socket.intValue(), byteBuf.memoryAddress(),
|
||||||
|
byteBuf.writerIndex(), byteBuf.capacity(), (short) -1);
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
int scheduled = scheduleRecvmsg(byteBuf, numDatagram, datagramSize);
|
||||||
|
if (scheduled == 0) {
|
||||||
|
// We could not schedule any recvmsg so we need to release the buffer as there will be no
|
||||||
|
// completion event.
|
||||||
|
readBuffer = null;
|
||||||
|
byteBuf.release();
|
||||||
|
}
|
||||||
|
return scheduled;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int scheduleRecvmsg(ByteBuf byteBuf, int numDatagram, int datagramSize) {
|
||||||
|
int writable = byteBuf.writableBytes();
|
||||||
|
IOUringSubmissionQueue submissionQueue = submissionQueue();
|
||||||
|
long bufferAddress = byteBuf.memoryAddress() + byteBuf.writerIndex();
|
||||||
|
if (numDatagram <= 1) {
|
||||||
|
return scheduleRecvmsg0(submissionQueue, bufferAddress, writable) ? 1 : 0;
|
||||||
|
}
|
||||||
|
int i = 0;
|
||||||
|
// Add multiple IORING_OP_RECVMSG to the submission queue. This basically emulates recvmmsg(...)
|
||||||
|
for (; i < numDatagram && writable >= datagramSize; i++) {
|
||||||
|
if (!scheduleRecvmsg0(submissionQueue, bufferAddress, datagramSize)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bufferAddress += datagramSize;
|
||||||
|
writable -= datagramSize;
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean scheduleRecvmsg0(IOUringSubmissionQueue submissionQueue, long bufferAddress, int bufferLength) {
|
||||||
|
MsgHdrMemory msgHdrMemory = recvmsgHdrs.nextHdr();
|
||||||
|
if (msgHdrMemory == null) {
|
||||||
|
// We can not continue reading before we did not submit the recvmsg(s) and received the results.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
msgHdrMemory.write(socket, null, bufferAddress, bufferLength);
|
||||||
|
// We always use idx here so we can detect if no idx was used by checking if data < 0 in
|
||||||
|
// readComplete0(...)
|
||||||
|
submissionQueue.addRecvmsg(socket.intValue(), msgHdrMemory.address(), (short) msgHdrMemory.idx());
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void removeFromOutboundBuffer(ChannelOutboundBuffer outboundBuffer, int bytes) {
|
boolean writeComplete0(int res, int data, int outstanding) {
|
||||||
// When using Datagram we should consider the message written as long as there were any bytes written.
|
ChannelOutboundBuffer outboundBuffer = outboundBuffer();
|
||||||
boolean removed = outboundBuffer.remove();
|
if (data == -1) {
|
||||||
assert removed;
|
assert outstanding == 0;
|
||||||
|
// idx == -1 means that we did a write(...) and not a sendmsg(...) operation
|
||||||
|
return removeFromOutboundBuffer(outboundBuffer, res, "io_uring write");
|
||||||
|
}
|
||||||
|
// Store the result so we can handle it as soon as we have no outstanding writes anymore.
|
||||||
|
sendmsgResArray[data] = res;
|
||||||
|
if (outstanding == 0) {
|
||||||
|
// All writes are done as part of a batch. Let's remove these from the ChannelOutboundBuffer
|
||||||
|
boolean writtenSomething = false;
|
||||||
|
int numWritten = sendmsgHdrs.length();
|
||||||
|
sendmsgHdrs.clear();
|
||||||
|
for (int i = 0; i < numWritten; i++) {
|
||||||
|
writtenSomething |= removeFromOutboundBuffer(
|
||||||
|
outboundBuffer, sendmsgResArray[i], "io_uring sendmsg");
|
||||||
|
}
|
||||||
|
return writtenSomething;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean removeFromOutboundBuffer(ChannelOutboundBuffer outboundBuffer, int res, String errormsg) {
|
||||||
|
if (res >= 0) {
|
||||||
|
// When using Datagram we should consider the message written as long as res is not negative.
|
||||||
|
return outboundBuffer.remove();
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
return ioResult(errormsg, res) != 0;
|
||||||
|
} catch (Throwable cause) {
|
||||||
|
return outboundBuffer.remove(cause);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -499,15 +570,18 @@ public final class IOUringDatagramChannel extends AbstractIOUringChannel impleme
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void scheduleWriteMultiple(ChannelOutboundBuffer in) {
|
protected int scheduleWriteMultiple(ChannelOutboundBuffer in) {
|
||||||
// We always just use scheduleWriteSingle for now.
|
return writeProcessor.write(in);
|
||||||
scheduleWriteSingle(in.current());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void scheduleWriteSingle(Object msg) {
|
protected int scheduleWriteSingle(Object msg) {
|
||||||
|
return scheduleWrite(msg, false) ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean scheduleWrite(Object msg, boolean forceSendmsg) {
|
||||||
final ByteBuf data;
|
final ByteBuf data;
|
||||||
InetSocketAddress remoteAddress;
|
final InetSocketAddress remoteAddress;
|
||||||
if (msg instanceof AddressedEnvelope) {
|
if (msg instanceof AddressedEnvelope) {
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
AddressedEnvelope<ByteBuf, InetSocketAddress> envelope =
|
AddressedEnvelope<ByteBuf, InetSocketAddress> envelope =
|
||||||
@ -522,23 +596,31 @@ public final class IOUringDatagramChannel extends AbstractIOUringChannel impleme
|
|||||||
long bufferAddress = data.memoryAddress();
|
long bufferAddress = data.memoryAddress();
|
||||||
IOUringSubmissionQueue submissionQueue = submissionQueue();
|
IOUringSubmissionQueue submissionQueue = submissionQueue();
|
||||||
if (remoteAddress == null) {
|
if (remoteAddress == null) {
|
||||||
|
if (forceSendmsg) {
|
||||||
|
return scheduleSendmsg(
|
||||||
|
IOUringDatagramChannel.this.remoteAddress(), bufferAddress, data.readableBytes());
|
||||||
|
}
|
||||||
submissionQueue.addWrite(socket.intValue(), bufferAddress, data.readerIndex(),
|
submissionQueue.addWrite(socket.intValue(), bufferAddress, data.readerIndex(),
|
||||||
data.writerIndex(), (short) 0);
|
data.writerIndex(), (short) -1);
|
||||||
} else {
|
return true;
|
||||||
int addrLen = addrLen();
|
|
||||||
long sendmsgBufferAddr = sendmsgBufferAddr();
|
|
||||||
long sockaddrAddress = sendmsgBufferAddr + Native.SIZEOF_MSGHDR;
|
|
||||||
long iovecAddress = sockaddrAddress + Native.SIZEOF_SOCKADDR_STORAGE;
|
|
||||||
|
|
||||||
SockaddrIn.write(socket.isIpv6(), sockaddrAddress, remoteAddress);
|
|
||||||
Iov.write(iovecAddress, bufferAddress + data.readerIndex(), data.readableBytes());
|
|
||||||
MsgHdr.write(sendmsgBufferAddr, sockaddrAddress, addrLen, iovecAddress, 1);
|
|
||||||
submissionQueue.addSendmsg(socket.intValue(), sendmsgBufferAddr, (short) 0);
|
|
||||||
}
|
}
|
||||||
|
return scheduleSendmsg(remoteAddress, bufferAddress, data.readableBytes());
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean scheduleSendmsg(InetSocketAddress remoteAddress, long bufferAddress, int bufferLength) {
|
||||||
|
MsgHdrMemory hdr = sendmsgHdrs.nextHdr();
|
||||||
|
if (hdr == null) {
|
||||||
|
// There is no MsgHdrMemory left to use. We need to submit and wait for the writes to complete
|
||||||
|
// before we can write again.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
hdr.write(socket, remoteAddress, bufferAddress, bufferLength);
|
||||||
|
submissionQueue().addSendmsg(socket.intValue(), hdr.address(), (short) hdr.idx());
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private IOException translateForConnected(NativeIoException e) {
|
private static IOException translateForConnected(NativeIoException e) {
|
||||||
// We need to correctly translate connect errors to match NIO behaviour.
|
// We need to correctly translate connect errors to match NIO behaviour.
|
||||||
if (e.expectedErr() == Errors.ERROR_ECONNREFUSED_NEGATIVE) {
|
if (e.expectedErr() == Errors.ERROR_ECONNREFUSED_NEGATIVE) {
|
||||||
PortUnreachableException error = new PortUnreachableException(e.getMessage());
|
PortUnreachableException error = new PortUnreachableException(e.getMessage());
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
*/
|
*/
|
||||||
package io.netty.channel.uring;
|
package io.netty.channel.uring;
|
||||||
|
|
||||||
|
import io.netty.buffer.ByteBuf;
|
||||||
import io.netty.buffer.ByteBufAllocator;
|
import io.netty.buffer.ByteBufAllocator;
|
||||||
import io.netty.channel.ChannelException;
|
import io.netty.channel.ChannelException;
|
||||||
import io.netty.channel.ChannelOption;
|
import io.netty.channel.ChannelOption;
|
||||||
@ -24,6 +25,7 @@ import io.netty.channel.MessageSizeEstimator;
|
|||||||
import io.netty.channel.RecvByteBufAllocator;
|
import io.netty.channel.RecvByteBufAllocator;
|
||||||
import io.netty.channel.WriteBufferWaterMark;
|
import io.netty.channel.WriteBufferWaterMark;
|
||||||
import io.netty.channel.socket.DatagramChannelConfig;
|
import io.netty.channel.socket.DatagramChannelConfig;
|
||||||
|
import io.netty.util.internal.ObjectUtil;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.InetAddress;
|
import java.net.InetAddress;
|
||||||
@ -33,6 +35,7 @@ import java.util.Map;
|
|||||||
public final class IOUringDatagramChannelConfig extends DefaultChannelConfig implements DatagramChannelConfig {
|
public final class IOUringDatagramChannelConfig extends DefaultChannelConfig implements DatagramChannelConfig {
|
||||||
private static final RecvByteBufAllocator DEFAULT_RCVBUF_ALLOCATOR = new FixedRecvByteBufAllocator(2048);
|
private static final RecvByteBufAllocator DEFAULT_RCVBUF_ALLOCATOR = new FixedRecvByteBufAllocator(2048);
|
||||||
private boolean activeOnOpen;
|
private boolean activeOnOpen;
|
||||||
|
private volatile int maxDatagramSize;
|
||||||
|
|
||||||
IOUringDatagramChannelConfig(AbstractIOUringChannel channel) {
|
IOUringDatagramChannelConfig(AbstractIOUringChannel channel) {
|
||||||
super(channel);
|
super(channel);
|
||||||
@ -49,7 +52,7 @@ public final class IOUringDatagramChannelConfig extends DefaultChannelConfig imp
|
|||||||
ChannelOption.IP_MULTICAST_ADDR, ChannelOption.IP_MULTICAST_IF, ChannelOption.IP_MULTICAST_TTL,
|
ChannelOption.IP_MULTICAST_ADDR, ChannelOption.IP_MULTICAST_IF, ChannelOption.IP_MULTICAST_TTL,
|
||||||
ChannelOption.IP_TOS, ChannelOption.DATAGRAM_CHANNEL_ACTIVE_ON_REGISTRATION,
|
ChannelOption.IP_TOS, ChannelOption.DATAGRAM_CHANNEL_ACTIVE_ON_REGISTRATION,
|
||||||
IOUringChannelOption.SO_REUSEPORT, IOUringChannelOption.IP_FREEBIND,
|
IOUringChannelOption.SO_REUSEPORT, IOUringChannelOption.IP_FREEBIND,
|
||||||
IOUringChannelOption.IP_TRANSPARENT);
|
IOUringChannelOption.IP_TRANSPARENT, IOUringChannelOption.MAX_DATAGRAM_PAYLOAD_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings({ "unchecked", "deprecation" })
|
@SuppressWarnings({ "unchecked", "deprecation" })
|
||||||
@ -94,6 +97,9 @@ public final class IOUringDatagramChannelConfig extends DefaultChannelConfig imp
|
|||||||
if (option == IOUringChannelOption.IP_FREEBIND) {
|
if (option == IOUringChannelOption.IP_FREEBIND) {
|
||||||
return (T) Boolean.valueOf(isFreeBind());
|
return (T) Boolean.valueOf(isFreeBind());
|
||||||
}
|
}
|
||||||
|
if (option == IOUringChannelOption.MAX_DATAGRAM_PAYLOAD_SIZE) {
|
||||||
|
return (T) Integer.valueOf(getMaxDatagramPayloadSize());
|
||||||
|
}
|
||||||
return super.getOption(option);
|
return super.getOption(option);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -128,6 +134,8 @@ public final class IOUringDatagramChannelConfig extends DefaultChannelConfig imp
|
|||||||
setFreeBind((Boolean) value);
|
setFreeBind((Boolean) value);
|
||||||
} else if (option == IOUringChannelOption.IP_TRANSPARENT) {
|
} else if (option == IOUringChannelOption.IP_TRANSPARENT) {
|
||||||
setIpTransparent((Boolean) value);
|
setIpTransparent((Boolean) value);
|
||||||
|
} else if (option == IOUringChannelOption.MAX_DATAGRAM_PAYLOAD_SIZE) {
|
||||||
|
setMaxDatagramPayloadSize((Integer) value);
|
||||||
} else {
|
} else {
|
||||||
return super.setOption(option, value);
|
return super.setOption(option, value);
|
||||||
}
|
}
|
||||||
@ -463,4 +471,25 @@ public final class IOUringDatagramChannelConfig extends DefaultChannelConfig imp
|
|||||||
throw new ChannelException(e);
|
throw new ChannelException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the maximum {@link io.netty.channel.socket.DatagramPacket} size. This will be used to determine if
|
||||||
|
* a batch of {@code IORING_IO_RECVMSG} should be used when reading from the underlying socket.
|
||||||
|
* When batched {@code recvmmsg} is used
|
||||||
|
* we may be able to read multiple {@link io.netty.channel.socket.DatagramPacket}s with one syscall and so
|
||||||
|
* greatly improve the performance. This number will be used to slice {@link ByteBuf}s returned by the used
|
||||||
|
* {@link RecvByteBufAllocator}. You can use {@code 0} to disable the usage of batching, any other bigger value
|
||||||
|
* will enable it.
|
||||||
|
*/
|
||||||
|
public IOUringDatagramChannelConfig setMaxDatagramPayloadSize(int maxDatagramSize) {
|
||||||
|
this.maxDatagramSize = ObjectUtil.checkPositiveOrZero(maxDatagramSize, "maxDatagramSize");
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the maximum {@link io.netty.channel.socket.DatagramPacket} size.
|
||||||
|
*/
|
||||||
|
public int getMaxDatagramPayloadSize() {
|
||||||
|
return maxDatagramSize;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -235,10 +235,10 @@ final class IOUringEventLoop extends SingleThreadEventLoop implements IOUringCom
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (op == Native.IORING_OP_READ || op == Native.IORING_OP_ACCEPT || op == Native.IORING_OP_RECVMSG) {
|
if (op == Native.IORING_OP_READ || op == Native.IORING_OP_ACCEPT || op == Native.IORING_OP_RECVMSG) {
|
||||||
handleRead(channel, res);
|
handleRead(channel, res, data);
|
||||||
} else if (op == Native.IORING_OP_WRITEV ||
|
} else if (op == Native.IORING_OP_WRITEV ||
|
||||||
op == Native.IORING_OP_WRITE || op == Native.IORING_OP_SENDMSG) {
|
op == Native.IORING_OP_WRITE || op == Native.IORING_OP_SENDMSG) {
|
||||||
handleWrite(channel, res);
|
handleWrite(channel, res, data);
|
||||||
} else if (op == Native.IORING_OP_POLL_ADD) {
|
} else if (op == Native.IORING_OP_POLL_ADD) {
|
||||||
handlePollAdd(channel, res, data);
|
handlePollAdd(channel, res, data);
|
||||||
} else if (op == Native.IORING_OP_POLL_REMOVE) {
|
} else if (op == Native.IORING_OP_POLL_REMOVE) {
|
||||||
@ -259,12 +259,12 @@ final class IOUringEventLoop extends SingleThreadEventLoop implements IOUringCom
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleRead(AbstractIOUringChannel channel, int res) {
|
private void handleRead(AbstractIOUringChannel channel, int res, int data) {
|
||||||
channel.ioUringUnsafe().readComplete(res);
|
channel.ioUringUnsafe().readComplete(res, data);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleWrite(AbstractIOUringChannel channel, int res) {
|
private void handleWrite(AbstractIOUringChannel channel, int res, int data) {
|
||||||
channel.ioUringUnsafe().writeComplete(res);
|
channel.ioUringUnsafe().writeComplete(res, data);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handlePollAdd(AbstractIOUringChannel channel, int res, int pollMask) {
|
private void handlePollAdd(AbstractIOUringChannel channel, int res, int pollMask) {
|
||||||
|
@ -37,4 +37,20 @@ final class Iov {
|
|||||||
PlatformDependent.putLong(iovAddress + Native.IOVEC_OFFSETOF_IOV_LEN, length);
|
PlatformDependent.putLong(iovAddress + Native.IOVEC_OFFSETOF_IOV_LEN, length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static long readBufferAddress(long iovAddress) {
|
||||||
|
if (Native.SIZEOF_SIZE_T == 4) {
|
||||||
|
return PlatformDependent.getInt(iovAddress + Native.IOVEC_OFFSETOF_IOV_BASE);
|
||||||
|
}
|
||||||
|
assert Native.SIZEOF_SIZE_T == 8;
|
||||||
|
return PlatformDependent.getLong(iovAddress + Native.IOVEC_OFFSETOF_IOV_BASE);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int readBufferLength(long iovAddress) {
|
||||||
|
if (Native.SIZEOF_SIZE_T == 4) {
|
||||||
|
return PlatformDependent.getInt(iovAddress + Native.IOVEC_OFFSETOF_IOV_LEN);
|
||||||
|
}
|
||||||
|
assert Native.SIZEOF_SIZE_T == 8;
|
||||||
|
return (int) PlatformDependent.getLong(iovAddress + Native.IOVEC_OFFSETOF_IOV_LEN);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,83 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 The Netty Project
|
||||||
|
*
|
||||||
|
* The Netty Project licenses this file to you under the Apache License,
|
||||||
|
* version 2.0 (the "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at:
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package io.netty.channel.uring;
|
||||||
|
|
||||||
|
import io.netty.buffer.ByteBuf;
|
||||||
|
import io.netty.channel.socket.DatagramPacket;
|
||||||
|
import io.netty.util.internal.PlatformDependent;
|
||||||
|
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
|
final class MsgHdrMemory {
|
||||||
|
private final long memory;
|
||||||
|
private final int idx;
|
||||||
|
|
||||||
|
MsgHdrMemory(int idx) {
|
||||||
|
this.idx = idx;
|
||||||
|
int size = Native.SIZEOF_MSGHDR + Native.SIZEOF_SOCKADDR_STORAGE + Native.SIZEOF_IOVEC;
|
||||||
|
memory = PlatformDependent.allocateMemory(size);
|
||||||
|
PlatformDependent.setMemory(memory, size, (byte) 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void write(LinuxSocket socket, InetSocketAddress address, long bufferAddress , int length) {
|
||||||
|
long sockAddress = memory + Native.SIZEOF_MSGHDR;
|
||||||
|
long iovAddress = sockAddress + Native.SIZEOF_SOCKADDR_STORAGE;
|
||||||
|
int addressLength;
|
||||||
|
if (address == null) {
|
||||||
|
addressLength = socket.isIpv6() ? Native.SIZEOF_SOCKADDR_IN6 : Native.SIZEOF_SOCKADDR_IN;
|
||||||
|
PlatformDependent.setMemory(sockAddress, Native.SIZEOF_SOCKADDR_STORAGE, (byte) 0);
|
||||||
|
} else {
|
||||||
|
addressLength = SockaddrIn.write(socket.isIpv6(), sockAddress, address);
|
||||||
|
}
|
||||||
|
Iov.write(iovAddress, bufferAddress, length);
|
||||||
|
MsgHdr.write(memory, sockAddress, addressLength, iovAddress, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
DatagramPacket read(IOUringDatagramChannel channel, ByteBuf buffer, int bytesRead) {
|
||||||
|
long sockAddress = memory + Native.SIZEOF_MSGHDR;
|
||||||
|
IOUringEventLoop eventLoop = (IOUringEventLoop) channel.eventLoop();
|
||||||
|
InetSocketAddress sender;
|
||||||
|
if (channel.socket.isIpv6()) {
|
||||||
|
byte[] bytes = eventLoop.inet6AddressArray();
|
||||||
|
sender = SockaddrIn.readIPv6(sockAddress, bytes);
|
||||||
|
} else {
|
||||||
|
byte[] bytes = eventLoop.inet4AddressArray();
|
||||||
|
sender = SockaddrIn.readIPv4(sockAddress, bytes);
|
||||||
|
}
|
||||||
|
long iovAddress = memory + Native.SIZEOF_MSGHDR + Native.SIZEOF_SOCKADDR_STORAGE;
|
||||||
|
long bufferAddress = Iov.readBufferAddress(iovAddress);
|
||||||
|
int bufferLength = Iov.readBufferLength(iovAddress);
|
||||||
|
// reconstruct the reader index based on the memoryAddress of the buffer and the bufferAddress that was used
|
||||||
|
// in the iovec.
|
||||||
|
int readerIndex = (int) (bufferAddress - buffer.memoryAddress());
|
||||||
|
|
||||||
|
ByteBuf slice = buffer.slice(readerIndex, bufferLength)
|
||||||
|
.writerIndex(bytesRead);
|
||||||
|
return new DatagramPacket(slice.retain(), channel.localAddress(), sender);
|
||||||
|
}
|
||||||
|
|
||||||
|
int idx() {
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
long address() {
|
||||||
|
return memory;
|
||||||
|
}
|
||||||
|
|
||||||
|
void release() {
|
||||||
|
PlatformDependent.freeMemory(memory);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,59 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2020 The Netty Project
|
||||||
|
*
|
||||||
|
* The Netty Project licenses this file to you under the Apache License,
|
||||||
|
* version 2.0 (the "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at:
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package io.netty.channel.uring;
|
||||||
|
|
||||||
|
final class MsgHdrMemoryArray {
|
||||||
|
private int idx;
|
||||||
|
private final MsgHdrMemory[] hdrs;
|
||||||
|
private final int capacity;
|
||||||
|
|
||||||
|
MsgHdrMemoryArray(int capacity) {
|
||||||
|
this.capacity = capacity;
|
||||||
|
hdrs = new MsgHdrMemory[capacity];
|
||||||
|
for (int i = 0; i < hdrs.length; i++) {
|
||||||
|
hdrs[i] = new MsgHdrMemory(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MsgHdrMemory nextHdr() {
|
||||||
|
if (idx == hdrs.length - 1) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return hdrs[idx++];
|
||||||
|
}
|
||||||
|
|
||||||
|
MsgHdrMemory hdr(int idx) {
|
||||||
|
return hdrs[idx];
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear() {
|
||||||
|
idx = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int length() {
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
void release() {
|
||||||
|
for (MsgHdrMemory hdr: hdrs) {
|
||||||
|
hdr.release();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int capacity() {
|
||||||
|
return capacity;
|
||||||
|
}
|
||||||
|
}
|
@ -29,11 +29,11 @@ final class SockaddrIn {
|
|||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, (byte) 0xff, (byte) 0xff };
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, (byte) 0xff, (byte) 0xff };
|
||||||
private SockaddrIn() { }
|
private SockaddrIn() { }
|
||||||
|
|
||||||
static void write(boolean ipv6, long memory, InetSocketAddress address) {
|
static int write(boolean ipv6, long memory, InetSocketAddress address) {
|
||||||
if (ipv6) {
|
if (ipv6) {
|
||||||
SockaddrIn.writeIPv6(memory, address.getAddress(), address.getPort());
|
return SockaddrIn.writeIPv6(memory, address.getAddress(), address.getPort());
|
||||||
} else {
|
} else {
|
||||||
SockaddrIn.writeIPv4(memory, address.getAddress(), address.getPort());
|
return SockaddrIn.writeIPv4(memory, address.getAddress(), address.getPort());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user