[#2667] Write until EAGAIN in native transport
Motivation: We need to continue write until we hit EAGAIN to make sure we not see an starvation Modification: Write until EAGAIN is returned Result: No starvation when using native transport with ET.
This commit is contained in:
parent
49d2cfe4ea
commit
3ca19af2f8
@ -581,14 +581,7 @@ JNIEXPORT jint JNICALL Java_io_netty_channel_epoll_Native_write(JNIEnv * env, jc
|
|||||||
throwRuntimeException(env, "Unable to access address of buffer");
|
throwRuntimeException(env, "Unable to access address of buffer");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
jint res = write0(env, clazz, fd, buffer, pos, limit);
|
return write0(env, clazz, fd, buffer, pos, limit);
|
||||||
if (res > 0) {
|
|
||||||
// Increment the pos of the ByteBuffer as it may be only partial written to prevent data-corruption later once we
|
|
||||||
// try to write the remaining data.
|
|
||||||
// See https://github.com/netty/netty/issues/2371
|
|
||||||
incrementPosition(env, jbuffer, res);
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
JNIEXPORT jint JNICALL Java_io_netty_channel_epoll_Native_writeAddress(JNIEnv * env, jclass clazz, jint fd, jlong address, jint pos, jint limit) {
|
JNIEXPORT jint JNICALL Java_io_netty_channel_epoll_Native_writeAddress(JNIEnv * env, jclass clazz, jint fd, jlong address, jint pos, jint limit) {
|
||||||
@ -675,18 +668,6 @@ JNIEXPORT jobject JNICALL Java_io_netty_channel_epoll_Native_recvFromAddress(JNI
|
|||||||
return recvFrom0(env, fd, (void*) address, pos, limit);
|
return recvFrom0(env, fd, (void*) address, pos, limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
void incrementPosition(JNIEnv * env, jobject bufObj, int written) {
|
|
||||||
// Get the current position using the (*env)->GetIntField if possible and fallback
|
|
||||||
// to slower (*env)->CallIntMethod(...) if needed
|
|
||||||
if (posFieldId == NULL) {
|
|
||||||
jint pos = (*env)->CallIntMethod(env, bufObj, posId, NULL);
|
|
||||||
(*env)->CallObjectMethod(env, bufObj, updatePosId, pos + written);
|
|
||||||
} else {
|
|
||||||
jint pos = (*env)->GetIntField(env, bufObj, posFieldId);
|
|
||||||
(*env)->SetIntField(env, bufObj, posFieldId, pos + written);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
jlong writev0(JNIEnv * env, jclass clazz, jint fd, struct iovec iov[], jint length) {
|
jlong writev0(JNIEnv * env, jclass clazz, jint fd, struct iovec iov[], jint length) {
|
||||||
ssize_t res;
|
ssize_t res;
|
||||||
int err;
|
int err;
|
||||||
|
@ -107,28 +107,43 @@ public final class EpollSocketChannel extends AbstractEpollChannel implements So
|
|||||||
* Write bytes form the given {@link ByteBuf} to the underlying {@link java.nio.channels.Channel}.
|
* Write bytes form the given {@link ByteBuf} to the underlying {@link java.nio.channels.Channel}.
|
||||||
* @param buf the {@link ByteBuf} from which the bytes should be written
|
* @param buf the {@link ByteBuf} from which the bytes should be written
|
||||||
*/
|
*/
|
||||||
private int doWriteBytes(ByteBuf buf) throws Exception {
|
private boolean writeBytes(ChannelOutboundBuffer in, ByteBuf buf) throws Exception {
|
||||||
int readerIndex = buf.readerIndex();
|
int readableBytes = buf.readableBytes();
|
||||||
int localFlushedAmount;
|
if (readableBytes == 0) {
|
||||||
|
in.remove();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
boolean setEpollOut = false;
|
||||||
|
boolean done = false;
|
||||||
|
long writtenBytes = 0;
|
||||||
if (buf.nioBufferCount() == 1) {
|
if (buf.nioBufferCount() == 1) {
|
||||||
if (buf.hasMemoryAddress()) {
|
int readerIndex = buf.readerIndex();
|
||||||
localFlushedAmount = Native.writeAddress(fd, buf.memoryAddress(), readerIndex, buf.writerIndex());
|
ByteBuffer nioBuf = buf.internalNioBuffer(readerIndex, buf.readableBytes());
|
||||||
} else {
|
for (;;) {
|
||||||
ByteBuffer nioBuf = buf.internalNioBuffer(readerIndex, buf.readableBytes());
|
int pos = nioBuf.position();
|
||||||
localFlushedAmount = Native.write(fd, nioBuf, nioBuf.position(), nioBuf.limit());
|
int limit = nioBuf.limit();
|
||||||
|
int localFlushedAmount = Native.write(fd, nioBuf, pos, limit);
|
||||||
|
if (localFlushedAmount > 0) {
|
||||||
|
nioBuf.position(pos + localFlushedAmount);
|
||||||
|
writtenBytes += localFlushedAmount;
|
||||||
|
if (writtenBytes == readableBytes) {
|
||||||
|
done = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
setEpollOut = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
updateOutboundBuffer(in, writtenBytes, 1, done, setEpollOut);
|
||||||
|
return done;
|
||||||
} else {
|
} else {
|
||||||
// backed by more then one buffer, do a gathering write...
|
ByteBuffer[] nioBuffers = buf.nioBuffers();
|
||||||
ByteBuffer[] nioBufs = buf.nioBuffers();
|
return writeBytesMultiple0(in, 1, nioBuffers, nioBuffers.length, readableBytes);
|
||||||
localFlushedAmount = (int) Native.writev(fd, nioBufs, 0, nioBufs.length);
|
|
||||||
}
|
}
|
||||||
if (localFlushedAmount > 0) {
|
|
||||||
buf.readerIndex(readerIndex + localFlushedAmount);
|
|
||||||
}
|
|
||||||
return localFlushedAmount;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void writeBytesMultiple(
|
private boolean writeBytesMultiple(
|
||||||
EpollChannelOutboundBuffer in, int msgCount, AddressEntry[] addresses) throws IOException {
|
EpollChannelOutboundBuffer in, int msgCount, AddressEntry[] addresses) throws IOException {
|
||||||
|
|
||||||
int addressCnt = in.addressCount();
|
int addressCnt = in.addressCount();
|
||||||
@ -138,9 +153,8 @@ public final class EpollSocketChannel extends AbstractEpollChannel implements So
|
|||||||
long writtenBytes = 0;
|
long writtenBytes = 0;
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
int end = offset + addressCnt;
|
int end = offset + addressCnt;
|
||||||
int spinCount = config.getWriteSpinCount();
|
|
||||||
loop: while (addressCnt > 0) {
|
loop: while (addressCnt > 0) {
|
||||||
for (int i = spinCount - 1; i >= 0; i --) {
|
for (;;) {
|
||||||
int cnt = addressCnt > Native.IOV_MAX? Native.IOV_MAX : addressCnt;
|
int cnt = addressCnt > Native.IOV_MAX? Native.IOV_MAX : addressCnt;
|
||||||
|
|
||||||
long localWrittenBytes = Native.writevAddresses(fd, addresses, offset, cnt);
|
long localWrittenBytes = Native.writevAddresses(fd, addresses, offset, cnt);
|
||||||
@ -165,30 +179,33 @@ public final class EpollSocketChannel extends AbstractEpollChannel implements So
|
|||||||
localWrittenBytes -= bytes;
|
localWrittenBytes -= bytes;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (expectedWrittenBytes == 0) {
|
if (expectedWrittenBytes == 0) {
|
||||||
done = true;
|
done = true;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
updateOutboundBuffer(in, writtenBytes, msgCount, done, setEpollOut);
|
updateOutboundBuffer(in, writtenBytes, msgCount, done, setEpollOut);
|
||||||
|
return done;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void writeBytesMultiple(
|
private boolean writeBytesMultiple(
|
||||||
NioSocketChannelOutboundBuffer in, int msgCount, ByteBuffer[] nioBuffers) throws IOException {
|
NioSocketChannelOutboundBuffer in, int msgCount, ByteBuffer[] nioBuffers) throws IOException {
|
||||||
|
return writeBytesMultiple0(in, msgCount, nioBuffers, in.nioBufferCount(), in.nioBufferSize());
|
||||||
|
}
|
||||||
|
|
||||||
int nioBufferCnt = in.nioBufferCount();
|
private boolean writeBytesMultiple0(
|
||||||
long expectedWrittenBytes = in.nioBufferSize();
|
ChannelOutboundBuffer in, int msgCount, ByteBuffer[] nioBuffers,
|
||||||
|
int nioBufferCnt, long expectedWrittenBytes) throws IOException {
|
||||||
boolean done = false;
|
boolean done = false;
|
||||||
boolean setEpollOut = false;
|
boolean setEpollOut = false;
|
||||||
long writtenBytes = 0;
|
long writtenBytes = 0;
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
int end = offset + nioBufferCnt;
|
int end = offset + nioBufferCnt;
|
||||||
int spinCount = config.getWriteSpinCount();
|
|
||||||
loop: while (nioBufferCnt > 0) {
|
loop: while (nioBufferCnt > 0) {
|
||||||
for (int i = spinCount - 1; i >= 0; i --) {
|
for (;;) {
|
||||||
int cnt = nioBufferCnt > Native.IOV_MAX? Native.IOV_MAX : nioBufferCnt;
|
int cnt = nioBufferCnt > Native.IOV_MAX? Native.IOV_MAX : nioBufferCnt;
|
||||||
|
|
||||||
long localWrittenBytes = Native.writev(fd, nioBuffers, offset, cnt);
|
long localWrittenBytes = Native.writev(fd, nioBuffers, offset, cnt);
|
||||||
@ -206,6 +223,9 @@ public final class EpollSocketChannel extends AbstractEpollChannel implements So
|
|||||||
if (bytes > localWrittenBytes) {
|
if (bytes > localWrittenBytes) {
|
||||||
buffer.position(pos + (int) localWrittenBytes);
|
buffer.position(pos + (int) localWrittenBytes);
|
||||||
// incomplete write
|
// incomplete write
|
||||||
|
|
||||||
|
// As we use edge-triggered we need to set EPOLLOUT as otherwise we may not get notified again
|
||||||
|
setEpollOut();
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
offset++;
|
offset++;
|
||||||
@ -213,15 +233,15 @@ public final class EpollSocketChannel extends AbstractEpollChannel implements So
|
|||||||
localWrittenBytes -= bytes;
|
localWrittenBytes -= bytes;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (expectedWrittenBytes == 0) {
|
if (expectedWrittenBytes == 0) {
|
||||||
done = true;
|
done = true;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
updateOutboundBuffer(in, writtenBytes, msgCount, done, setEpollOut);
|
updateOutboundBuffer(in, writtenBytes, msgCount, done, setEpollOut);
|
||||||
|
return done;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void updateOutboundBuffer(ChannelOutboundBuffer in, long writtenBytes, int msgCount,
|
private void updateOutboundBuffer(ChannelOutboundBuffer in, long writtenBytes, int msgCount,
|
||||||
@ -231,11 +251,7 @@ public final class EpollSocketChannel extends AbstractEpollChannel implements So
|
|||||||
for (int i = msgCount; i > 0; i --) {
|
for (int i = msgCount; i > 0; i --) {
|
||||||
in.remove();
|
in.remove();
|
||||||
}
|
}
|
||||||
|
in.progress(writtenBytes);
|
||||||
// Finish the write loop if no new messages were flushed by in.remove().
|
|
||||||
if (in.isEmpty()) {
|
|
||||||
clearEpollOut();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// Did not write all buffers completely.
|
// Did not write all buffers completely.
|
||||||
// Release the fully written buffers and update the indexes of the partially written buffer.
|
// Release the fully written buffers and update the indexes of the partially written buffer.
|
||||||
@ -273,7 +289,7 @@ public final class EpollSocketChannel extends AbstractEpollChannel implements So
|
|||||||
flushTask = this.flushTask = new Runnable() {
|
flushTask = this.flushTask = new Runnable() {
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
flush();
|
unsafe().flush();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -287,8 +303,37 @@ public final class EpollSocketChannel extends AbstractEpollChannel implements So
|
|||||||
* @param region the {@link DefaultFileRegion} from which the bytes should be written
|
* @param region the {@link DefaultFileRegion} from which the bytes should be written
|
||||||
* @return amount the amount of written bytes
|
* @return amount the amount of written bytes
|
||||||
*/
|
*/
|
||||||
private long doWriteFileRegion(DefaultFileRegion region, long count) throws Exception {
|
private boolean writeFileRegion(ChannelOutboundBuffer in, DefaultFileRegion region) throws Exception {
|
||||||
return Native.sendfile(fd, region, region.transfered(), count);
|
boolean setOpWrite = false;
|
||||||
|
boolean done = false;
|
||||||
|
long flushedAmount = 0;
|
||||||
|
|
||||||
|
for (int i = config().getWriteSpinCount() - 1; i >= 0; i --) {
|
||||||
|
long expected = region.count() - region.position();
|
||||||
|
long localFlushedAmount = Native.sendfile(fd, region, region.transfered(), expected);
|
||||||
|
if (localFlushedAmount == 0) {
|
||||||
|
setOpWrite = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
flushedAmount += localFlushedAmount;
|
||||||
|
if (region.transfered() >= region.count()) {
|
||||||
|
done = true;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
// As we use edge-triggered we need to set EPOLLOUT as otherwise we may not get notified again
|
||||||
|
setEpollOut();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
in.progress(flushedAmount);
|
||||||
|
|
||||||
|
if (done) {
|
||||||
|
in.remove();
|
||||||
|
} else {
|
||||||
|
incompleteWrite(setOpWrite);
|
||||||
|
}
|
||||||
|
return done;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -312,7 +357,11 @@ public final class EpollSocketChannel extends AbstractEpollChannel implements So
|
|||||||
// Ensure the pending writes are made of memoryaddresses only.
|
// Ensure the pending writes are made of memoryaddresses only.
|
||||||
AddressEntry[] addresses = epollIn.memoryAddresses();
|
AddressEntry[] addresses = epollIn.memoryAddresses();
|
||||||
if (addresses != null) {
|
if (addresses != null) {
|
||||||
writeBytesMultiple(epollIn, msgCount, addresses);
|
if (!writeBytesMultiple(epollIn, msgCount, addresses)) {
|
||||||
|
// was not able to write everything so break here we will get notified later again once
|
||||||
|
// the network stack can handle more writes.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// We do not break the loop here even if the outbound buffer was flushed completely,
|
// We do not break the loop here even if the outbound buffer was flushed completely,
|
||||||
// because a user might have triggered another write and flush when we notify his or her
|
// because a user might have triggered another write and flush when we notify his or her
|
||||||
@ -322,9 +371,13 @@ public final class EpollSocketChannel extends AbstractEpollChannel implements So
|
|||||||
} else {
|
} else {
|
||||||
NioSocketChannelOutboundBuffer nioIn = (NioSocketChannelOutboundBuffer) in;
|
NioSocketChannelOutboundBuffer nioIn = (NioSocketChannelOutboundBuffer) in;
|
||||||
// Ensure the pending writes are made of memoryaddresses only.
|
// Ensure the pending writes are made of memoryaddresses only.
|
||||||
ByteBuffer[] buffers = nioIn.nioBuffers();
|
ByteBuffer[] nioBuffers = nioIn.nioBuffers();
|
||||||
if (buffers != null) {
|
if (nioBuffers != null) {
|
||||||
writeBytesMultiple(nioIn, msgCount, buffers);
|
if (!writeBytesMultiple(nioIn, msgCount, nioBuffers)) {
|
||||||
|
// was not able to write everything so break here we will get notified later again once
|
||||||
|
// the network stack can handle more writes.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// We do not break the loop here even if the outbound buffer was flushed completely,
|
// We do not break the loop here even if the outbound buffer was flushed completely,
|
||||||
// because a user might have triggered another write and flush when we notify his or her
|
// because a user might have triggered another write and flush when we notify his or her
|
||||||
@ -338,52 +391,18 @@ public final class EpollSocketChannel extends AbstractEpollChannel implements So
|
|||||||
Object msg = in.current();
|
Object msg = in.current();
|
||||||
if (msg instanceof ByteBuf) {
|
if (msg instanceof ByteBuf) {
|
||||||
ByteBuf buf = (ByteBuf) msg;
|
ByteBuf buf = (ByteBuf) msg;
|
||||||
int readableBytes = buf.readableBytes();
|
if (!writeBytes(in, buf)) {
|
||||||
if (readableBytes == 0) {
|
// was not able to write everything so break here we will get notified later again once
|
||||||
in.remove();
|
// the network stack can handle more writes.
|
||||||
continue;
|
|
||||||
}
|
|
||||||
boolean setEpollOut = false;
|
|
||||||
boolean done = false;
|
|
||||||
long flushedAmount = 0;
|
|
||||||
int writeSpinCount = config().getWriteSpinCount();
|
|
||||||
for (int i = writeSpinCount - 1; i >= 0; i --) {
|
|
||||||
int localFlushedAmount = doWriteBytes(buf);
|
|
||||||
if (localFlushedAmount == 0) {
|
|
||||||
setEpollOut = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
flushedAmount += localFlushedAmount;
|
|
||||||
if (!buf.isReadable()) {
|
|
||||||
done = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
in.progress(flushedAmount);
|
|
||||||
|
|
||||||
if (done) {
|
|
||||||
in.remove();
|
|
||||||
} else {
|
|
||||||
incompleteWrite(setEpollOut);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if (msg instanceof DefaultFileRegion) {
|
} else if (msg instanceof DefaultFileRegion) {
|
||||||
DefaultFileRegion region = (DefaultFileRegion) msg;
|
DefaultFileRegion region = (DefaultFileRegion) msg;
|
||||||
|
if (!writeFileRegion(in, region)) {
|
||||||
long expected = region.count() - region.position();
|
// was not able to write everything so break here we will get notified later again once
|
||||||
long localFlushedAmount = doWriteFileRegion(region, expected);
|
// the network stack can handle more writes.
|
||||||
in.progress(localFlushedAmount);
|
|
||||||
|
|
||||||
if (localFlushedAmount < expected) {
|
|
||||||
setEpollOut();
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (region.transfered() >= region.count()) {
|
|
||||||
in.remove();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
throw new UnsupportedOperationException("unsupported message type: " + StringUtil.simpleClassName(msg));
|
throw new UnsupportedOperationException("unsupported message type: " + StringUtil.simpleClassName(msg));
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user