Epoll: Avoid redundant EPOLL_CTL_MOD calls (#9397)

Motivation

Currently an epoll_ctl syscall is made every time there is a change to
the event interest flags (EPOLLIN, EPOLLOUT, etc.) of a channel. These
changes are only made from the event loop, so they can be aggregated
into 0 or 1 such calls per channel prior to the next call to epoll_wait.

Modifications

I think further streamlining/simplification is possible but for now I've
tried to minimize structural changes and added the aggregation beneath
the existing flag manipulation logic.

A new AbstractEpollChannel#activeFlags field records the flags last set
on the epoll fd for that channel. Calls to setFlag/clearFlag update the
flags field as before, but instead of calling epoll_ctl immediately they
just set or clear a bit for the channel in a new bitset in the
associated event loop's EpollHandler to reflect whether there's any
change from the last set value.

Prior to calling epoll_wait the event loop makes the appropriate
epoll_ctl(EPOLL_CTL_MOD) call once for each channel whose bit is set.

Result

Fewer syscalls, particularly in some auto-read=false cases. Simplified
error handling from centralization of these calls.
This commit is contained in:
Nick Hill 2019-08-18 23:24:42 -07:00 committed by Norman Maurer
parent 67b851209f
commit 250b279bd9
4 changed files with 74 additions and 68 deletions

View File

@ -69,7 +69,8 @@ abstract class AbstractEpollChannel extends AbstractChannel implements UnixChann
private volatile SocketAddress local;
private volatile SocketAddress remote;
protected int flags = Native.EPOLLET;
protected int flags = Native.EPOLLET | Native.EPOLLIN;
protected int activeFlags;
boolean inputClosedSeenErrorOnRead;
boolean epollInReadyRunnablePending;
@ -109,17 +110,23 @@ abstract class AbstractEpollChannel extends AbstractChannel implements UnixChann
}
}
void setFlag(int flag) throws IOException {
void setFlag(int flag) {
if (!isFlagSet(flag)) {
flags |= flag;
modifyEvents();
updatePendingFlagsSet();
}
}
void clearFlag(int flag) throws IOException {
void clearFlag(int flag) {
if (isFlagSet(flag)) {
flags &= ~flag;
modifyEvents();
updatePendingFlagsSet();
}
}
/**
 * Tells the current registration that this channel's event interest flags have changed.
 * Does nothing unless the channel reports itself as registered and a registration object is present.
 */
private void updatePendingFlagsSet() {
if (isRegistered() && registration != null) {
// NOTE(review): update() is expected to only record the change, with the epoll_ctl call made
// later by the handler -- confirm against the EpollRegistration implementation in use.
registration.update();
}
}
@ -247,33 +254,27 @@ abstract class AbstractEpollChannel extends AbstractChannel implements UnixChann
((SocketChannelConfig) config).isAllowHalfClosure();
}
private Runnable clearEpollInTask;
final void clearEpollIn() {
// Only clear if registered with an EventLoop as otherwise
if (isRegistered()) {
final EventLoop loop = eventLoop();
final AbstractEpollUnsafe unsafe = (AbstractEpollUnsafe) unsafe();
if (loop.inEventLoop()) {
unsafe.clearEpollIn0();
} else {
// schedule a task to clear the EPOLLIN as it is not safe to modify it directly
loop.execute(() -> {
if (!unsafe.readPending && !config().isAutoRead()) {
// Still no read triggered so clear it now
unsafe.clearEpollIn0();
}
});
}
} else {
// The EventLoop is not registered atm so just update the flags so the correct value
// will be used once the channel is registered
flags &= ~Native.EPOLLIN;
final EventLoop loop = isRegistered() ? eventLoop() : null;
final AbstractEpollUnsafe unsafe = (AbstractEpollUnsafe) unsafe();
if (loop == null || loop.inEventLoop()) {
unsafe.clearEpollIn0();
return;
}
}
private void modifyEvents() throws IOException {
if (isOpen() && isRegistered() && registration != null) {
registration.update();
// schedule a task to clear the EPOLLIN as it is not safe to modify it directly
Runnable clearFlagTask = clearEpollInTask;
if (clearFlagTask == null) {
clearEpollInTask = clearFlagTask = () -> {
if (!unsafe.readPending && !config().isAutoRead()) {
// Still no read triggered so clear it now
unsafe.clearEpollIn0();
}
};
}
loop.execute(clearFlagTask);
}
@Override
@ -405,7 +406,7 @@ abstract class AbstractEpollChannel extends AbstractChannel implements UnixChann
// * The user called Channel.read() or ChannelHandlerContext.read() in channelReadComplete(...) method
//
// See https://github.com/netty/netty/issues/2254
clearEpollIn();
clearEpollIn0();
}
}
@ -435,19 +436,7 @@ abstract class AbstractEpollChannel extends AbstractChannel implements UnixChann
}
// Clear the EPOLLRDHUP flag to prevent continuously getting woken up on this event.
clearEpollRdHup();
}
/**
* Clear the {@link Native#EPOLLRDHUP} flag from EPOLL, and close on failure.
*/
private void clearEpollRdHup() {
try {
clearFlag(Native.EPOLLRDHUP);
} catch (IOException e) {
pipeline().fireExceptionCaught(e);
close(voidPromise());
}
clearFlag(Native.EPOLLRDHUP);
}
/**
@ -467,7 +456,7 @@ abstract class AbstractEpollChannel extends AbstractChannel implements UnixChann
// We attempted to shutdown and failed, which means the input has already effectively been
// shutdown.
}
clearEpollIn();
clearEpollIn0();
pipeline().fireUserEventTriggered(ChannelInputShutdownEvent.INSTANCE);
} else {
close(voidPromise());
@ -523,16 +512,9 @@ abstract class AbstractEpollChannel extends AbstractChannel implements UnixChann
}
protected final void clearEpollIn0() {
assert eventLoop().inEventLoop();
try {
readPending = false;
clearFlag(Native.EPOLLIN);
} catch (IOException e) {
// When this happens there is something completely wrong with either the filedescriptor or epoll,
// so fire the exception through the pipeline and close the Channel.
pipeline().fireExceptionCaught(e);
unsafe().close(unsafe().voidPromise());
}
assert !isRegistered() || eventLoop().inEventLoop();
readPending = false;
clearFlag(Native.EPOLLIN);
}
@Override
@ -652,7 +634,7 @@ abstract class AbstractEpollChannel extends AbstractChannel implements UnixChann
/**
* Finish the connect
*/
private boolean doFinishConnect() throws Exception {
private boolean doFinishConnect() throws IOException {
if (socket.finishConnect()) {
clearFlag(Native.EPOLLOUT);
if (requestedRemoteAddress instanceof InetSocketAddress) {

View File

@ -16,14 +16,12 @@
package io.netty.channel.epoll;
import io.netty.buffer.ByteBufAllocator;
import io.netty.channel.ChannelException;
import io.netty.channel.ChannelOption;
import io.netty.channel.DefaultChannelConfig;
import io.netty.channel.MessageSizeEstimator;
import io.netty.channel.RecvByteBufAllocator;
import io.netty.channel.WriteBufferWaterMark;
import java.io.IOException;
import java.util.Map;
import static io.netty.channel.unix.Limits.SSIZE_MAX;
@ -149,7 +147,6 @@ public class EpollChannelConfig extends DefaultChannelConfig {
*/
public EpollChannelConfig setEpollMode(EpollMode mode) {
requireNonNull(mode, "mode");
try {
switch (mode) {
case EDGE_TRIGGERED:
checkChannelNotRegistered();
@ -161,9 +158,6 @@ public class EpollChannelConfig extends DefaultChannelConfig {
break;
default:
throw new Error();
}
} catch (IOException e) {
throw new ChannelException(e);
}
return this;
}

View File

@ -36,6 +36,7 @@ import io.netty.util.internal.logging.InternalLogger;
import io.netty.util.internal.logging.InternalLoggerFactory;
import java.io.IOException;
import java.util.BitSet;
import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
import static io.netty.util.internal.ObjectUtil.checkPositiveOrZero;
@ -62,6 +63,7 @@ public class EpollHandler implements IoHandler {
private final FileDescriptor eventFd;
private final FileDescriptor timerFd;
private final IntObjectMap<AbstractEpollChannel> channels = new IntObjectHashMap<>(4096);
private final BitSet pendingFlagChannels = new BitSet();
private final boolean allowGrowing;
private final EpollEventArray events;
@ -188,8 +190,8 @@ public class EpollHandler implements IoHandler {
final AbstractEpollChannel epollChannel = cast(channel);
epollChannel.register0(new EpollRegistration() {
@Override
public void update() throws IOException {
EpollHandler.this.modify(epollChannel);
public void update() {
EpollHandler.this.updatePendingFlagsSet(epollChannel);
}
@Override
@ -229,6 +231,8 @@ public class EpollHandler implements IoHandler {
private void add(AbstractEpollChannel ch) throws IOException {
int fd = ch.socket.intValue();
Native.epollCtlAdd(epollFd.intValue(), fd, ch.flags);
ch.activeFlags = ch.flags;
AbstractEpollChannel old = channels.put(fd, ch);
// We either expect to have no Channel in the map with the same FD or that the FD of the old Channel is already
@ -236,11 +240,32 @@ public class EpollHandler implements IoHandler {
assert old == null || !old.isOpen();
}
/**
 * Records whether the given channel has a flag change that has not yet been applied.
 * The bit indexed by the channel's file descriptor is set when {@code ch.flags} differs from
 * {@code ch.activeFlags}, and cleared when the two are equal.
 *
 * @param ch the channel whose pending state should be refreshed
 */
void updatePendingFlagsSet(AbstractEpollChannel ch) {
pendingFlagChannels.set(ch.socket.intValue(), ch.flags != ch.activeFlags);
}
/**
 * Applies all pending flag changes. For every file descriptor whose bit is set in
 * {@code pendingFlagChannels}, the matching channel is looked up and {@code modify} is called for it;
 * the bit is cleared afterwards whether or not a channel was found. If {@code modify} throws an
 * {@link IOException}, the exception is fired through that channel's pipeline and the channel is closed.
 */
private void processPendingChannelFlags() {
// Call epollCtlMod for any channels that require event interest changes before calling epoll_wait
if (!pendingFlagChannels.isEmpty()) {
// The update clause clears the bit that was just handled, so the next nextSetBit(fd) call
// moves on to the following set bit (or returns -1 and ends the loop).
for (int fd = 0; (fd = pendingFlagChannels.nextSetBit(fd)) >= 0; pendingFlagChannels.clear(fd)) {
AbstractEpollChannel ch = channels.get(fd);
// A set bit with no channel in the map is simply dropped.
if (ch != null) {
try {
modify(ch);
} catch (IOException e) {
// Report the failure to the channel's handlers, then close the channel.
ch.pipeline().fireExceptionCaught(e);
ch.close();
}
}
}
}
}
/**
 * Pushes the channel's current {@code flags} to epoll with {@code epollCtlMod} and then records
 * them in {@code activeFlags}. {@code activeFlags} is only updated after the call returns, so it
 * is left unchanged if the call throws.
 *
 * @param ch the channel whose flags were modified
 * @throws IOException if the {@code epollCtlMod} call fails
 */
private void modify(AbstractEpollChannel ch) throws IOException {
Native.epollCtlMod(epollFd.intValue(), ch.socket.intValue(), ch.flags);
ch.activeFlags = ch.flags;
}
/**
@ -256,10 +281,14 @@ public class EpollHandler implements IoHandler {
// If we found another Channel in the map that is mapped to the same FD the given Channel MUST be closed.
assert !ch.isOpen();
} else if (ch.isOpen()) {
// Remove the epoll. This is only needed if it's still open as otherwise it will be automatically
// removed once the file-descriptor is closed.
Native.epollCtlDel(epollFd.intValue(), fd);
} else {
ch.activeFlags = 0;
pendingFlagChannels.clear(fd);
if (ch.isOpen()) {
// Remove the epoll. This is only needed if it's still open as otherwise it will be automatically
// removed once the file-descriptor is closed.
Native.epollCtlDel(epollFd.intValue(), fd);
}
}
}
@ -291,6 +320,7 @@ public class EpollHandler implements IoHandler {
public final int run(IoExecutionContext context) {
int handled = 0;
try {
processPendingChannelFlags();
int strategy = selectStrategy.calculateStrategy(selectNowSupplier, !context.canBlock());
switch (strategy) {
case SelectStrategy.CONTINUE:

View File

@ -20,14 +20,14 @@ import io.netty.channel.unix.IovArray;
import java.io.IOException;
/**
* Registration with an {@link EpollEventLoop}.
* Registration with an {@link EpollHandler}.
*/
interface EpollRegistration {
/**
* Update the registration as some flags did change.
*/
void update() throws IOException;
void update();
/**
* Remove the registration. No more IO will be handled for it.