diff --git a/transport/src/main/java/io/netty/channel/socket/nio/NioEventLoop.java b/transport/src/main/java/io/netty/channel/socket/nio/NioEventLoop.java
index 670d43df14..401236ab56 100644
--- a/transport/src/main/java/io/netty/channel/socket/nio/NioEventLoop.java
+++ b/transport/src/main/java/io/netty/channel/socket/nio/NioEventLoop.java
@@ -24,9 +24,7 @@ import io.netty.logging.InternalLogger;
 import io.netty.logging.InternalLoggerFactory;
 
 import java.io.IOException;
-import java.nio.channels.CancelledKeyException;
-import java.nio.channels.SelectionKey;
-import java.nio.channels.Selector;
+import java.nio.channels.*;
 import java.nio.channels.spi.SelectorProvider;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -55,8 +53,10 @@ final class NioEventLoop extends SingleThreadEventLoop {
     /**
      * The NIO {@link Selector}.
      */
-    protected final Selector selector;
+    protected Selector selector;
 
+    protected final SelectorProvider provider;
+
     /**
      * Boolean that controls determines if a blocked Selector.select should
      * break out of its selection process. In our case we use a timeone for
@@ -75,10 +75,11 @@ final class NioEventLoop extends SingleThreadEventLoop {
         if (selectorProvider == null) {
             throw new NullPointerException("selectorProvider");
         }
-        selector = openSelector(selectorProvider);
+        provider = selectorProvider;
+        selector = openSelector();
     }
 
-    private static Selector openSelector(SelectorProvider provider) {
+    private Selector openSelector() {
         try {
             return provider.openSelector();
         } catch (IOException e) {
@@ -92,15 +93,83 @@ final class NioEventLoop extends SingleThreadEventLoop {
         return new ConcurrentLinkedQueue();
     }
 
+    // Create a new selector and "transfer" all channels from the old
+    // selector to the new one. Returns the new selector.
+    private Selector recreateSelector() {
+        Selector newSelector = openSelector();
+        Selector selector = this.selector;
+        this.selector = newSelector;
+
+        // loop over all the keys that are registered with the old Selector
+        // and register them with the new one
+        for (SelectionKey key: selector.keys()) {
+            if (!key.isValid()) {
+                // already cancelled: interestOps() would throw CancelledKeyException
+                continue;
+            }
+            SelectableChannel ch = key.channel();
+            int ops = key.interestOps();
+            Object att = key.attachment();
+            // cancel the old key
+            cancel(key);
+
+            try {
+                // register the channel with the new selector now
+                ch.register(newSelector, ops, att);
+            } catch (ClosedChannelException e) {
+                // the channel was closed in the meantime, so close it on our side too
+                AbstractNioChannel channel = (AbstractNioChannel) att;
+                channel.unsafe().close(channel.unsafe().voidFuture());
+            }
+        }
+        try {
+            // time to close the old selector as everything else is registered to the new one
+            selector.close();
+        } catch (Throwable t) {
+            logger.warn("Failed to close a selector.", t);
+        }
+        logger.warn("Recreated Selector because of possible jdk epoll(..) bug");
+        return newSelector;
+    }
     @Override
     protected void run() {
         Selector selector = this.selector;
+        int selectReturnsImmediately = 0;
+
+        // a select() that returns in less than 80% of the timeout counts as premature
+        long minSelectTimeout = SelectorUtil.SELECT_TIMEOUT_NANOS / 100 * 80;
+
         for (;;) {
 
             wakenUp.set(false);
 
             try {
-                SelectorUtil.select(selector);
+                long beforeSelect = System.nanoTime();
+                int selected = SelectorUtil.select(selector);
+                if (selected == 0) {
+                    long timeBlocked = System.nanoTime() - beforeSelect;
+                    if (timeBlocked < minSelectTimeout) {
+                        // returned before the minSelectTimeout elapsed with nothing selected.
+                        // this may be caused by the jdk epoll(..) bug, so increment the counter
+                        // which we use later to see if it's really the jdk bug.
+                        selectReturnsImmediately ++;
+                    } else {
+                        selectReturnsImmediately = 0;
+                    }
+                    if (selectReturnsImmediately == 10) {
+                        // The selector returned immediately for 10 times in a row,
+                        // so recreate the selector as it seems like we hit the
+                        // famous epoll(..) jdk bug.
+                        selector = recreateSelector();
+                        selectReturnsImmediately = 0;
+
+                        // try to select again
+                        continue;
+                    }
+                } else {
+                    // reset counter
+                    selectReturnsImmediately = 0;
+                }
 
                 // 'wakenUp.compareAndSet(false, true)' is always evaluated
                 // before calling 'selector.wakeup()' to reduce the wake-up
diff --git a/transport/src/main/java/io/netty/channel/socket/nio/SelectorUtil.java b/transport/src/main/java/io/netty/channel/socket/nio/SelectorUtil.java
index 29fd092589..1a88de668c 100644
--- a/transport/src/main/java/io/netty/channel/socket/nio/SelectorUtil.java
+++ b/transport/src/main/java/io/netty/channel/socket/nio/SelectorUtil.java
@@ -21,12 +21,14 @@ import io.netty.logging.InternalLoggerFactory;
 import java.io.IOException;
 import java.nio.channels.CancelledKeyException;
 import java.nio.channels.Selector;
+import java.util.concurrent.TimeUnit;
 
 final class SelectorUtil {
     private static final InternalLogger logger = InternalLoggerFactory.getInstance(SelectorUtil.class);
 
     static final long DEFAULT_SELECT_TIMEOUT = 10;
     static final long SELECT_TIMEOUT;
+    static final long SELECT_TIMEOUT_NANOS;
 
     // Workaround for JDK NIO bug.
     //
@@ -53,12 +55,13 @@ final class SelectorUtil {
             selectTimeout = DEFAULT_SELECT_TIMEOUT;
         }
         SELECT_TIMEOUT = selectTimeout;
+        SELECT_TIMEOUT_NANOS = TimeUnit.MILLISECONDS.toNanos(SELECT_TIMEOUT);
         logger.debug("Using select timeout of " + SELECT_TIMEOUT);
     }
 
-    static void select(Selector selector) throws IOException {
+    static int select(Selector selector) throws IOException {
         try {
-            selector.select(SELECT_TIMEOUT);
+            return selector.select(SELECT_TIMEOUT);
         } catch (CancelledKeyException e) {
             if (logger.isDebugEnabled()) {
                 logger.debug(
@@ -67,6 +70,7 @@ final class SelectorUtil {
             }
             // Harmless exception - log anyway
         }
+        return -1;
     }
 
     static void cleanupKeys(Selector selector) {