Re-create the Selector if we hit the JDK epoll(..) bug, which leads to 100% CPU load. This is just a workaround, but it helps to recover. See #327
This commit is contained in:
parent
1a6e7b4be1
commit
ff3f2b6361
@ -22,9 +22,11 @@ import java.nio.channels.AsynchronousCloseException;
|
||||
import java.nio.channels.CancelledKeyException;
|
||||
import java.nio.channels.ClosedChannelException;
|
||||
import java.nio.channels.NotYetConnectedException;
|
||||
import java.nio.channels.SelectionKey;
|
||||
import java.nio.channels.SelectableChannel;
|
||||
import java.nio.channels.Selector;
|
||||
import java.nio.channels.SelectionKey;
|
||||
import java.nio.channels.WritableByteChannel;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Queue;
|
||||
import java.util.Set;
|
||||
@ -145,7 +147,6 @@ abstract class AbstractNioWorker implements Worker {
|
||||
synchronized (startStopLock) {
|
||||
Selector selector = start();
|
||||
|
||||
|
||||
boolean offered = registerTaskQueue.offer(registerTask);
|
||||
assert offered;
|
||||
|
||||
@ -163,6 +164,37 @@ abstract class AbstractNioWorker implements Worker {
|
||||
}
|
||||
}
|
||||
|
||||
// Create a new selector and "transfer" all channels from the old
|
||||
// selector to the new one
|
||||
private Selector recreateSelector() throws IOException {
|
||||
Selector newSelector = Selector.open();
|
||||
Selector selector = this.selector;
|
||||
this.selector = newSelector;
|
||||
|
||||
// loop over all the keys that are registered with the old Selector
|
||||
// and register them with the new one
|
||||
for (SelectionKey key: selector.keys()) {
|
||||
SelectableChannel ch = key.channel();
|
||||
int ops = key.interestOps();
|
||||
Object att = key.attachment();
|
||||
// cancel the old key
|
||||
key.cancel();
|
||||
|
||||
// register the channel with the new selector now
|
||||
ch.register(newSelector, ops, att);
|
||||
key.cancel();
|
||||
|
||||
}
|
||||
try {
|
||||
// time to close the old selector as everything else is registered to the new one
|
||||
selector.close();
|
||||
} catch (Throwable t) {
|
||||
logger.warn("Failed to close a selector.", t);
|
||||
}
|
||||
logger.debug("Recreated Selector because of possible jdk epoll(..) bug");
|
||||
return newSelector;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start the {@link AbstractNioWorker} and return the {@link Selector} that will be used for
|
||||
* the {@link AbstractNioChannel}'s when they get registered
|
||||
@ -208,6 +240,7 @@ abstract class AbstractNioWorker implements Worker {
|
||||
thread = Thread.currentThread();
|
||||
|
||||
boolean shutdown = false;
|
||||
int selectReturnsImmediately = 0;
|
||||
Selector selector = this.selector;
|
||||
for (;;) {
|
||||
wakenUp.set(false);
|
||||
@ -220,7 +253,32 @@ abstract class AbstractNioWorker implements Worker {
|
||||
}
|
||||
|
||||
try {
|
||||
SelectorUtil.select(selector);
|
||||
long beforeSelect = System.currentTimeMillis();
|
||||
int selected = SelectorUtil.select(selector);
|
||||
if (selected == 0) {
|
||||
long timeBlocked = System.currentTimeMillis() - beforeSelect;
|
||||
if (timeBlocked < SelectorUtil.SELECT_WAIT_TIME) {
|
||||
// returned before the SELECT_WAIT_TIME elapsed with nothing selected.
|
||||
// this may be the cause of the jdk epoll(..) bug, so increment the counter
|
||||
// which we use later to see if it's really the JDK bug.
|
||||
selectReturnsImmediately++;
|
||||
} else {
|
||||
selectReturnsImmediately = 0;
|
||||
}
|
||||
if (selectReturnsImmediately == 10) {
|
||||
// The selector returned immediately for 10 times in a row,
|
||||
// so recreate the selector as it seems like we hit the
|
||||
// famous epoll(..) jdk bug.
|
||||
selector = recreateSelector();
|
||||
selectReturnsImmediately = 0;
|
||||
|
||||
// try to select again
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// reset counter
|
||||
selectReturnsImmediately = 0;
|
||||
}
|
||||
|
||||
// 'wakenUp.compareAndSet(false, true)' is always evaluated
|
||||
// before calling 'selector.wakeup()' to reduce the wake-up
|
||||
|
@ -28,6 +28,8 @@ final class SelectorUtil {
|
||||
|
||||
static final int DEFAULT_IO_THREADS = Runtime.getRuntime().availableProcessors() * 2;
|
||||
|
||||
static final int SELECT_WAIT_TIME = 10;
|
||||
|
||||
// Workaround for JDK NIO bug.
|
||||
//
|
||||
// See:
|
||||
@ -47,9 +49,9 @@ final class SelectorUtil {
|
||||
}
|
||||
}
|
||||
|
||||
static void select(Selector selector) throws IOException {
|
||||
static int select(Selector selector) throws IOException {
|
||||
try {
|
||||
selector.select(10);
|
||||
return selector.select(SELECT_WAIT_TIME);
|
||||
} catch (CancelledKeyException e) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug(
|
||||
@ -58,6 +60,7 @@ final class SelectorUtil {
|
||||
}
|
||||
// Harmless exception - log anyway
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
private SelectorUtil() {
|
||||
|
Loading…
x
Reference in New Issue
Block a user