Motivation:

Avoid unnecessary iteration and `ArrayList` allocation in `EpollEventLoop.closeAll()`.

Modification:

The loop

```
for (AbstractEpollChannel channel: channels.values()) {
    array.add(channel);
}
```

was replaced with `array.addAll(channels.values())`, and the resulting

```
Collection<AbstractEpollChannel> array = new ArrayList<AbstractEpollChannel>(channels.size());
array.addAll(channels.values())
```

was in turn replaced with:

`AbstractEpollChannel[] localChannels = channels.values().toArray(new AbstractEpollChannel[0]);`

Result:

Simpler code in `EpollEventLoop.closeAll()`.
483 lines
19 KiB
Java
483 lines
19 KiB
Java
/*
|
|
* Copyright 2014 The Netty Project
|
|
*
|
|
* The Netty Project licenses this file to you under the Apache License,
|
|
* version 2.0 (the "License"); you may not use this file except in compliance
|
|
* with the License. You may obtain a copy of the License at:
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
* License for the specific language governing permissions and limitations
|
|
* under the License.
|
|
*/
|
|
package io.netty.channel.epoll;
|
|
|
|
import io.netty.channel.EventLoop;
|
|
import io.netty.channel.EventLoopGroup;
|
|
import io.netty.channel.SelectStrategy;
|
|
import io.netty.channel.SingleThreadEventLoop;
|
|
import io.netty.channel.epoll.AbstractEpollChannel.AbstractEpollUnsafe;
|
|
import io.netty.channel.unix.FileDescriptor;
|
|
import io.netty.channel.unix.IovArray;
|
|
import io.netty.util.IntSupplier;
|
|
import io.netty.util.collection.IntObjectHashMap;
|
|
import io.netty.util.collection.IntObjectMap;
|
|
import io.netty.util.concurrent.RejectedExecutionHandler;
|
|
import io.netty.util.internal.ObjectUtil;
|
|
import io.netty.util.internal.PlatformDependent;
|
|
import io.netty.util.internal.logging.InternalLogger;
|
|
import io.netty.util.internal.logging.InternalLoggerFactory;
|
|
|
|
import java.io.IOException;
|
|
import java.util.Queue;
|
|
import java.util.concurrent.Executor;
|
|
import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
|
|
|
|
import static java.lang.Math.min;
|
|
|
|
/**
|
|
* {@link EventLoop} which uses epoll under the covers. Only works on Linux!
|
|
*/
|
|
class EpollEventLoop extends SingleThreadEventLoop {
|
|
private static final InternalLogger logger = InternalLoggerFactory.getInstance(EpollEventLoop.class);

/** Atomic accessor for the {@code wakenUp} field (0 = not woken up, 1 = wake-up requested). */
private static final AtomicIntegerFieldUpdater<EpollEventLoop> WAKEN_UP_UPDATER =
        AtomicIntegerFieldUpdater.newUpdater(EpollEventLoop.class, "wakenUp");

static {
    // This class uses unix-common methods which are backed by JNI methods, so make sure
    // JNI is initialized by the time this class is loaded.
    Epoll.ensureAvailability();
}

/** Deadline seen by the previous blocking wait; starts at a value no task could have previously used. */
private long prevDeadlineNanos = nanoTime() - 1;

/** The epoll instance all other descriptors are registered with. */
private final FileDescriptor epollFd;

/** Written to by {@code wakeup(...)} to interrupt a blocking wait. */
private final FileDescriptor eventFd;

/** Timer descriptor handed to the native wait call together with the computed delay. */
private final FileDescriptor timerFd;

/** Registered channels, keyed by the int value of their socket descriptor. */
private final IntObjectMap<AbstractEpollChannel> channels = new IntObjectHashMap<AbstractEpollChannel>(4096);

/** {@code true} when the event array may be enlarged after a wait filled it completely. */
private final boolean allowGrowing;

/** Native array that receives the ready events of each wait call. */
private final EpollEventArray events;

// These are initialized on first use
private IovArray iovArray;
private NativeDatagramPacketArray datagramPacketArray;

private final SelectStrategy selectStrategy;

/** Supplies the result of a non-blocking wait to the {@link SelectStrategy}. */
private final IntSupplier selectNowSupplier = new IntSupplier() {
    @Override
    public int get() throws Exception {
        return epollWaitNow();
    }
};

@SuppressWarnings("unused") // AtomicIntegerFieldUpdater
private volatile int wakenUp;

/** Percentage of loop time intended for I/O; see {@code setIoRatio(int)}. */
private volatile int ioRatio = 50;

// Upper bound applied to the nanosecond part of the timer delay.
// See http://man7.org/linux/man-pages/man2/timerfd_create.2.html.
private static final long MAX_SCHEDULED_TIMERFD_NS = 999999999;
|
|
|
|
EpollEventLoop(EventLoopGroup parent, Executor executor, int maxEvents,
|
|
SelectStrategy strategy, RejectedExecutionHandler rejectedExecutionHandler) {
|
|
super(parent, executor, false, DEFAULT_MAX_PENDING_TASKS, rejectedExecutionHandler);
|
|
selectStrategy = ObjectUtil.checkNotNull(strategy, "strategy");
|
|
if (maxEvents == 0) {
|
|
allowGrowing = true;
|
|
events = new EpollEventArray(4096);
|
|
} else {
|
|
allowGrowing = false;
|
|
events = new EpollEventArray(maxEvents);
|
|
}
|
|
boolean success = false;
|
|
FileDescriptor epollFd = null;
|
|
FileDescriptor eventFd = null;
|
|
FileDescriptor timerFd = null;
|
|
try {
|
|
this.epollFd = epollFd = Native.newEpollCreate();
|
|
this.eventFd = eventFd = Native.newEventFd();
|
|
try {
|
|
Native.epollCtlAdd(epollFd.intValue(), eventFd.intValue(), Native.EPOLLIN);
|
|
} catch (IOException e) {
|
|
throw new IllegalStateException("Unable to add eventFd filedescriptor to epoll", e);
|
|
}
|
|
this.timerFd = timerFd = Native.newTimerFd();
|
|
try {
|
|
Native.epollCtlAdd(epollFd.intValue(), timerFd.intValue(), Native.EPOLLIN | Native.EPOLLET);
|
|
} catch (IOException e) {
|
|
throw new IllegalStateException("Unable to add timerFd filedescriptor to epoll", e);
|
|
}
|
|
success = true;
|
|
} finally {
|
|
if (!success) {
|
|
if (epollFd != null) {
|
|
try {
|
|
epollFd.close();
|
|
} catch (Exception e) {
|
|
// ignore
|
|
}
|
|
}
|
|
if (eventFd != null) {
|
|
try {
|
|
eventFd.close();
|
|
} catch (Exception e) {
|
|
// ignore
|
|
}
|
|
}
|
|
if (timerFd != null) {
|
|
try {
|
|
timerFd.close();
|
|
} catch (Exception e) {
|
|
// ignore
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Return a cleared {@link IovArray} that can be used for writes in this {@link EventLoop}.
|
|
*/
|
|
IovArray cleanIovArray() {
|
|
if (iovArray == null) {
|
|
iovArray = new IovArray();
|
|
} else {
|
|
iovArray.clear();
|
|
}
|
|
return iovArray;
|
|
}
|
|
|
|
/**
|
|
* Return a cleared {@link NativeDatagramPacketArray} that can be used for writes in this {@link EventLoop}.
|
|
*/
|
|
NativeDatagramPacketArray cleanDatagramPacketArray() {
|
|
if (datagramPacketArray == null) {
|
|
datagramPacketArray = new NativeDatagramPacketArray();
|
|
} else {
|
|
datagramPacketArray.clear();
|
|
}
|
|
return datagramPacketArray;
|
|
}
|
|
|
|
@Override
|
|
protected void wakeup(boolean inEventLoop) {
|
|
if (!inEventLoop && WAKEN_UP_UPDATER.compareAndSet(this, 0, 1)) {
|
|
// write to the evfd which will then wake-up epoll_wait(...)
|
|
Native.eventFdWrite(eventFd.intValue(), 1L);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Register the given epoll with this {@link EventLoop}.
|
|
*/
|
|
void add(AbstractEpollChannel ch) throws IOException {
|
|
assert inEventLoop();
|
|
int fd = ch.socket.intValue();
|
|
Native.epollCtlAdd(epollFd.intValue(), fd, ch.flags);
|
|
channels.put(fd, ch);
|
|
}
|
|
|
|
/**
|
|
* The flags of the given epoll was modified so update the registration
|
|
*/
|
|
void modify(AbstractEpollChannel ch) throws IOException {
|
|
assert inEventLoop();
|
|
Native.epollCtlMod(epollFd.intValue(), ch.socket.intValue(), ch.flags);
|
|
}
|
|
|
|
/**
|
|
* Deregister the given epoll from this {@link EventLoop}.
|
|
*/
|
|
void remove(AbstractEpollChannel ch) throws IOException {
|
|
assert inEventLoop();
|
|
|
|
if (ch.isOpen()) {
|
|
int fd = ch.socket.intValue();
|
|
if (channels.remove(fd) != null) {
|
|
// Remove the epoll. This is only needed if it's still open as otherwise it will be automatically
|
|
// removed once the file-descriptor is closed.
|
|
Native.epollCtlDel(epollFd.intValue(), ch.fd().intValue());
|
|
}
|
|
}
|
|
}
|
|
|
|
@Override
|
|
protected Queue<Runnable> newTaskQueue(int maxPendingTasks) {
|
|
// This event loop never calls takeTask()
|
|
return maxPendingTasks == Integer.MAX_VALUE ? PlatformDependent.<Runnable>newMpscQueue()
|
|
: PlatformDependent.<Runnable>newMpscQueue(maxPendingTasks);
|
|
}
|
|
|
|
/**
|
|
* Returns the percentage of the desired amount of time spent for I/O in the event loop.
|
|
*/
|
|
public int getIoRatio() {
|
|
return ioRatio;
|
|
}
|
|
|
|
/**
|
|
* Sets the percentage of the desired amount of time spent for I/O in the event loop. The default value is
|
|
* {@code 50}, which means the event loop will try to spend the same amount of time for I/O as for non-I/O tasks.
|
|
*/
|
|
public void setIoRatio(int ioRatio) {
|
|
if (ioRatio <= 0 || ioRatio > 100) {
|
|
throw new IllegalArgumentException("ioRatio: " + ioRatio + " (expected: 0 < ioRatio <= 100)");
|
|
}
|
|
this.ioRatio = ioRatio;
|
|
}
|
|
|
|
private int epollWait(boolean oldWakeup) throws IOException {
|
|
// If a task was submitted when wakenUp value was 1, the task didn't get a chance to produce wakeup event.
|
|
// So we need to check task queue again before calling epoll_wait. If we don't, the task might be pended
|
|
// until epoll_wait was timed out. It might be pended until idle timeout if IdleStateHandler existed
|
|
// in pipeline.
|
|
if (oldWakeup && hasTasks()) {
|
|
return epollWaitNow();
|
|
}
|
|
|
|
int delaySeconds;
|
|
int delayNanos;
|
|
long curDeadlineNanos = deadlineNanos();
|
|
if (curDeadlineNanos == prevDeadlineNanos) {
|
|
delaySeconds = -1;
|
|
delayNanos = -1;
|
|
} else {
|
|
long totalDelay = delayNanos(System.nanoTime());
|
|
prevDeadlineNanos = curDeadlineNanos;
|
|
delaySeconds = (int) min(totalDelay / 1000000000L, Integer.MAX_VALUE);
|
|
delayNanos = (int) min(totalDelay - delaySeconds * 1000000000L, MAX_SCHEDULED_TIMERFD_NS);
|
|
}
|
|
return Native.epollWait(epollFd, events, timerFd, delaySeconds, delayNanos);
|
|
}
|
|
|
|
private int epollWaitNow() throws IOException {
|
|
return Native.epollWait(epollFd, events, timerFd, 0, 0);
|
|
}
|
|
|
|
private int epollBusyWait() throws IOException {
|
|
return Native.epollBusyWait(epollFd, events);
|
|
}
|
|
|
|
@Override
|
|
protected void run() {
|
|
for (;;) {
|
|
try {
|
|
int strategy = selectStrategy.calculateStrategy(selectNowSupplier, hasTasks());
|
|
switch (strategy) {
|
|
case SelectStrategy.CONTINUE:
|
|
continue;
|
|
|
|
case SelectStrategy.BUSY_WAIT:
|
|
strategy = epollBusyWait();
|
|
break;
|
|
|
|
case SelectStrategy.SELECT:
|
|
strategy = epollWait(WAKEN_UP_UPDATER.getAndSet(this, 0) == 1);
|
|
|
|
// 'wakenUp.compareAndSet(false, true)' is always evaluated
|
|
// before calling 'selector.wakeup()' to reduce the wake-up
|
|
// overhead. (Selector.wakeup() is an expensive operation.)
|
|
//
|
|
// However, there is a race condition in this approach.
|
|
// The race condition is triggered when 'wakenUp' is set to
|
|
// true too early.
|
|
//
|
|
// 'wakenUp' is set to true too early if:
|
|
// 1) Selector is waken up between 'wakenUp.set(false)' and
|
|
// 'selector.select(...)'. (BAD)
|
|
// 2) Selector is waken up between 'selector.select(...)' and
|
|
// 'if (wakenUp.get()) { ... }'. (OK)
|
|
//
|
|
// In the first case, 'wakenUp' is set to true and the
|
|
// following 'selector.select(...)' will wake up immediately.
|
|
// Until 'wakenUp' is set to false again in the next round,
|
|
// 'wakenUp.compareAndSet(false, true)' will fail, and therefore
|
|
// any attempt to wake up the Selector will fail, too, causing
|
|
// the following 'selector.select(...)' call to block
|
|
// unnecessarily.
|
|
//
|
|
// To fix this problem, we wake up the selector again if wakenUp
|
|
// is true immediately after selector.select(...).
|
|
// It is inefficient in that it wakes up the selector for both
|
|
// the first case (BAD - wake-up required) and the second case
|
|
// (OK - no wake-up required).
|
|
|
|
if (wakenUp == 1) {
|
|
Native.eventFdWrite(eventFd.intValue(), 1L);
|
|
}
|
|
// fallthrough
|
|
default:
|
|
}
|
|
|
|
final int ioRatio = this.ioRatio;
|
|
if (ioRatio == 100) {
|
|
try {
|
|
if (strategy > 0) {
|
|
processReady(events, strategy);
|
|
}
|
|
} finally {
|
|
// Ensure we always run tasks.
|
|
runAllTasks();
|
|
}
|
|
} else {
|
|
final long ioStartTime = System.nanoTime();
|
|
|
|
try {
|
|
if (strategy > 0) {
|
|
processReady(events, strategy);
|
|
}
|
|
} finally {
|
|
// Ensure we always run tasks.
|
|
final long ioTime = System.nanoTime() - ioStartTime;
|
|
runAllTasks(ioTime * (100 - ioRatio) / ioRatio);
|
|
}
|
|
}
|
|
if (allowGrowing && strategy == events.length()) {
|
|
//increase the size of the array as we needed the whole space for the events
|
|
events.increase();
|
|
}
|
|
} catch (Throwable t) {
|
|
handleLoopException(t);
|
|
}
|
|
// Always handle shutdown even if the loop processing threw an exception.
|
|
try {
|
|
if (isShuttingDown()) {
|
|
closeAll();
|
|
if (confirmShutdown()) {
|
|
break;
|
|
}
|
|
}
|
|
} catch (Throwable t) {
|
|
handleLoopException(t);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Visible only for testing!
|
|
*/
|
|
void handleLoopException(Throwable t) {
|
|
logger.warn("Unexpected exception in the selector loop.", t);
|
|
|
|
// Prevent possible consecutive immediate failures that lead to
|
|
// excessive CPU consumption.
|
|
try {
|
|
Thread.sleep(1000);
|
|
} catch (InterruptedException e) {
|
|
// Ignore.
|
|
}
|
|
}
|
|
|
|
private void closeAll() {
|
|
try {
|
|
epollWaitNow();
|
|
} catch (IOException ignore) {
|
|
// ignore on close
|
|
}
|
|
// Using the intermediate collection to prevent ConcurrentModificationException.
|
|
// In the `close()` method, the channel is deleted from `channels` map.
|
|
AbstractEpollChannel[] localChannels = channels.values().toArray(new AbstractEpollChannel[0]);
|
|
|
|
for (AbstractEpollChannel ch : localChannels) {
|
|
ch.unsafe().close(ch.unsafe().voidPromise());
|
|
}
|
|
}
|
|
|
|
private void processReady(EpollEventArray events, int ready) {
|
|
for (int i = 0; i < ready; i ++) {
|
|
final int fd = events.fd(i);
|
|
if (fd == eventFd.intValue()) {
|
|
// consume wakeup event.
|
|
Native.eventFdRead(fd);
|
|
} else if (fd == timerFd.intValue()) {
|
|
// consume wakeup event, necessary because the timer is added with ET mode.
|
|
Native.timerFdRead(fd);
|
|
} else {
|
|
final long ev = events.events(i);
|
|
|
|
AbstractEpollChannel ch = channels.get(fd);
|
|
if (ch != null) {
|
|
// Don't change the ordering of processing EPOLLOUT | EPOLLRDHUP / EPOLLIN if you're not 100%
|
|
// sure about it!
|
|
// Re-ordering can easily introduce bugs and bad side-effects, as we found out painfully in the
|
|
// past.
|
|
AbstractEpollUnsafe unsafe = (AbstractEpollUnsafe) ch.unsafe();
|
|
|
|
// First check for EPOLLOUT as we may need to fail the connect ChannelPromise before try
|
|
// to read from the file descriptor.
|
|
// See https://github.com/netty/netty/issues/3785
|
|
//
|
|
// It is possible for an EPOLLOUT or EPOLLERR to be generated when a connection is refused.
|
|
// In either case epollOutReady() will do the correct thing (finish connecting, or fail
|
|
// the connection).
|
|
// See https://github.com/netty/netty/issues/3848
|
|
if ((ev & (Native.EPOLLERR | Native.EPOLLOUT)) != 0) {
|
|
// Force flush of data as the epoll is writable again
|
|
unsafe.epollOutReady();
|
|
}
|
|
|
|
// Check EPOLLIN before EPOLLRDHUP to ensure all data is read before shutting down the input.
|
|
// See https://github.com/netty/netty/issues/4317.
|
|
//
|
|
// If EPOLLIN or EPOLLERR was received and the channel is still open call epollInReady(). This will
|
|
// try to read from the underlying file descriptor and so notify the user about the error.
|
|
if ((ev & (Native.EPOLLERR | Native.EPOLLIN)) != 0) {
|
|
// The Channel is still open and there is something to read. Do it now.
|
|
unsafe.epollInReady();
|
|
}
|
|
|
|
// Check if EPOLLRDHUP was set, this will notify us for connection-reset in which case
|
|
// we may close the channel directly or try to read more data depending on the state of the
|
|
// Channel and als depending on the AbstractEpollChannel subtype.
|
|
if ((ev & Native.EPOLLRDHUP) != 0) {
|
|
unsafe.epollRdHupReady();
|
|
}
|
|
} else {
|
|
// We received an event for an fd which we not use anymore. Remove it from the epoll_event set.
|
|
try {
|
|
Native.epollCtlDel(epollFd.intValue(), fd);
|
|
} catch (IOException ignore) {
|
|
// This can happen but is nothing we need to worry about as we only try to delete
|
|
// the fd from the epoll set as we not found it in our mappings. So this call to
|
|
// epollCtlDel(...) is just to ensure we cleanup stuff and so may fail if it was
|
|
// deleted before or the file descriptor was closed before.
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
@Override
|
|
protected void cleanup() {
|
|
try {
|
|
try {
|
|
epollFd.close();
|
|
} catch (IOException e) {
|
|
logger.warn("Failed to close the epoll fd.", e);
|
|
}
|
|
try {
|
|
eventFd.close();
|
|
} catch (IOException e) {
|
|
logger.warn("Failed to close the event fd.", e);
|
|
}
|
|
try {
|
|
timerFd.close();
|
|
} catch (IOException e) {
|
|
logger.warn("Failed to close the timer fd.", e);
|
|
}
|
|
} finally {
|
|
// release native memory
|
|
if (iovArray != null) {
|
|
iovArray.release();
|
|
iovArray = null;
|
|
}
|
|
if (datagramPacketArray != null) {
|
|
datagramPacketArray.release();
|
|
datagramPacketArray = null;
|
|
}
|
|
events.free();
|
|
}
|
|
}
|
|
}
|