xbox-kernel/private/ntos/xnet/tcp/tcpconn.c
2020-09-30 17:17:25 +02:00

709 lines
15 KiB
C

/*++
Copyright (c) 2000 Microsoft Corporation
Module Name:
tcpconn.c
Abstract:
TCP connection management functions
Revision History:
06/05/2000 davidx
Created it.
--*/
#include "precomp.h"
//
// Maximum segment lifetime (30 seconds).
// Expressed as 30 * SLOW_TCP_TIMER_FREQ; presumably SLOW_TCP_TIMER_FREQ
// is the number of slow-timer ticks per second - confirm in the header.
//
UINT cfgMSL = 30 * SLOW_TCP_TIMER_FREQ;
//
// Timeout period for active connection requests
// NOTE: Non-standard behavior!!!
// Our connection timeout value of 20sec is much shorter
// than the usual value of 75sec.
//
UINT cfgConnectTimeout = 20 * SLOW_TCP_TIMER_FREQ;
//
// Maximum backlog for a listening socket.
// TcbListen clamps the caller-supplied backlog to this value.
//
INT cfgMaxListenBacklog = 5;
//
// Default linger time, in seconds, when a connection is gracefully
// closed (2 minutes). TcbClose multiplies it by SLOW_TCP_TIMER_FREQ
// before arming the timer.
//
USHORT cfgDefaultLingerTime = 2*60;
//
// Default retransmission timeout (3 seconds).
// TcbInit loads this into both tcb->RTO and tcb->rttvar_4.
//
UINT cfgDefaultRTO = 3*SLOW_TCP_TIMER_FREQ;
//
// List of TCP connections that have been reset but not yet closed.
// TcbReset moves TCBs onto this list; TcbConnect moves a revived TCB
// from here back to PcbList.
//
LIST_ENTRY DeadTcbList;
VOID
TcbInit(
    TCB* tcb
    )
/*++

Routine Description:

    Load the default per-connection settings into a TCP control block

Arguments:

    tcb - The TCB to set up

Return Value:

    NONE

Note:

    Only the fields that need a non-zero default are written here;
    everything else is assumed to be zero already when we are called.

--*/
{
    // Start with an empty queue of pending connection requests
    InitializeListHead(&tcb->listenq);

    // Linger interval applied when the connection is closed
    tcb->linger.l_linger = cfgDefaultLingerTime;

    // Both directions begin with the default MSS
    tcb->rcv_mss = TCP_DEFAULT_MSS;
    tcb->snd_mss = tcb->rcv_mss;

    // Congestion window and slow-start threshold begin at the
    // maximum window size
    tcb->snd_ssthresh = MAX_TCP_WNDSIZE;
    tcb->snd_cwnd = tcb->snd_ssthresh;

    // No smoothed RTT yet; the RTO and the RTT variance
    // both start from the configured default RTO
    tcb->srtt_8 = 0;
    tcb->RTO = cfgDefaultRTO;
    tcb->rttvar_4 = tcb->RTO;

    // Nothing queued for sending
    tcb->sendbufNext = TcbSendbufNil(tcb);
}
//
// Detach a child socket (a pending connection request)
// from the listening socket that owns it:
//  - the parent's backlog count drops by one
//  - the child leaves the parent's listen queue and
//    its own list entry is left as an empty list head
//  - the child no longer points at a parent
//
PRIVATE VOID TcbDisassociateChild(TCB* childTcb)
{
    TCB* listener = childTcb->parent;

    listener->backlog -= 1;
    childTcb->parent = NULL;

    RemoveEntryList(&childTcb->listenq);
    InitializeListHead(&childTcb->listenq);
}
//
// Shrink the queue of pending connection requests until no more
// than maxBacklog remain. Victims are taken from the tail of the
// listen queue, detached from the listener and then deleted.
//
INLINE VOID TcbTrimConnReqBacklog(TCB* tcb, UINT maxBacklog) {
    for (; tcb->backlog > maxBacklog; ) {
        LIST_ENTRY* tail = tcb->listenq.Blink;
        TCB* victim = CONTAINING_RECORD(tail, TCB, listenq);

        TcbDisassociateChild(victim);
        TcbDelete(victim);
    }
}
BOOL
TcbClose(
    TCB* tcb,
    BOOL forceful
    )
/*++

Routine Description:

    Close a TCP connection

Arguments:

    tcb - The TCB being closed
    forceful - When set, skip the graceful shutdown attempt;
        the close always succeeds without waiting

Return Value:

    TRUE  - the caller may delete the TCB structure right away
    FALSE - a graceful close is in progress; the TCB must stay
        around until the connection is really closed

--*/
{
    NTSTATUS status;
    UINT lingerSeconds;
    BOOL tryGraceful;

    //
    // A graceful close is attempted only when the caller did not ask
    // for a forceful one, the connection has reached SYN-RECEIVED or
    // beyond, and the socket was not configured for a zero-length linger.
    //
    tryGraceful = !forceful && (tcb->tcpstate >= TCPST_SYN_RECEIVED);
    if (tryGraceful && tcb->linger.l_onoff && !(tcb->linger.l_linger > 0)) {
        tryGraceful = FALSE;
    }

    if (tryGraceful) {
        status = TcbShutdown(tcb, PCBFLAG_BOTH_SHUTDOWN, FALSE);
        if (NT_SUCCESS(status)) {
            // Use the socket's own linger time when linger is
            // enabled, otherwise the system default
            if (tcb->linger.l_onoff) {
                lingerSeconds = tcb->linger.l_linger;
            } else {
                lingerSeconds = cfgDefaultLingerTime;
            }

            // The linger timer shares the TIME-WAIT timer
            TcbSetTimeWaitTimer(tcb, lingerSeconds * SLOW_TCP_TIMER_FREQ);
            return FALSE;
        }
    }

    //
    // Immediate close: fix up the listen-queue relationships
    //
    if (!tcb->parent) {
        // Drop every connection request still pending on this socket
        TcbTrimConnReqBacklog(tcb, 0);
    } else {
        // We are ourselves a pending connection request
        // of some listening socket
        TcbDisassociateChild(tcb);
    }

    return TRUE;
}
NTSTATUS
TcbShutdown(
    TCB* tcb,
    BYTE flags,
    BOOL apicall
    )
/*++

Routine Description:

    Gracefully shutdown a TCP connection

Arguments:

    tcb - Points to the TCP control block
    flags - Shutdown flags: PCBFLAG_SEND_SHUTDOWN and/or PCBFLAG_RECV_SHUTDOWN
    apicall - Whether this is called by shutdown API

Return Value:

    Status code:
      NETERR_OK on success
      NETERR_CONNRESET if unread receive data forced a connection
        reset and this is not an API call
      NETERR(WSAENOTCONN) if a send shutdown is requested before the
        connection has reached SYN-RECEIVED
      otherwise whatever TcbEmitFIN returned

Note:

    The whole operation runs at DPC level. On return the TCB is
    never marked revivable, regardless of the outcome.

--*/
{
    NTSTATUS status = NETERR_OK;
    KIRQL irql = RaiseToDpc();

    if ((flags & PCBFLAG_RECV_SHUTDOWN) && !IsPcbRecvShutdown(tcb)) {
        tcb->flags |= PCBFLAG_RECV_SHUTDOWN;

        if (!IsTcpRecvBufEmpty(tcb)) {
            //
            // If the receive buffer is not empty,
            // we'll send a RST to the connection peer
            // and reset the connection.
            //
            // The shutdown API still reports success in this case;
            // internal callers get NETERR_CONNRESET.
            //
            TcbResetPeer(tcb);
            status = apicall ? NETERR_OK : NETERR_CONNRESET;

            // TcbReset leaves both shutdown flags set on the TCB, so
            // the send-shutdown branch below is skipped afterwards.
            tcb = TcbReset(tcb, NETERR_CONNRESET);
            ASSERT(tcb != NULL);
        } else {
            // Cancel any pending overlapped receive requests
            PcbClearOverlappedRecvs(tcb, NETERR(WSAESHUTDOWN));
        }
    }

    if ((flags & PCBFLAG_SEND_SHUTDOWN) && !IsPcbSendShutdown(tcb)) {
        // Cancel any pending overlapped send requests
        PcbClearOverlappedSends(tcb, NETERR(WSAESHUTDOWN));

        // The send side is marked shut down only once the FIN
        // has been emitted successfully.
        switch (tcb->tcpstate) {
        case TCPST_SYN_RECEIVED:
        case TCPST_ESTABLISHED:
            status = TcbEmitFIN(tcb);
            if (NT_SUCCESS(status)) {
                TcbSetState(tcb, TCPST_FIN_WAIT_1, "Shutdown");
                tcb->flags |= PCBFLAG_SEND_SHUTDOWN;
            }
            break;

        case TCPST_CLOSE_WAIT:
            status = TcbEmitFIN(tcb);
            if (NT_SUCCESS(status)) {
                TcbSetState(tcb, TCPST_LAST_ACK, "Shutdown");
                tcb->flags |= PCBFLAG_SEND_SHUTDOWN;
            }
            break;

        default:
            ASSERT(tcb->tcpstate < TCPST_SYN_RECEIVED);
            status = NETERR(WSAENOTCONN);
            break;
        }
    }

    // Once shutdown has been requested, the TCB must not be revived by
    // a later connect. The bit has to be cleared on the TCB itself:
    // clearing it in the by-value 'flags' argument would have no effect.
    tcb->flags &= ~PCBFLAG_REVIVABLE;

    LowerFromDpc(irql);
    return status;
}
TCB*
TcbReset(
TCB* tcb,
NTSTATUS status
)
/*++
Routine Description:
Reset a TCP connection: record the error, mark both directions
shut down, move the TCB to the dead list in CLOSED state, restore
the default settings and wake up anyone waiting on the TCB.
Arguments:
tcb - Points to the TCP control block; must not be in an idle state
status - Specifies the cause of the reset
Return Value:
NULL if the specified TCB was deleted inside this call
(pending connection requests and lingering TCBs are deleted).
Otherwise, just return the input tcb parameter.
--*/
{
BOOL revivable;
ASSERT(!IsTcpIdleState(tcb));
// Pending connection requests and lingering TCBs are simply
// deleted; the caller must not touch the TCB after a NULL return.
if (IsPendingConnReqTcb(tcb) || TcbIsLingering(tcb)) {
TcbDelete(tcb);
return NULL;
}
// Remove the TCB from the active list
// and move it to the zombie list.
ASSERT(!IsListNull(&tcb->links));
RemoveEntryList(&tcb->links);
InsertHeadList(&DeadTcbList, &tcb->links);
// A connection reset that arrives while IsTcpSyncState is false is
// reported as WSAECONNREFUSED; any other cause is recorded as-is.
// NOTE: this and 'revivable' below must be computed before the
// state and flags are changed further down.
tcb->errStatus = (status != NETERR_CONNRESET || IsTcpSyncState(tcb)) ?
status :
NETERR(WSAECONNREFUSED);
// Revivable only when IsTcpSyncState is false and neither
// direction had been shut down before this reset. The value is
// handed to PcbCleanup below (presumably that is where
// PCBFLAG_REVIVABLE gets set - confirm in PcbCleanup).
revivable = !IsTcpSyncState(tcb) &&
(tcb->flags & PCBFLAG_BOTH_SHUTDOWN) == 0;
tcb->flags |= PCBFLAG_BOTH_SHUTDOWN;
TcbSetState(tcb, TCPST_CLOSED, "Reset");
// Stop various timers
// NOTE: This is a little hacky - we're clearing all the fields
// from delayedAcks to rtt_tick in one shot. Be careful when you
// add or remove fields from the TCB structure.
ZeroMem(
&tcb->delayedAcks,
offsetof(TCB, rtt_tick) + sizeof(tcb->rtt_tick) - offsetof(TCB, delayedAcks));
// Reload the default settings after the partial wipe above
TcbInit(tcb);
// Signal every event on the TCB, then clean up
PcbSignalEvent(tcb, PCBEVENT_ALL);
PcbCleanup((PCB*) tcb, revivable);
return tcb;
}
TCB*
TcbCloneChild(
    TCB* tcb
    )
/*++

Routine Description:

    Create a child socket for a new pending connection request
    on a listening socket

Arguments:

    tcb - The listening socket

Return Value:

    The new child socket, which inherits the listener's local
    address binding and socket options;
    NULL if the backlog is full or the allocation fails

--*/
{
    TCB* child;

    ASSERT(tcb->tcpstate == TCPST_LISTEN);

    // No room left in the backlog
    if (tcb->backlog >= tcb->maxBacklog) {
        return NULL;
    }

    // Get a fresh control block for the child
    child = (TCB*) PcbCreate(SOCK_STREAM, IPPROTOCOL_TCP, PCBFLAG_NETPOOL);
    if (child == NULL) {
        return NULL;
    }

    // Queue the child at the end of the listener's pending list
    child->parent = tcb;
    InsertTailList(&tcb->listenq, &child->listenq);
    tcb->backlog += 1;

    // The child is bound to the same local address as the listener
    child->srcport = tcb->srcport;
    child->srcaddr = tcb->srcaddr;
    child->bindSrcAddr = tcb->bindSrcAddr;
    child->flags |= PCBFLAG_BOUND;
    if (tcb->bindIfp != NULL) {
        CACHE_IFP_REFERENCE(child->bindIfp, tcb->bindIfp);
    }

    // Socket options and IP options carry over as well
    child->options = tcb->options;
    if (tcb->ipoptlen == 0) {
        ASSERT(child->ipopts == NULL);
    } else {
        PcbSetIpOpts((PCB*) child, tcb->ipopts, tcb->ipoptlen);
    }

    return child;
}
NTSTATUS
TcbListen(
    TCB* tcb,
    INT backlog
    )
/*++

Routine Description:

    Put a TCP socket into the listening state

Arguments:

    tcb - The TCB to switch
    backlog - Requested limit on pending connections; values below 1
        are raised to 1 and values above cfgMaxListenBacklog are
        lowered to cfgMaxListenBacklog

Return Value:

    NETERR(WSAEISCONN) if the socket is not in an idle state,
    the socket's pending error status if it has one,
    NETERR_OK otherwise

--*/
{
    NTSTATUS status;
    KIRQL irql;
    INT limit;

    if (!IsTcpIdleState(tcb)) {
        return NETERR(WSAEISCONN);
    }

    irql = RaiseToDpc();

    status = PcbGetErrStatus(tcb);
    if (NT_SUCCESS(status)) {
        TcbSetState(tcb, TCPST_LISTEN, "Listen");

        // Clamp the requested backlog
        if (backlog < 1) {
            limit = 1;
        } else if (backlog > cfgMaxListenBacklog) {
            limit = cfgMaxListenBacklog;
        } else {
            limit = backlog;
        }
        tcb->maxBacklog = limit;

        // Discard pending connections that no longer fit
        TcbTrimConnReqBacklog(tcb, tcb->maxBacklog);
        status = NETERR_OK;
    }

    LowerFromDpc(irql);
    return status;
}
NTSTATUS
TcbAccept(
    TCB* tcb,
    TCB** newtcb
    )
/*++

Routine Description:

    Hand out the first connected child from a listening socket's
    queue of pending connection requests

Arguments:

    tcb - The listening socket
    newtcb - Receives the accepted connection block; written only
        when a connected child is found

Return Value:

    NETERR_OK if a connection was accepted,
    NETERR_CONNRESET if no pending request is connected

--*/
{
    NTSTATUS status = NETERR_CONNRESET;
    LIST_ENTRY* link;
    KIRQL irql = RaiseToDpc();

    for (link = tcb->listenq.Flink; link != &tcb->listenq; link = link->Flink) {
        TCB* candidate = CONTAINING_RECORD(link, TCB, listenq);

        if (!IsPcbConnected(candidate)) {
            continue;
        }

        // Found one: take it off the listen queue and return it
        TcbDisassociateChild(candidate);
        *newtcb = candidate;
        status = NETERR_OK;
        break;
    }

    LowerFromDpc(irql);
    return status;
}
BOOL
TcbHasConnectedPendingConnReq(
    TCB* tcb
    )
/*++

Routine Description:

    Tell whether any pending connection request queued on a
    listening socket is already connected (per IsPcbConnected)

Arguments:

    tcb - The listening socket

Return Value:

    TRUE if at least one pending connection is ready to be accepted
    FALSE otherwise

--*/
{
    BOOL found = FALSE;
    LIST_ENTRY* link;
    KIRQL irql = RaiseToDpc();

    // Walk the listen queue and stop at the first connected child
    for (link = tcb->listenq.Flink; link != &tcb->listenq; link = link->Flink) {
        TCB* candidate = CONTAINING_RECORD(link, TCB, listenq);

        if (IsPcbConnected(candidate)) {
            found = TRUE;
            break;
        }
    }

    LowerFromDpc(irql);
    return found;
}
NTSTATUS
TcbConnect(
TCB* tcb,
IPADDR dstaddr,
IPPORT dstport,
BOOL synAck
)
/*++
Routine Description:
Make a TCP connection to the specified foreign address.
Binds the socket if needed, revives a previously reset TCB if it is
marked revivable, finds a route, sets up MSS / receive window and
emits the initial SYN (or SYN/ACK) segment.
Arguments:
tcb - Points to the TCP control block
dstaddr, dstport - Specifies the foreign address to connect to
synAck - Whether we're acknowledging a received SYN request
Return Value:
Status code:
NETERR(WSAEADDRNOTAVAIL) for a zero, broadcast or multicast
address, or a zero port
NETERR_PARAM if the socket is listening
NETERR(WSAEALREADY) if the socket is not idle and this is not
a SYN/ACK
NETERR_UNREACHABLE if no route to the destination is found
otherwise the result of PcbBind, PcbGetErrStatus or TcbEmitSYN
--*/
{
NTSTATUS status;
KIRQL irql;
RTE* rte;
// Reject destinations we can never connect to
if (dstport == 0 ||
dstaddr == 0 ||
IS_BCAST_IPADDR(dstaddr) ||
IS_MCAST_IPADDR(dstaddr))
return NETERR(WSAEADDRNOTAVAIL);
if (IsTcpListenState(tcb))
return NETERR_PARAM;
// Bind to a local address if necessary
if (!IsPcbBound(tcb)) {
status = PcbBind((PCB*) tcb, 0, 0);
if (!NT_SUCCESS(status)) return status;
}
// Everything from here to the exit label runs at DPC level
irql = RaiseToDpc();
if (tcb->flags & PCBFLAG_REVIVABLE) {
// Revive a TCB that has been reset:
// clear the leftover flags, events and error status,
// then reload the default settings.
tcb->connectSelected = 0;
tcb->flags &= ~(PCBFLAG_REVIVABLE|PCBFLAG_BOTH_SHUTDOWN);
tcb->eventFlags = 0;
KeClearEvent(GetPcbWaitEvent(tcb));
tcb->errStatus = NETERR_OK;
ASSERT(IsListEmpty(&tcb->listenq));
TcbInit(tcb);
// Move the TCB structure from the dead list
// back to the active list.
RemoveEntryList(&tcb->links);
InsertHeadList(&PcbList, &tcb->links);
} else {
// Not revivable: fail if the socket has a pending error status
status = PcbGetErrStatus(tcb);
if (!NT_SUCCESS(status)) goto exit;
}
// An active open is only allowed from an idle state
if (!IsTcpIdleState(tcb) && !synAck) {
status = NETERR(WSAEALREADY);
goto exit;
}
// Find a route to the connected destination.
ASSERT(tcb->rte == NULL);
rte = tcb->rte = IpFindRTE(dstaddr, NULL);
if (!rte) {
status = NETERR_UNREACHABLE;
goto exit;
}
RteAddRef(rte);
// If the socket is not bound to a specific source address,
// use the route's default source address
if (tcb->bindSrcAddr == 0) {
tcb->srcaddr = PcbGetDefaultSrcAddr(rte);
}
// If this is an active open, go to SYN-SENT state.
// Otherwise, we should already be in SYN-RECEIVED state.
if (!synAck) {
TcbSetState(tcb, TCPST_SYN_SENT, "Connect");
}
// Set up MSS to make sure we don't have to
// fragment on the first hop. We assume the first hop
// interface won't change during the life of the connection.
tcb->rcv_mss = rte->ifp->mtu - IPHDRLEN - TCPHDRLEN;
// Round the receive buffer size up to a whole number of segments,
// then cap it at the configured maximum
tcb->maxRecvBufsize =
((tcb->maxRecvBufsize + tcb->rcv_mss - 1) / tcb->rcv_mss) * tcb->rcv_mss;
if (tcb->maxRecvBufsize > cfgMaxSendRecvBufsize)
tcb->maxRecvBufsize = cfgMaxSendRecvBufsize;
tcb->dstaddr = dstaddr;
tcb->dstport = dstport;
tcb->snd_isn = TcbGetIsn();
// The initial receive window is the whole (empty) receive buffer
tcb->rcv_wnd = tcb->maxRecvBufsize;
ASSERT(IsTcpRecvBufEmpty(tcb));
// send out a SYN or SYN/ACK segment
status = TcbEmitSYN(tcb, synAck);
exit:
LowerFromDpc(irql);
return status;
}
#if DBG

extern ULONG XDebugOutLevel;

VOID
TcbSetState(
    TCB* tcb,
    BYTE state,
    const CHAR* caller
    )
/*++

Routine Description:

    Debug version of the TCP state change: stores the new
    connection state and, at trace level, prints the transition

Arguments:

    tcb - Points to the TCP control block
    state - The new connection state
    caller - Name of the calling function (printed in the trace)

Return Value:

    NONE

--*/
{
    // Printable names, indexed by connection state value
    static const CHAR* stateLabels[] = {
        "CLOSED",
        "LISTEN",
        "SYN_SENT",
        "SYN_RECEIVED",
        "ESTABLISHED",
        "FIN_WAIT_1",
        "FIN_WAIT_2",
        "CLOSING",
        "TIME_WAIT",
        "CLOSE_WAIT",
        "LAST_ACK"
    };
    BYTE previous;

    previous = tcb->tcpstate;
    tcb->tcpstate = state;

    // Trace the transition only when debug output is verbose enough
    if (XDebugOutLevel >= XDBG_TRACE) {
        DbgPrint("%s ", caller);
        DbgPrint("%s:%d - ", IPADDRSTR(tcb->srcaddr), NTOHS(tcb->srcport));
        DbgPrint("%s:%d: ", IPADDRSTR(tcb->dstaddr), NTOHS(tcb->dstport));
        DbgPrint("%s => %s\n", stateLabels[previous], stateLabels[state]);
    }
}

#endif // DBG