709 lines
15 KiB
C
709 lines
15 KiB
C
/*++

Copyright (c) 2000 Microsoft Corporation

Module Name:

    tcpconn.c

Abstract:

    TCP connection management functions

Revision History:

    06/05/2000 davidx
        Created it.

--*/
|
|
|
|
#include "precomp.h"
|
|
|
|
|
|
//
|
|
// Maximum segment lifetime (30 seconds)
|
|
//
|
|
UINT cfgMSL = 30 * SLOW_TCP_TIMER_FREQ;
|
|
|
|
//
|
|
// Timeout period for active connection requests
|
|
// NOTE: Non-standard behavior!!!
|
|
// Our connection timeout value of 20sec is much shorter
|
|
// than the usual value of 75sec.
|
|
//
|
|
UINT cfgConnectTimeout = 20 * SLOW_TCP_TIMER_FREQ;
|
|
|
|
//
|
|
// Maximum backlogs for a listening socket
|
|
//
|
|
INT cfgMaxListenBacklog = 5;
|
|
|
|
//
|
|
// Default linger time when a connection is gracefully closed (2 minutes)
|
|
//
|
|
USHORT cfgDefaultLingerTime = 2*60;
|
|
|
|
//
|
|
// Default retransmission timeout (3 seconds)
|
|
//
|
|
UINT cfgDefaultRTO = 3*SLOW_TCP_TIMER_FREQ;
|
|
|
|
//
|
|
// List of TCP connections that has been reset but not yet closed
|
|
//
|
|
LIST_ENTRY DeadTcbList;
|
|
|
|
|
|
VOID
|
|
TcbInit(
|
|
TCB* tcb
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Initialize a TCP control block
|
|
|
|
Arguments:
|
|
|
|
tcb - Points to a newly allocated TCB structure
|
|
|
|
Return Value:
|
|
|
|
NONE
|
|
|
|
Note:
|
|
|
|
We assume all fields have been zero-initialized
|
|
when this function is called.
|
|
|
|
--*/
|
|
|
|
{
|
|
InitializeListHead(&tcb->listenq);
|
|
tcb->linger.l_linger = cfgDefaultLingerTime;
|
|
|
|
tcb->snd_mss = tcb->rcv_mss = TCP_DEFAULT_MSS;
|
|
tcb->snd_cwnd = tcb->snd_ssthresh = MAX_TCP_WNDSIZE;
|
|
tcb->srtt_8 = 0;
|
|
tcb->rttvar_4 = tcb->RTO = cfgDefaultRTO;
|
|
|
|
tcb->sendbufNext = TcbSendbufNil(tcb);
|
|
}
|
|
|
|
|
|
//
|
|
// Disassociate a child socket (i.e. pending connection
|
|
// request for a listening socket) from its parent
|
|
//
|
|
PRIVATE VOID TcbDisassociateChild(TCB* childTcb)
|
|
{
|
|
RemoveEntryList(&childTcb->listenq);
|
|
InitializeListHead(&childTcb->listenq);
|
|
childTcb->parent->backlog--;
|
|
childTcb->parent = NULL;
|
|
}
|
|
|
|
|
|
//
|
|
// Trim the backlog of pending connection requests
|
|
//
|
|
INLINE VOID TcbTrimConnReqBacklog(TCB* tcb, UINT maxBacklog) {
|
|
while (tcb->backlog > maxBacklog) {
|
|
TCB* childTcb = CONTAINING_RECORD(tcb->listenq.Blink, TCB, listenq);
|
|
TcbDisassociateChild(childTcb);
|
|
TcbDelete(childTcb);
|
|
}
|
|
}
|
|
|
|
|
|
BOOL
|
|
TcbClose(
|
|
TCB* tcb,
|
|
BOOL forceful
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Close a TCP connection
|
|
|
|
Arguments:
|
|
|
|
tcb - Points to the TCB structure
|
|
forceful - Forceful close, always succeed and no wait
|
|
|
|
Return Value:
|
|
|
|
TRUE if the TCB structure can be deleted immediately.
|
|
FALSE if the connection is being gracefully closed
|
|
and the TCB structure should be kept around until
|
|
the connection is really closed.
|
|
|
|
--*/
|
|
|
|
{
|
|
NTSTATUS status;
|
|
|
|
//
|
|
// Try to gracefully close the socket if requested by the caller
|
|
//
|
|
if (!forceful && (tcb->tcpstate >= TCPST_SYN_RECEIVED) &&
|
|
(!tcb->linger.l_onoff || tcb->linger.l_linger > 0)) {
|
|
status = TcbShutdown(tcb, PCBFLAG_BOTH_SHUTDOWN, FALSE);
|
|
if (NT_SUCCESS(status)) {
|
|
UINT linger = tcb->linger.l_onoff ?
|
|
tcb->linger.l_linger :
|
|
cfgDefaultLingerTime;
|
|
|
|
// TIME-WAIT and linger timers are the same
|
|
TcbSetTimeWaitTimer(tcb, linger * SLOW_TCP_TIMER_FREQ);
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
if (tcb->parent) {
|
|
// This was a pending connection request
|
|
// for a listening socket.
|
|
TcbDisassociateChild(tcb);
|
|
} else {
|
|
// Clear all pending connection requests
|
|
TcbTrimConnReqBacklog(tcb, 0);
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
NTSTATUS
|
|
TcbShutdown(
|
|
TCB* tcb,
|
|
BYTE flags,
|
|
BOOL apicall
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Gracefully shutdown a TCP connection
|
|
|
|
Arguments:
|
|
|
|
tcb - Points to the TCP control block
|
|
flags - Shutdown flags: PCBFLAG_SEND_SHUTDOWN and/or PCBFLAG_RECV_SHUTDOWN
|
|
apicall - Whether this is called by shutdown API
|
|
|
|
Return Value:
|
|
|
|
Status code
|
|
|
|
--*/
|
|
|
|
{
|
|
NTSTATUS status = NETERR_OK;
|
|
KIRQL irql = RaiseToDpc();
|
|
|
|
if ((flags & PCBFLAG_RECV_SHUTDOWN) && !IsPcbRecvShutdown(tcb)) {
|
|
tcb->flags |= PCBFLAG_RECV_SHUTDOWN;
|
|
if (!IsTcpRecvBufEmpty(tcb)) {
|
|
//
|
|
// If the receive buffer is not empty,
|
|
// we'll send a RST to the connection peer
|
|
// and reset the connection.
|
|
//
|
|
TcbResetPeer(tcb);
|
|
status = apicall ? NETERR_OK : NETERR_CONNRESET;
|
|
tcb = TcbReset(tcb, NETERR_CONNRESET);
|
|
ASSERT(tcb != NULL);
|
|
} else {
|
|
// Cancel any pending overlapped receive requests
|
|
PcbClearOverlappedRecvs(tcb, NETERR(WSAESHUTDOWN));
|
|
}
|
|
}
|
|
|
|
if ((flags & PCBFLAG_SEND_SHUTDOWN) && !IsPcbSendShutdown(tcb)) {
|
|
// Cancel any pending overlapped send requests
|
|
PcbClearOverlappedSends(tcb, NETERR(WSAESHUTDOWN));
|
|
|
|
switch (tcb->tcpstate) {
|
|
case TCPST_SYN_RECEIVED:
|
|
case TCPST_ESTABLISHED:
|
|
status = TcbEmitFIN(tcb);
|
|
if (NT_SUCCESS(status)) {
|
|
TcbSetState(tcb, TCPST_FIN_WAIT_1, "Shutdown");
|
|
tcb->flags |= PCBFLAG_SEND_SHUTDOWN;
|
|
}
|
|
break;
|
|
|
|
case TCPST_CLOSE_WAIT:
|
|
status = TcbEmitFIN(tcb);
|
|
if (NT_SUCCESS(status)) {
|
|
TcbSetState(tcb, TCPST_LAST_ACK, "Shutdown");
|
|
tcb->flags |= PCBFLAG_SEND_SHUTDOWN;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
ASSERT(tcb->tcpstate < TCPST_SYN_RECEIVED);
|
|
status = NETERR(WSAENOTCONN);
|
|
break;
|
|
}
|
|
}
|
|
|
|
flags &= ~PCBFLAG_REVIVABLE;
|
|
LowerFromDpc(irql);
|
|
return status;
|
|
}
|
|
|
|
|
|
TCB*
|
|
TcbReset(
|
|
TCB* tcb,
|
|
NTSTATUS status
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Reset a TCP connection
|
|
|
|
Arguments:
|
|
|
|
tcb - Points to the TCP control block
|
|
status - Specifies the cause of the reset
|
|
|
|
Return Value:
|
|
|
|
NULL if the specified TCB was deleted inside this call.
|
|
Otherwise, just return the input tcb parameter.
|
|
|
|
--*/
|
|
|
|
{
|
|
BOOL revivable;
|
|
|
|
ASSERT(!IsTcpIdleState(tcb));
|
|
|
|
if (IsPendingConnReqTcb(tcb) || TcbIsLingering(tcb)) {
|
|
TcbDelete(tcb);
|
|
return NULL;
|
|
}
|
|
|
|
// Remove the TCB from the active list
|
|
// and move it to the zombie list.
|
|
|
|
ASSERT(!IsListNull(&tcb->links));
|
|
RemoveEntryList(&tcb->links);
|
|
InsertHeadList(&DeadTcbList, &tcb->links);
|
|
|
|
tcb->errStatus = (status != NETERR_CONNRESET || IsTcpSyncState(tcb)) ?
|
|
status :
|
|
NETERR(WSAECONNREFUSED);
|
|
|
|
revivable = !IsTcpSyncState(tcb) &&
|
|
(tcb->flags & PCBFLAG_BOTH_SHUTDOWN) == 0;
|
|
|
|
tcb->flags |= PCBFLAG_BOTH_SHUTDOWN;
|
|
TcbSetState(tcb, TCPST_CLOSED, "Reset");
|
|
|
|
// Stop various timers
|
|
// NOTE: This is a little hacky - we're clearing all the fields
|
|
// from delayedAcks to rtt_tick in one shot. Be careful when you
|
|
// add or remove fields from the TCB structure.
|
|
|
|
ZeroMem(
|
|
&tcb->delayedAcks,
|
|
offsetof(TCB, rtt_tick) + sizeof(tcb->rtt_tick) - offsetof(TCB, delayedAcks));
|
|
|
|
TcbInit(tcb);
|
|
|
|
PcbSignalEvent(tcb, PCBEVENT_ALL);
|
|
PcbCleanup((PCB*) tcb, revivable);
|
|
return tcb;
|
|
}
|
|
|
|
|
|
TCB*
TcbCloneChild(
    TCB* tcb
    )

/*++

Routine Description:

    Create a child socket for a new pending connection request
    and queue it on a listening socket

Arguments:

    tcb - Points to the listening socket

Return Value:

    Pointer to the child socket
    NULL if the backlog is full or the child could not be created

--*/

{
    TCB* child;

    ASSERT(tcb->tcpstate == TCPST_LISTEN);

    // No room left in the backlog
    if (tcb->backlog >= tcb->maxBacklog) {
        return NULL;
    }

    // Create the child socket
    child = (TCB*) PcbCreate(SOCK_STREAM, IPPROTOCOL_TCP, PCBFLAG_NETPOOL);
    if (child == NULL) {
        return NULL;
    }

    // Hook the child onto the tail of the parent's listen queue
    child->parent = tcb;
    tcb->backlog++;
    InsertTailList(&tcb->listenq, &child->listenq);

    // The child inherits the parent's local address binding
    child->flags |= PCBFLAG_BOUND;
    child->bindSrcAddr = tcb->bindSrcAddr;
    child->srcaddr = tcb->srcaddr;
    child->srcport = tcb->srcport;
    if (tcb->bindIfp != NULL) {
        CACHE_IFP_REFERENCE(child->bindIfp, tcb->bindIfp);
    }

    // ... and the parent's socket options
    child->options = tcb->options;
    if (tcb->ipoptlen == 0) {
        ASSERT(child->ipopts == NULL);
    } else {
        PcbSetIpOpts((PCB*) child, tcb->ipopts, tcb->ipoptlen);
    }

    return child;
}
|
|
|
|
|
|
NTSTATUS
|
|
TcbListen(
|
|
TCB* tcb,
|
|
INT backlog
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Switch a TCP socket into listening state
|
|
|
|
Arguments:
|
|
|
|
tcb - Points to the TCP control block
|
|
backlog - Maximum number of pending connections
|
|
|
|
Return Value:
|
|
|
|
Status code
|
|
|
|
--*/
|
|
|
|
{
|
|
KIRQL irql;
|
|
NTSTATUS status;
|
|
|
|
if (!IsTcpIdleState(tcb))
|
|
return NETERR(WSAEISCONN);
|
|
|
|
irql = RaiseToDpc();
|
|
|
|
status = PcbGetErrStatus(tcb);
|
|
if (NT_SUCCESS(status)) {
|
|
TcbSetState(tcb, TCPST_LISTEN, "Listen");
|
|
tcb->maxBacklog = (backlog < 1) ? 1 :
|
|
(backlog > cfgMaxListenBacklog) ?
|
|
cfgMaxListenBacklog :
|
|
backlog;
|
|
|
|
// Trim down the backlog of pending connections if needed
|
|
TcbTrimConnReqBacklog(tcb, tcb->maxBacklog);
|
|
status = NETERR_OK;
|
|
}
|
|
|
|
LowerFromDpc(irql);
|
|
return status;
|
|
}
|
|
|
|
|
|
NTSTATUS
|
|
TcbAccept(
|
|
TCB* tcb,
|
|
TCB** newtcb
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Accept an incoming TCP connection request
|
|
|
|
Arguments:
|
|
|
|
tcb - Points to the TCP control block
|
|
newtcb - Returns a pointer to the newly accepted connection block
|
|
|
|
Return Value:
|
|
|
|
Status code
|
|
|
|
--*/
|
|
|
|
{
|
|
KIRQL irql;
|
|
LIST_ENTRY* entry;
|
|
NTSTATUS status = NETERR_CONNRESET;
|
|
|
|
irql = RaiseToDpc();
|
|
|
|
entry = tcb->listenq.Flink;
|
|
while (entry != &tcb->listenq) {
|
|
TCB* childTcb = CONTAINING_RECORD(entry, TCB, listenq);
|
|
if (IsPcbConnected(childTcb)) {
|
|
TcbDisassociateChild(childTcb);
|
|
*newtcb = childTcb;
|
|
status = NETERR_OK;
|
|
break;
|
|
}
|
|
entry = entry->Flink;
|
|
}
|
|
|
|
LowerFromDpc(irql);
|
|
return status;
|
|
}
|
|
|
|
|
|
BOOL
|
|
TcbHasConnectedPendingConnReq(
|
|
TCB* tcb
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Check to see if a listening socket has pending connection
|
|
requests that are in ESTABLISHED state.
|
|
|
|
Arguments:
|
|
|
|
TCB - Points to the TCP control block
|
|
|
|
Return Value:
|
|
|
|
TRUE if there is pending connections ready to be accepted
|
|
FALSE otherwise
|
|
|
|
--*/
|
|
|
|
{
|
|
KIRQL irql;
|
|
LIST_ENTRY* entry;
|
|
BOOL result = FALSE;
|
|
|
|
irql = RaiseToDpc();
|
|
|
|
entry = tcb->listenq.Flink;
|
|
while (entry != &tcb->listenq) {
|
|
TCB* childTcb = CONTAINING_RECORD(entry, TCB, listenq);
|
|
if (IsPcbConnected(childTcb)) {
|
|
result = TRUE;
|
|
break;
|
|
}
|
|
entry = entry->Flink;
|
|
}
|
|
|
|
LowerFromDpc(irql);
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
NTSTATUS
|
|
TcbConnect(
|
|
TCB* tcb,
|
|
IPADDR dstaddr,
|
|
IPPORT dstport,
|
|
BOOL synAck
|
|
)
|
|
|
|
/*++
|
|
|
|
Routine Description:
|
|
|
|
Make a TCP connection to the specified foreign address
|
|
|
|
Arguments:
|
|
|
|
tcb - Points to the TCP control block
|
|
dstaddr, dstport - Specifies the foreign address to connect to
|
|
synAck - Whether we're acknowledging a received SYN request
|
|
|
|
Return Value:
|
|
|
|
Status code
|
|
|
|
--*/
|
|
|
|
{
|
|
NTSTATUS status;
|
|
KIRQL irql;
|
|
RTE* rte;
|
|
|
|
if (dstport == 0 ||
|
|
dstaddr == 0 ||
|
|
IS_BCAST_IPADDR(dstaddr) ||
|
|
IS_MCAST_IPADDR(dstaddr))
|
|
return NETERR(WSAEADDRNOTAVAIL);
|
|
|
|
if (IsTcpListenState(tcb))
|
|
return NETERR_PARAM;
|
|
|
|
// Bind to a local address if necessary
|
|
if (!IsPcbBound(tcb)) {
|
|
status = PcbBind((PCB*) tcb, 0, 0);
|
|
if (!NT_SUCCESS(status)) return status;
|
|
}
|
|
|
|
irql = RaiseToDpc();
|
|
|
|
if (tcb->flags & PCBFLAG_REVIVABLE) {
|
|
// Revive a TCB that has been reset
|
|
tcb->connectSelected = 0;
|
|
tcb->flags &= ~(PCBFLAG_REVIVABLE|PCBFLAG_BOTH_SHUTDOWN);
|
|
tcb->eventFlags = 0;
|
|
KeClearEvent(GetPcbWaitEvent(tcb));
|
|
tcb->errStatus = NETERR_OK;
|
|
|
|
ASSERT(IsListEmpty(&tcb->listenq));
|
|
TcbInit(tcb);
|
|
|
|
// Move the TCB structure from the dead list
|
|
// back to the active list.
|
|
RemoveEntryList(&tcb->links);
|
|
InsertHeadList(&PcbList, &tcb->links);
|
|
} else {
|
|
status = PcbGetErrStatus(tcb);
|
|
if (!NT_SUCCESS(status)) goto exit;
|
|
}
|
|
|
|
if (!IsTcpIdleState(tcb) && !synAck) {
|
|
status = NETERR(WSAEALREADY);
|
|
goto exit;
|
|
}
|
|
|
|
// Find a route to the connected destination.
|
|
ASSERT(tcb->rte == NULL);
|
|
rte = tcb->rte = IpFindRTE(dstaddr, NULL);
|
|
if (!rte) {
|
|
status = NETERR_UNREACHABLE;
|
|
goto exit;
|
|
}
|
|
|
|
RteAddRef(rte);
|
|
if (tcb->bindSrcAddr == 0) {
|
|
tcb->srcaddr = PcbGetDefaultSrcAddr(rte);
|
|
}
|
|
|
|
// If this is an active open, go to SYN-SENT state.
|
|
// Otherwise, we should already be in SYN-RECEIVED state.
|
|
if (!synAck) {
|
|
TcbSetState(tcb, TCPST_SYN_SENT, "Connect");
|
|
}
|
|
|
|
// Set up MSS to make sure we don't have to
|
|
// fragment on the first hop. We assume the first hop
|
|
// interface won't change during the life of the connection.
|
|
tcb->rcv_mss = rte->ifp->mtu - IPHDRLEN - TCPHDRLEN;
|
|
tcb->maxRecvBufsize =
|
|
((tcb->maxRecvBufsize + tcb->rcv_mss - 1) / tcb->rcv_mss) * tcb->rcv_mss;
|
|
if (tcb->maxRecvBufsize > cfgMaxSendRecvBufsize)
|
|
tcb->maxRecvBufsize = cfgMaxSendRecvBufsize;
|
|
|
|
tcb->dstaddr = dstaddr;
|
|
tcb->dstport = dstport;
|
|
|
|
tcb->snd_isn = TcbGetIsn();
|
|
tcb->rcv_wnd = tcb->maxRecvBufsize;
|
|
ASSERT(IsTcpRecvBufEmpty(tcb));
|
|
|
|
// send out a SYN or SYN/ACK segment
|
|
status = TcbEmitSYN(tcb, synAck);
|
|
|
|
exit:
|
|
LowerFromDpc(irql);
|
|
return status;
|
|
}
|
|
|
|
|
|
#if DBG

extern ULONG XDebugOutLevel;

VOID
TcbSetState(
    TCB* tcb,
    BYTE state,
    const CHAR* caller
    )

/*++

Routine Description:

    Change the TCP connection state (debug build version,
    which can also trace the transition)

Arguments:

    tcb - Points to the TCP control block
    state - Specifies the new connection state; also used as an
        index into the table of state names
    caller - Name of the calling function, printed in the trace

Return Value:

    NONE

--*/

{
    // Printable names, indexed by TCP state value
    static const CHAR* stateNames[] = {
        "CLOSED",
        "LISTEN",
        "SYN_SENT",
        "SYN_RECEIVED",
        "ESTABLISHED",
        "FIN_WAIT_1",
        "FIN_WAIT_2",
        "CLOSING",
        "TIME_WAIT",
        "CLOSE_WAIT",
        "LAST_ACK"
    };

    BYTE previous = tcb->tcpstate;

    tcb->tcpstate = state;

    // Trace the transition only at trace debug level or above
    if (XDebugOutLevel >= XDBG_TRACE) {
        // NOTE(review): the two addresses are printed by separate
        // DbgPrint calls - presumably IPADDRSTR reuses one buffer;
        // confirm before merging them into a single call.
        DbgPrint("%s ", caller);
        DbgPrint("%s:%d - ", IPADDRSTR(tcb->srcaddr), NTOHS(tcb->srcport));
        DbgPrint("%s:%d: ", IPADDRSTR(tcb->dstaddr), NTOHS(tcb->dstport));
        DbgPrint("%s => %s\n", stateNames[previous], stateNames[state]);
    }
}

#endif // DBG
|
|
|