2020-09-30 17:12:29 +02:00

3398 lines
123 KiB
C

/********************************************************************/
/** Microsoft LAN Manager **/
/** Copyright(c) Microsoft Corp., 1990-1993 **/
/********************************************************************/
/* :ts=4 */
//** TCPRCV.C - TCP receive protocol code.
//
// This file contains the code for handling incoming TCP packets.
//
#include "oscfg.h"
#include "ndis.h"
#include "cxport.h"
#include "ip.h"
#include "tdi.h"
#ifdef VXD
#include "tdivxd.h"
#include "tdistat.h"
#endif
#ifdef NT
#include "tdint.h"
#include "tdistat.h"
#endif
#include "queue.h"
#include "addr.h"
#include "tcp.h"
#include "tcb.h"
#include "tcpconn.h"
#include "tcpsend.h"
#include "tcprcv.h"
#include "tcpdeliv.h"
#include "tlcommon.h"
#include "info.h"
#include "tcpcfg.h"
#include "secfltr.h"
// Bit flags describing deferred request-completion work. TCPRcvComplete
// tests and updates ANY_REQUEST_COMPLETE, IN_RCV_COMPLETE,
// CONN_REQUEST_COMPLETE and SEND_REQUEST_COMPLETE in this word.
uint RequestCompleteFlags;
// Connection requests queued by CompleteConnReq; TCPRcvComplete dequeues
// each one, calls its completion routine and frees it.
Queue ConnRequestCompleteQ;
// Send requests queued for completion (ACKData enqueues here);
// TCPRcvComplete dequeues each one, calls its completion routine and
// frees it.
Queue SendCompleteQ;
// TCBs with delayed actions outstanding. DelayAction enqueues a TCB here
// and ProcessTCBDelayQ drains the queue.
Queue TCBDelayQ;
#ifdef SYN_ATTACK
// Held by SynAttChk and FindListenConn while they update the SYN-attack
// counters and the adapted retransmit count.
DEFINE_LOCK_STRUCTURE(SynAttLock)
#endif
// Held while RequestCompleteFlags is modified and while the two completion
// queues above are manipulated.
DEFINE_LOCK_STRUCTURE(RequestCompleteLock)
// Held while TCBDelayQ and TCBDelayRtnCount are manipulated.
DEFINE_LOCK_STRUCTURE(TCBDelayLock)
// Number of callers currently inside ProcessTCBDelayQ.
ulong TCBDelayRtnCount;
// ProcessTCBDelayQ returns immediately when TCBDelayRtnCount would exceed
// this value.
ulong TCBDelayRtnLimit;
// NOTE(review): presumably the value (or cap) used when TCBDelayRtnLimit is
// initialized; it is not referenced in this part of the file - confirm.
#define TCB_DELAY_RTN_LIMIT 4
// Locks defined in other modules. FindListenConn takes ConnTableLock and,
// in the NT build, releases AddrObjTableLock.
EXTERNAL_LOCK(TCBTableLock)
EXTERNAL_LOCK(AddrObjTableLock)
EXTERNAL_LOCK(ConnTableLock)
// IP's information block; AdjustRcvWin calls through its ipi_getlocalmtu
// entry.
extern IPInfo LocalNetInfo;
// NOTE(review): presumably 500 milliseconds converted to timer ticks; the
// constant is not used in this part of the file - confirm.
#define PERSIST_TIMEOUT MS_TO_TICKS(500)
void ResetSendNext(TCB *SeqTCB, SeqNum NewSeq);
#if FAST_RETRANSMIT
extern uint MaxDupAcks;
void ResetAndFastSend(TCB *SeqTCB, SeqNum NewSeq);
#endif
#ifdef NT
// NT-only helpers used by FindListenConn when a connect indication hands
// back an accept IRP.
NTSTATUS
TCPPrepareIrpForCancel(
PTCP_CONTEXT TcpContext,
PIRP Irp,
PDRIVER_CANCEL CancelRoutine
);
extern void
TCPRequestComplete(
void *Context,
unsigned int Status,
unsigned int UnUsed
);
VOID
TCPCancelRequest(
PDEVICE_OBJECT Device,
PIRP Irp
);
//
// All of the init code can be discarded.
//
#ifdef ALLOC_PRAGMA
int InitTCPRcv(void);
void UnInitTCPRcv(void);
#pragma alloc_text(INIT, InitTCPRcv)
#pragma alloc_text(INIT, UnInitTCPRcv)
#endif // ALLOC_PRAGMA
#ifdef RASAUTODIAL
extern BOOLEAN fAcdLoadedG;
#endif
#endif // NT
//* AdjustRcvWin - Adjust the receive window on a TCB.
//
//  Rounds the TCB's receive window up to a whole number of segments, where
//  the segment size is the smaller of the local MSS (local MTU as reported
//  by IP, less the TCP header) and the remote MSS. A window that is already
//  an even multiple is left alone, and the window is never grown past
//  0xffff. Nothing is changed when WINDOW_SET is on in tcb_flags. The
//  caller is assumed to have exclusive access to the TCB.
//
//  Input:  WinTCB          - TCB to be adjusted.
//
//  Returns: Nothing.
//
void
AdjustRcvWin(TCB *WinTCB)
{
    ushort  SegSize;
    uchar   GotMTU;
    ulong   WholeSegs;

    CTEAssert(WinTCB->tcb_defaultwin != 0);
    CTEAssert(WinTCB->tcb_rcvwin != 0);
    CTEAssert(WinTCB->tcb_remmss != 0);

    if (WinTCB->tcb_flags & WINDOW_SET)
        return;

    // Ask IP for the local MTU of our source address.
    GotMTU = (*LocalNetInfo.ipi_getlocalmtu)(WinTCB->tcb_saddr, &SegSize);
    if (!GotMTU) {
        // IP doesn't know this address; nothing sensible we can do.
        CTEAssert(FALSE);
        return;
    }

    // Convert the MTU to an MSS and clamp it to what the peer will take.
    SegSize -= sizeof(TCPHeader);
    SegSize = MIN(SegSize, WinTCB->tcb_remmss);

    WholeSegs = WinTCB->tcb_defaultwin / (ulong)SegSize;

    if (WholeSegs >= 4) {
        ulong   Rounded;

        // Already an exact multiple - leave both windows as they are.
        if ((WholeSegs * (ulong)SegSize) == WinTCB->tcb_defaultwin)
            return;

        // Bump up to the next multiple, provided that still fits in 64K.
        Rounded = (WholeSegs + 1) * (ulong)SegSize;
        if (Rounded <= 0xffff) {
            WinTCB->tcb_defaultwin = (uint)Rounded;
            WinTCB->tcb_rcvwin = (uint)Rounded;
        }
        return;
    }

    // Fewer than four segments fit in the window. Go to four segments if
    // that stays within 64K; failing that, use the largest whole number of
    // segments that fits, unless only one fits, in which case take the
    // full 0xffff.
    if (SegSize <= (0xffff/4)) {
        WinTCB->tcb_defaultwin = (uint)(4 * SegSize);
    } else {
        ulong   MaxSegs;

        MaxSegs = 0xffff/(ulong)SegSize;
        WinTCB->tcb_defaultwin =
            (MaxSegs != 1) ? MaxSegs * (ulong)SegSize : 0xffff;
    }

    WinTCB->tcb_rcvwin = WinTCB->tcb_defaultwin;
}
//* CompleteRcvs - Complete rcvs on a TCB.
//
// Called when we need to complete rcvs on a TCB. We'll pull things from
// the TCB's rcv queue, as long as there are rcvs that have the PUSH bit
// set.
//
// The TCB lock is acquired here and is dropped around every completion
// callback. The RCV_CMPLTING flag is set for the duration of the work and
// checked on entry, so a second caller arriving while the lock is dropped
// returns without doing anything. Once the receive queue has been emptied
// we either reset the receive handler, indicate any pending data, or carry
// out a pending graceful close / disconnect notification.
//
// Input: CmpltTCB - TCB to complete on.
//
// Returns: Nothing.
//
void
CompleteRcvs(TCB *CmpltTCB)
{
CTELockHandle TCBHandle;
// IndReq holds the first request completed in a pass; it is kept rather
// than freed so that it can be handed to IndicatePendingData below.
TCPRcvReq *CurrReq, *NextReq, *IndReq;
CTEStructAssert(CmpltTCB, tcb);
CTEAssert(CmpltTCB->tcb_refcnt != 0);
CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
// Only proceed if the TCB isn't closing, nobody else is already completing
// receives on it, and there is at least one posted receive.
if (!CLOSING(CmpltTCB) && !(CmpltTCB->tcb_flags & RCV_CMPLTING)
&& (CmpltTCB->tcb_rcvhead != NULL)) {
CmpltTCB->tcb_flags |= RCV_CMPLTING;
// Each pass of this outer loop drains the pushed requests at the head of
// the queue. We only go around again (via 'continue') when a new buffer
// was posted while pending data was being indicated.
for (;;) {
CurrReq = CmpltTCB->tcb_rcvhead;
IndReq = NULL;
do {
CTEStructAssert(CurrReq, trr);
if (CurrReq->trr_flags & TRR_PUSHED) {
// Need to complete this one. If this is the current rcv
// advance the current rcv to the next one in the list.
// Then set the list head to the next one in the list.
CTEAssert(CurrReq->trr_amt != 0 ||
!DATA_RCV_STATE(CmpltTCB->tcb_state));
NextReq = CurrReq->trr_next;
if (CmpltTCB->tcb_currcv == CurrReq)
CmpltTCB->tcb_currcv = NextReq;
CmpltTCB->tcb_rcvhead = NextReq;
if (NextReq == NULL) {
// We've just removed the last buffer. Set the
// rcvhandler to PendData, in case something
// comes in during the callback.
CTEAssert(CmpltTCB->tcb_rcvhndlr != IndicateData);
CmpltTCB->tcb_rcvhndlr = PendData;
}
// The request is off the queue now, so it's safe to drop the lock
// while we call the client back.
CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
if (CurrReq->trr_uflags != NULL)
*(CurrReq->trr_uflags) =
TDI_RECEIVE_NORMAL | TDI_RECEIVE_ENTIRE_MESSAGE;
(*CurrReq->trr_rtn)(CurrReq->trr_context, TDI_SUCCESS,
CurrReq->trr_amt);
// Hang on to the first completed request in IndReq; free any
// later ones right away.
if (IndReq != NULL)
FreeRcvReq(CurrReq);
else
IndReq = CurrReq;
// Re-take the lock and re-read the head, since the queue may have
// changed while the lock was dropped.
CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
CurrReq = CmpltTCB->tcb_rcvhead;
} else
// This one isn't to be completed, so bail out.
break;
} while (CurrReq != NULL);
// Now see if we've completed all of the requests. If we have, we
// may need to deal with pending data and/or reset the rcv. handler.
if (CurrReq == NULL) {
// We've completed everything that can be, so stop the push
// timer. We don't stop it if CurrReq isn't NULL because we
// want to make sure later data is eventually pushed.
STOP_TCB_TIMER(CmpltTCB->tcb_pushtimer);
CTEAssert(IndReq != NULL);
// No more recv. requests.
if (CmpltTCB->tcb_pendhead == NULL) {
FreeRcvReq(IndReq);
// No pending data. Set the rcv. handler to either PendData
// or IndicateData.
if (!(CmpltTCB->tcb_flags & (DISC_PENDING | GC_PENDING))) {
if (CmpltTCB->tcb_rcvind != NULL &&
CmpltTCB->tcb_indicated == 0)
CmpltTCB->tcb_rcvhndlr = IndicateData;
else
CmpltTCB->tcb_rcvhndlr = PendData;
} else {
// A close or disconnect was waiting for the data to drain.
goto Complete_Notify;
}
} else {
// We have pending data to deal with.
if (CmpltTCB->tcb_rcvind != NULL &&
CmpltTCB->tcb_indicated == 0) {
// There's a rcv. indicate handler on this TCB. Call
// the indicate handler with the pending data.
#ifdef VXD
CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
IndicatePendingData(CmpltTCB, IndReq);
SendACK(CmpltTCB);
CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
#else
// Here IndicatePendingData is given the lock handle and the lock
// is re-acquired afterwards, so it evidently returns with the TCB
// lock released.
IndicatePendingData(CmpltTCB, IndReq, TCBHandle);
SendACK(CmpltTCB);
CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
#endif
// See if a buffer has been posted. If so, we'll need
// to check and see if it needs to be completed.
if (CmpltTCB->tcb_rcvhead != NULL)
continue;
else {
// If the pending head is now NULL, we've used up
// all the data.
if (CmpltTCB->tcb_pendhead == NULL &&
(CmpltTCB->tcb_flags &
(DISC_PENDING | GC_PENDING)))
goto Complete_Notify;
}
} else {
// No indicate handler, so nothing to do. The rcv.
// handler should already be set to PendData.
FreeRcvReq(IndReq);
CTEAssert(CmpltTCB->tcb_rcvhndlr == PendData);
}
}
} else {
// We stopped at a request that hasn't been pushed yet. Release the
// saved request, if any, and leave the rest of the queue alone.
if (IndReq != NULL)
FreeRcvReq(IndReq);
CTEAssert(CmpltTCB->tcb_rcvhndlr == BufferData);
}
break;
}
CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
}
CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
return;
// We get here with the TCB lock held and RCV_CMPLTING still set.
Complete_Notify:
// Something is pending. Figure out what it is, and do
// it.
if (CmpltTCB->tcb_flags & GC_PENDING) {
CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
// Bump the refcnt, because GracefulClose will
// deref the TCB and we're not really done with
// it yet.
CmpltTCB->tcb_refcnt++;
// GracefulClose is handed the lock handle and we return without
// freeing the lock ourselves.
GracefulClose(CmpltTCB,
CmpltTCB->tcb_flags & TW_PENDING, TRUE,
TCBHandle);
} else
if (CmpltTCB->tcb_flags & DISC_PENDING) {
CmpltTCB->tcb_flags &= ~DISC_PENDING;
// Drop the lock for the notification, then take it again just to clear
// RCV_CMPLTING.
CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
NotifyOfDisc(CmpltTCB, NULL, TDI_GRACEFUL_DISC);
CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
} else {
// Neither flag is set, which the callers above should have ruled out.
// NOTE(review): RCV_CMPLTING is left set on this path.
CTEAssert(FALSE);
CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
}
return;
}
//* ProcessTCBDelayQ - Process TCBs on the delayed Q.
//
//  Drains TCBDelayQ. For each TCB taken off the queue we carry out whatever
//  delayed actions are flagged on it (receive completion, output, ACK),
//  repeating until no delayed flags remain or the TCB is closing, then
//  clear IN_DELAY_Q and dereference the TCB.
//
//  Entry is counted in TCBDelayRtnCount and refused once the count would
//  exceed TCBDelayRtnLimit, which bounds recursion while still letting
//  several threads work on the queue at once.
//
//  Entry:  Nothing.
//
//  Returns: Nothing.
//
void
ProcessTCBDelayQ(void)
{
    CTELockHandle   DelayQHandle;
    CTELockHandle   TCBLockHandle;
    TCB             *CurTCB;

    CTEGetLock(&TCBDelayLock, &DelayQHandle);

    // Count ourselves in; back out straight away if that puts us over the
    // limit.
    TCBDelayRtnCount++;
    if (TCBDelayRtnCount > TCBDelayRtnLimit) {
        TCBDelayRtnCount--;
        CTEFreeLock(&TCBDelayLock, DelayQHandle);
        return;
    }

    for (;;) {
        if (EMPTYQ(&TCBDelayQ))
            break;

        DEQUEUE(&TCBDelayQ, CurTCB, TCB, tcb_delayq);
        CTEStructAssert(CurTCB, tcb);
        CTEAssert(CurTCB->tcb_refcnt != 0);
        CTEAssert(CurTCB->tcb_flags & IN_DELAY_Q);

        // Swap the queue lock for the TCB's own lock.
        CTEFreeLock(&TCBDelayLock, DelayQHandle);
        CTEGetLock(&CurTCB->tcb_lock, &TCBLockHandle);

        // Keep going round until nothing is flagged; an action may be
        // flagged again while the lock is dropped for an earlier one.
        for (;;) {
            if (CLOSING(CurTCB))
                break;
            if (!(CurTCB->tcb_flags & DELAYED_FLAGS))
                break;

            if (CurTCB->tcb_flags & NEED_RCV_CMPLT) {
                CurTCB->tcb_flags &= ~NEED_RCV_CMPLT;
                CTEFreeLock(&CurTCB->tcb_lock, TCBLockHandle);
                CompleteRcvs(CurTCB);
                CTEGetLock(&CurTCB->tcb_lock, &TCBLockHandle);
            }

            if (CurTCB->tcb_flags & NEED_OUTPUT) {
                CurTCB->tcb_flags &= ~NEED_OUTPUT;
                // Extra reference taken before handing the TCB to TCPSend.
                CurTCB->tcb_refcnt++;
#ifdef VXD
                CTEFreeLock(&CurTCB->tcb_lock, TCBLockHandle);
                TCPSend(CurTCB);
#else
                TCPSend(CurTCB, TCBLockHandle);
#endif
                CTEGetLock(&CurTCB->tcb_lock, &TCBLockHandle);
            }

            if (CurTCB->tcb_flags & NEED_ACK) {
                CurTCB->tcb_flags &= ~NEED_ACK;
                CTEFreeLock(&CurTCB->tcb_lock, TCBLockHandle);
                SendACK(CurTCB);
                CTEGetLock(&CurTCB->tcb_lock, &TCBLockHandle);
            }
        }

        // Done with this one. DerefTCB is handed the TCB lock handle, and
        // we go back to the queue lock for the next entry.
        CurTCB->tcb_flags &= ~IN_DELAY_Q;
        DerefTCB(CurTCB, TCBLockHandle);
        CTEGetLock(&TCBDelayLock, &DelayQHandle);
    }

    TCBDelayRtnCount--;
    CTEFreeLock(&TCBDelayLock, DelayQHandle);
}
//* DelayAction - Put a TCB on the queue for a delayed action.
//
//  Flags an action on a TCB and makes sure the TCB is on TCBDelayQ so that
//  the action gets carried out later (at rcv. complete or some other time).
//  A TCB that is already queued only has the new action bit added. The
//  caller must hold the TCB's lock.
//
//  Input:  DelayTCB        - TCB which we're going to sched.
//          Action          - Action we're scheduling.
//
//  Returns: Nothing.
//
void
DelayAction(TCB *DelayTCB, uint Action)
{
    CTELockHandle   QLockHandle;

    CTEGetLockAtDPC(&TCBDelayLock, &QLockHandle);

    DelayTCB->tcb_flags |= Action;

    if ((DelayTCB->tcb_flags & IN_DELAY_Q) == 0) {
        // First time on the queue: mark it, and take a reference that
        // lasts until the queue has been processed.
        DelayTCB->tcb_flags |= IN_DELAY_Q;
        DelayTCB->tcb_refcnt++;
        ENQUEUE(&TCBDelayQ, &DelayTCB->tcb_delayq);
    }

    CTEFreeLockFromDPC(&TCBDelayLock, QLockHandle);
}
//* TCPRcvComplete - Handle a receive complete.
//
// Called by the lower layers when we're done receiving. We look to see if
// we have and pending requests to complete. If we do, we complete them. Then
// we look to see if we have any TCBs pending for output. If we do, we
// get them going.
//
// Input: Nothing.
//
// Returns: Nothing.
//
void
TCPRcvComplete(void)
{
CTELockHandle CompleteHandle;
TCPReq *Req;
if (RequestCompleteFlags & ANY_REQUEST_COMPLETE) {
CTEGetLock(&RequestCompleteLock, &CompleteHandle);
if (!(RequestCompleteFlags & IN_RCV_COMPLETE)) {
RequestCompleteFlags |= IN_RCV_COMPLETE;
do {
if (RequestCompleteFlags & CONN_REQUEST_COMPLETE) {
if (!EMPTYQ(&ConnRequestCompleteQ)) {
DEQUEUE(&ConnRequestCompleteQ, Req, TCPReq, tr_q);
CTEStructAssert(Req, tr);
CTEStructAssert(*(TCPConnReq **)&Req, tcr);
CTEFreeLock(&RequestCompleteLock, CompleteHandle);
(*Req->tr_rtn)(Req->tr_context, Req->tr_status, 0);
FreeConnReq((TCPConnReq *)Req);
CTEGetLock(&RequestCompleteLock, &CompleteHandle);
} else
RequestCompleteFlags &= ~CONN_REQUEST_COMPLETE;
}
if (RequestCompleteFlags & SEND_REQUEST_COMPLETE) {
if (!EMPTYQ(&SendCompleteQ)) {
TCPSendReq *SendReq;
DEQUEUE(&SendCompleteQ, Req, TCPReq, tr_q);
CTEStructAssert(Req, tr);
SendReq = (TCPSendReq *)Req;
CTEStructAssert(SendReq, tsr);
CTEFreeLock(&RequestCompleteLock, CompleteHandle);
(*Req->tr_rtn)(Req->tr_context, Req->tr_status,
Req->tr_status == TDI_SUCCESS ? SendReq->tsr_size
: 0);
FreeSendReq((TCPSendReq *)Req);
CTEGetLock(&RequestCompleteLock, &CompleteHandle);
} else
RequestCompleteFlags &= ~SEND_REQUEST_COMPLETE;
}
} while (RequestCompleteFlags & ANY_REQUEST_COMPLETE);
RequestCompleteFlags &= ~IN_RCV_COMPLETE;
}
CTEFreeLock(&RequestCompleteLock, CompleteHandle);
}
ProcessTCBDelayQ();
}
//* CompleteConnReq - Complete a connection request on a TCB.
//
//  Detaches the connection request (if any) from a TCB, fills in its
//  connection information and status, and puts it on ConnRequestCompleteQ.
//  The callback itself happens later, when TCPRcvComplete drains that
//  queue. We assume the TCB lock is held when we're called.
//
//  Input:  CmpltTCB        - TCB from which to complete.
//          OptInfo         - IP OptInfo for completion.
//          Status          - Status to complete with.
//
//  Returns: Nothing.
//
void
CompleteConnReq(TCB *CmpltTCB, IPOptInfo *OptInfo, TDI_STATUS Status)
{
    TCPConnReq      *PendingReq;
    CTELockHandle   CmpltQHandle;

    CTEStructAssert(CmpltTCB, tcb);

    PendingReq = CmpltTCB->tcb_connreq;

    if (PendingReq == NULL) {
        // Nothing to complete; callers aren't expected to do this.
        DEBUGCHK;
        return;
    }

    // Unhook the request from the TCB, then record the peer's address,
    // port and options along with the final status.
    CmpltTCB->tcb_connreq = NULL;
    UpdateConnInfo(PendingReq->tcr_conninfo, OptInfo, CmpltTCB->tcb_daddr,
        CmpltTCB->tcb_dport);
    PendingReq->tcr_req.tr_status = Status;

    // Hand it to the completion queue.
    CTEGetLockAtDPC(&RequestCompleteLock, &CmpltQHandle);
    RequestCompleteFlags |= CONN_REQUEST_COMPLETE;
    ENQUEUE(&ConnRequestCompleteQ, &PendingReq->tcr_req.tr_q);
    CTEFreeLockFromDPC(&RequestCompleteLock, CmpltQHandle);
}
#ifdef SYN_ATTACK
//* SynAttChk - Check the SYN attack thresholds.
//
//  Checks whether the thresholds relevant to containing a SYN attack are
//  being crossed, and adjusts MaxConnectResponseRexmitCountTmp to match.
//  Called from FindListenConn once a connection has been found to handle a
//  SYN request. All of the work is done under SynAttLock.
//
//  Input:  ListenAO        - AddrObj the SYN arrived on.
//
//  Returns: Nothing.
//
void
SynAttChk ( AddrObj *ListenAO )
{
    BOOLEAN         JustLowered = FALSE;
    CTELockHandle   AttHandle;

    CTEGetLockAtDPC(&SynAttLock, &AttHandle);

    //
    // Another connection is going into the syn_rcvd state. If that takes
    // us to the half-open threshold while the retry count is still at its
    // normal value, and enough half-open connections have been retried,
    // drop the retry count to the adapted value.
    //
    ++TCPHalfOpen;
    if ((TCPHalfOpen >= TCPMaxHalfOpen) &&
        (MaxConnectResponseRexmitCountTmp == MAX_CONNECT_RESPONSE_REXMIT_CNT) &&
        (TCPHalfOpenRetried >= TCPMaxHalfOpenRetried)) {
        MaxConnectResponseRexmitCountTmp = ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT;
        JustLowered = TRUE;
    }

    //
    // If this port hit its connection limit earlier, that mark is cleared
    // now, and we see whether the low watermark has been reached.
    //
    if (ListenAO->ConnLimitReached) {
        ListenAO->ConnLimitReached = FALSE;

        if (!JustLowered &&
            (MaxConnectResponseRexmitCountTmp == ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT)) {
            CTEAssert(TCPPortsExhausted > 0);

            //
            // FindListenConn found a connection on this port, so the port
            // had one available and wasn't exhausted after all. Take it
            // out of the exhausted count. If it has no connections left
            // now, it'll be counted again the next time a SYN for the
            // port comes along.
            //
            --TCPPortsExhausted;
            if (TCPPortsExhausted <= TCPMaxPortsExhaustedLW) {
                MaxConnectResponseRexmitCountTmp =
                    MAX_CONNECT_RESPONSE_REXMIT_CNT;
            }
        }
    }

    CTEFreeLockFromDPC(&SynAttLock, AttHandle);
    return;
}
#endif
//* FindListenConn - Find (or fabricate) a listening connection.
//
// Called by our Receive handler to decide what to do about an incoming
// SYN. We walk down the list of connections associated with the destination
// address, and if we find any in the listening state that can be used for
// the incoming request we'll take them, possibly returning a listen in the
// process. If we don't find any appropriate listening connections, we'll
// call the Connect Event handler if one is registered. If all else fails,
// we'll return NULL and the SYN will be RST.
//
// The caller must hold the AddrObjTableLock before calling this routine,
// and that lock must have been taken at DPC level. This routine will free
// that lock back to DPC level.
// NOTE(review): the AddrObjTableLock is only released here when NT is
// defined; confirm what the other builds expect.
//
// Locks are taken in the order ConnTableLock, then the AddrObj lock, then
// (while scanning) each candidate TCB's lock. Both ConnTableLock and the
// AddrObj lock are released on every return path. A TCB returned from the
// listen queue has had its reference count incremented; one created for a
// connect indication has its reference count set to 1.
//
// Input: ListenAO - Pointer to AddrObj for local address.
// Src - Source IP address of SYN.
// SrcPort - Source port of SYN.
// OptInfo - IP options info from SYN.
//
// Returns: Pointer to found TCB, or NULL if we can't find one.
//
TCB *
FindListenConn(AddrObj *ListenAO, IPAddr Src, ushort SrcPort, IPOptInfo *OptInfo)
{
CTELockHandle Handle; // Lock handle on AO, TCB.
TCB *CurrentTCB = NULL;
TCPConn *CurrentConn = NULL;
TCPConnReq *ConnReq = NULL;
CTELockHandle ConnHandle;
Queue *Temp;
uint FoundConn = FALSE;
CTEStructAssert(ListenAO, ao);
CTEGetLockAtDPC(&ConnTableLock, &ConnHandle);
CTEGetLockAtDPC(&ListenAO->ao_lock, &Handle);
#ifdef NT
CTEFreeLockFromDPC(&AddrObjTableLock, DISPATCH_LEVEL);
#endif
// We have the lock on the AddrObj. Walk down its list, looking
// for connections in the listening state.
if (AO_VALID(ListenAO)) {
if (ListenAO->ao_listencnt != 0) {
CTELockHandle TCBHandle;
Temp = QHEAD(&ListenAO->ao_listenq);
while (Temp != QEND(&ListenAO->ao_listenq)) {
CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
CTEStructAssert(CurrentConn, tc);
// If this TCB is in the listening state, with no delete
// pending, it's a candidate. Look at the pending listen
// info. to see if we should take it.
if ((CurrentTCB = CurrentConn->tc_tcb) != NULL) {
CTEStructAssert(CurrentTCB, tcb);
CTEAssert(CurrentTCB->tcb_state == TCB_LISTEN);
CTEGetLockAtDPC(&CurrentTCB->tcb_lock, &TCBHandle);
if (CurrentTCB->tcb_state == TCB_LISTEN &&
!PENDING_ACTION(CurrentTCB)) {
// Need to see if we can take it.
// See if the addresses specified in the ConnReq
// match. A null address or a zero port on the listen
// matches anything.
if ((IP_ADDR_EQUAL(CurrentTCB->tcb_daddr,
NULL_IP_ADDR) ||
IP_ADDR_EQUAL(CurrentTCB->tcb_daddr,
Src)) &&
(CurrentTCB->tcb_dport == 0 ||
CurrentTCB->tcb_dport == SrcPort)) {
// Leave the loop with this TCB's lock still held.
FoundConn = TRUE;
break;
}
// Otherwise, this didn't match, so we'll check the
// next one.
}
CTEFreeLockFromDPC(&CurrentTCB->tcb_lock, TCBHandle);
}
Temp = QNEXT(Temp);;
}
// See why we've exited the loop.
if (FoundConn) {
CTEStructAssert(CurrentTCB, tcb);
// We exited because we found a TCB. If it's pre-accepted,
// we're done.
CurrentTCB->tcb_refcnt++;
CTEAssert(CurrentTCB->tcb_connreq != NULL);
ConnReq = CurrentTCB->tcb_connreq;
// If QUERY_ACCEPT isn't set, turn on the CONN_ACCEPTED bit.
if (!(ConnReq->tcr_flags & TDI_QUERY_ACCEPT))
CurrentTCB->tcb_flags |= CONN_ACCEPTED;
CurrentTCB->tcb_state = TCB_SYN_RCVD;
ListenAO->ao_listencnt--;
// Since he's no longer listening, remove him from the listen
// queue and put him on the active queue.
REMOVEQ(&CurrentConn->tc_q);
ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
#ifdef SYN_ATTACK
if (SynAttackProtect) {
SynAttChk(ListenAO);
}
#endif
// Release in the reverse of the order taken: TCB, AddrObj, conn table.
CTEFreeLockFromDPC(&CurrentTCB->tcb_lock, TCBHandle);
CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
CTEFreeLockFromDPC(&ConnTableLock, ConnHandle);
return CurrentTCB;
} else {
// Since we have a listening count, this should never happen
// if that count was non-zero initially.
// NOTE(review): when the assert compiles away, execution simply
// falls through to the connect-handler code below.
CTEAssert(FALSE);
}
}
// We didn't find a matching TCB. If there's a connect indication
// handler, call it now to find a connection to accept on.
CTEAssert(FoundConn == FALSE);
if (ListenAO->ao_connect != NULL) {
uchar TAddress[TCP_TA_SIZE];
PVOID ConnContext;
PConnectEvent Event;
PVOID EventContext;
TDI_STATUS Status;
TCB *AcceptTCB;
// Note: this ConnReq shadows the function-scope ConnReq for the rest of
// this block.
TCPConnReq *ConnReq;
// In the NT build the handler hands back a pointer (used below as the
// accept IRP); otherwise it fills in a ConnectEventInfo structure.
#ifdef NT
ConnectEventInfo *EventInfo;
#else
ConnectEventInfo EventInfo;
#endif
// He has a connect handler. Put the transport address together,
// and call him. We also need to get the necessary resources
// first.
AcceptTCB = AllocTCB();
ConnReq = GetConnReq();
if (AcceptTCB != NULL && ConnReq != NULL) {
Event = ListenAO->ao_connect;
EventContext = ListenAO->ao_conncontext;
BuildTDIAddress(TAddress, Src, SrcPort);
// Reference the AddrObj, since both locks are about to be dropped
// for the call to the handler.
REF_AO(ListenAO);
AcceptTCB->tcb_state = TCB_LISTEN;
AcceptTCB->tcb_connreq = ConnReq;
AcceptTCB->tcb_flags |= CONN_ACCEPTED;
CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
CTEFreeLockFromDPC(&ConnTableLock, ConnHandle);
IF_TCPDBG(TCP_DEBUG_CONNECT) {
TCPTRACE(("indicating connect request\n"));
}
Status = (*Event)(EventContext, TCP_TA_SIZE,
(PTRANSPORT_ADDRESS)TAddress, 0, NULL,
OptInfo->ioi_optlength, OptInfo->ioi_options,
&ConnContext, &EventInfo);
if (Status == TDI_MORE_PROCESSING) {
#ifdef NT
PIO_STACK_LOCATION IrpSp;
PTDI_REQUEST_KERNEL_ACCEPT AcceptRequest;
IrpSp = IoGetCurrentIrpStackLocation(EventInfo);
// Set the accept IRP up for cancellation. If that fails, treat the
// indication as not accepted and go free everything.
Status = TCPPrepareIrpForCancel(
(PTCP_CONTEXT) IrpSp->FileObject->FsContext,
EventInfo,
TCPCancelRequest
);
if (!NT_SUCCESS(Status)) {
Status = TDI_NOT_ACCEPTED;
EventInfo = NULL;
goto AcceptIrpCancelled;
}
#endif // NT
// He accepted it. Find the connection on the AddrObj.
CTEGetLockAtDPC(&ConnTableLock, &ConnHandle);
CTEGetLockAtDPC(&ListenAO->ao_lock, &Handle);
#ifdef NT
{
IF_TCPDBG(TCP_DEBUG_CONNECT) {
TCPTRACE((
"connect indication accepted, queueing request\n"
));
}
AcceptRequest = (PTDI_REQUEST_KERNEL_ACCEPT)
&(IrpSp->Parameters);
ConnReq->tcr_conninfo =
AcceptRequest->ReturnConnectionInformation;
ConnReq->tcr_req.tr_rtn = TCPRequestComplete;
ConnReq->tcr_req.tr_context = EventInfo;
}
#else // NT
ConnReq->tcr_req.tr_rtn = EventInfo.cei_rtn;
ConnReq->tcr_req.tr_context = EventInfo.cei_context;
ConnReq->tcr_conninfo = EventInfo.cei_conninfo;
#endif // NT
// Search the idle queue for the connection whose context matches
// the one the handler gave us. Status stays TDI_INVALID_CONNECTION
// if there's no such connection.
Temp = QHEAD(&ListenAO->ao_idleq);;
CurrentTCB = NULL;
Status = TDI_INVALID_CONNECTION;
while (Temp != QEND(&ListenAO->ao_idleq)) {
CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
CTEStructAssert(CurrentConn, tc);
if ((CurrentConn->tc_context == ConnContext) &&
!(CurrentConn->tc_flags & CONN_INVALID)) {
// We think we have a match. The connection
// shouldn't have a TCB associated with it. If it
// does, it's an error. InitTCBFromConn will
// handle all this.
AcceptTCB->tcb_refcnt = 1;
#ifdef NT
Status = InitTCBFromConn(CurrentConn, AcceptTCB,
AcceptRequest->RequestConnectionInformation,
TRUE);
#else // NT
Status = InitTCBFromConn(CurrentConn, AcceptTCB,
EventInfo.cei_acceptinfo,
TRUE);
#endif // NT
if (Status == TDI_SUCCESS) {
FoundConn = TRUE;
AcceptTCB->tcb_state = TCB_SYN_RCVD;
AcceptTCB->tcb_conn = CurrentConn;
CurrentConn->tc_tcb = AcceptTCB;
CurrentConn->tc_refcnt++;
// Move him from the idle q to the active
// queue.
REMOVEQ(&CurrentConn->tc_q);
ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
}
// In any case, we're done now.
break;
}
Temp = QNEXT(Temp);
}
if (!FoundConn) {
// Didn't find a match, or had an error. Status
// code is set.
// Complete the ConnReq and free the resources.
// The ConnReq itself isn't freed here; CompleteConnReq
// queues it for completion instead.
CompleteConnReq(AcceptTCB, OptInfo, Status);
FreeTCB(AcceptTCB);
AcceptTCB = NULL;
}
#ifdef SYN_ATTACK
else {
if (SynAttackProtect) {
SynAttChk(ListenAO);
}
}
#endif
// Drop the reference taken before the indication; we still hold
// the AddrObj lock at this point.
LOCKED_DELAY_DEREF_AO(ListenAO);
CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
CTEFreeLockFromDPC(&ConnTableLock, ConnHandle);
return AcceptTCB;
}
#ifdef SYN_ATTACK
if (SynAttackProtect) {
// Note: this Handle shadows the function-scope Handle.
CTELockHandle Handle;
//
// If we need to Trigger to a lower retry count
//
// NOTE(review): ConnLimitReached is read and written here after the
// AddrObj lock was released above - confirm this is intended.
if (!ListenAO->ConnLimitReached) {
ListenAO->ConnLimitReached = TRUE;
CTEGetLockAtDPC(&SynAttLock, &Handle);
if ((++TCPPortsExhausted >= TCPMaxPortsExhausted) &&
(MaxConnectResponseRexmitCountTmp == MAX_CONNECT_RESPONSE_REXMIT_CNT)) {
MaxConnectResponseRexmitCountTmp = ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT;
}
CTEFreeLockFromDPC(&SynAttLock, Handle);
}
}
#endif
#ifdef NT
AcceptIrpCancelled:
#endif // NT
// The event handler didn't take it. Dereference it, free
// the resources, and return NULL. Neither lock is held here; both
// were released before the handler was called.
FreeConnReq(ConnReq);
FreeTCB(AcceptTCB);
DELAY_DEREF_AO(ListenAO);
return NULL;
} else {
// We couldn't get a needed resource. Free any that we
// did get, and fall through to the 'return NULL' code.
if (ConnReq != NULL)
FreeConnReq(ConnReq);
if (AcceptTCB != NULL)
FreeTCB(AcceptTCB);
}
}
#ifdef SYN_ATTACK
else {
// No connect handler is registered on this AddrObj. The AddrObj lock
// is still held here.
if (SynAttackProtect) {
// Note: this Handle shadows the function-scope Handle.
CTELockHandle Handle;
//
// If we need to Trigger to a lower retry count
//
if (!ListenAO->ConnLimitReached) {
ListenAO->ConnLimitReached = TRUE;
CTEGetLockAtDPC(&SynAttLock, &Handle);
if ((++TCPPortsExhausted >= TCPMaxPortsExhausted) &&
(MaxConnectResponseRexmitCountTmp == MAX_CONNECT_RESPONSE_REXMIT_CNT)) {
MaxConnectResponseRexmitCountTmp = ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT;
}
CTEFreeLockFromDPC(&SynAttLock, Handle);
}
}
}
#endif
// No event handler, or no resource. Free the locks, and return NULL.
CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
CTEFreeLockFromDPC(&ConnTableLock, ConnHandle);
return NULL;
}
// If we get here, the address object wasn't valid.
CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
CTEFreeLockFromDPC(&ConnTableLock, ConnHandle);
return NULL;
}
//* FindMSS - Find the MSS option in a segment.
//
//  Called when a SYN is received to find the MSS option in a segment. If
//  we don't find a usable one we assume the worst and return the default,
//  MAX_REMOTE_MSS.
//
//  The option bytes come straight off the wire, so nothing about them is
//  trusted: a length byte is only read when at least two option bytes
//  remain, and the MSS value is only read when the whole MSS option lies
//  within the header. Any malformed option ends the scan and yields the
//  default.
//
//  Input:  TCPH            - TCP header to be searched.
//
//  Returns: MSS to be used.
//
ushort
FindMSS(TCPHeader UNALIGNED *TCPH)
{
    uint    HdrSize;
    uint    OptSize;
    uint    OptLen;
    uchar   *OptPtr;

    // A header length shorter than the fixed header would make the
    // subtraction below wrap to a huge option size. Callers are expected
    // to have rejected such segments already, but don't depend on it.
    HdrSize = TCP_HDR_SIZE(TCPH);
    if (HdrSize < sizeof(TCPHeader))
        return MAX_REMOTE_MSS;

    OptSize = HdrSize - sizeof(TCPHeader);
    OptPtr = (uchar *)(TCPH + 1);

    while (OptSize) {

        if (*OptPtr == TCP_OPT_EOL)
            break;

        if (*OptPtr == TCP_OPT_NOP) {
            OptPtr++;
            OptSize--;
            continue;
        }

        // Every other option has a length byte after the kind byte. If
        // there's no room left for one, the option list is truncated.
        if (OptSize < 2)
            break;

        OptLen = OptPtr[1];

        if (*OptPtr == TCP_OPT_MSS) {
            // The option must be the right size and must fit entirely in
            // what's left of the header before we read the value.
            if (OptLen == MSS_OPT_SIZE && OptSize >= MSS_OPT_SIZE) {
                ushort TempMss = *(ushort UNALIGNED *)(OptPtr + 2);

                if (TempMss != 0)
                    return net_short(TempMss);
            }
            // Bad option size, truncated option, or an MSS of 0. Use the
            // default.
            break;
        }

        // Unknown option. Skip over it, provided its length is sane.
        if (OptLen == 0 || OptLen > OptSize)
            break;          // Bad option length, bail out.

        OptSize -= OptLen;
        OptPtr += OptLen;
    }

    return MAX_REMOTE_MSS;
}
//* ACKAndDrop - Acknowledge a segment, and drop it.
//
//  Called from within the receive code when we need to drop a segment
//  that's outside the receive window. Unless the segment carries a RST we
//  answer it with an ACK, restarting the retransmit timer with
//  MAX_REXMIT_TO first if the TCB is in TIME_WAIT. The TCB is dereferenced
//  in either case.
//
//  The TCB lock is released around the ACK without first being acquired
//  here, so the caller is expected to hold it on entry.
//
//  Input:  RI              - Receive info for incoming segment.
//          RcvTCB          - TCB for incoming segment.
//
//  Returns: Nothing.
//
void
ACKAndDrop(TCPRcvInfo *RI, TCB *RcvTCB)
{
    CTELockHandle   LockHandle;

    // We weren't given a lock handle, so use the fixed value for the
    // build. (The VxD retail build leaves it unset.)
#ifndef VXD
    LockHandle = DISPATCH_LEVEL;
#elif defined(DEBUG)
    LockHandle = DEFAULT_SIMIRQL;
#endif

    // A RST is never ACKed; just drop our reference.
    if (RI->tri_flags & TCP_FLAG_RST) {
        DerefTCB(RcvTCB, LockHandle);
        return;
    }

    if (RcvTCB->tcb_state == TCB_TIME_WAIT) {
        START_TCB_TIMER(RcvTCB->tcb_rexmittimer, MAX_REXMIT_TO);
    }

    // Send the ACK with the lock dropped, then pick it up again for the
    // dereference.
    CTEFreeLockFromDPC(&RcvTCB->tcb_lock, LockHandle);
    SendACK(RcvTCB);
    CTEGetLockAtDPC(&RcvTCB->tcb_lock, &LockHandle);

    DerefTCB(RcvTCB, LockHandle);
}
//* ACKData - Acknowledge data.
//
// Called from the receive handler to acknowledge data. We're given the
// TCB and the new value of senduna. We walk down the send q. pulling
// off sends and putting them on the complete q until we hit the end
// or we acknowledge the specified number of bytes of data.
//
// NOTE: We manipulate the send refcnt and acked flag without taking a lock.
// This is OK in the VxD version where locks don't mean anything anyway, but
// in the port to NT we'll need to add locking. The lock will have to be
// taken in the transmit complete routine. We can't use a lock in the TCB,
// since the TCB could go away before the transmit complete happens, and a lock
// in the TSR would be overkill, so it's probably best to use a global lock
// for this. If that causes too much contention, we could use a set of locks
// and pass a pointer to the appropriate lock back as part of the transmit
// confirm context. This lock pointer would also need to be stored in the
// TCB.
//
// Input: ACKTcb - TCB from which to pull data.
// SendUNA - New value of send una.
//
// Returns: Nothing.
//
void
ACKData(TCB *ACKTcb, SeqNum SendUNA)
{
Queue *End, *Current; // End and current elements.
Queue *TempQ, *EndQ;
Queue *LastCmplt; // Last one we completed.
TCPSendReq *CurrentTSR; // Current send req we're
// looking at.
PNDIS_BUFFER CurrentBuffer; // Current NDIS_BUFFER.
uint Updated = FALSE;
uint BufLength;
int Amount, OrigAmount;
long Result;
CTELockHandle Handle;
uint Temp;
CTEStructAssert(ACKTcb, tcb);
CheckTCBSends(ACKTcb);
Amount = SendUNA - ACKTcb->tcb_senduna;
CTEAssert(Amount > 0);
// Do a quick check to see if this acks everything that we have. If it does,
// handle it right away. We can only do this in the ESTABLISHED state,
// because we blindly update sendnext, and that can only work if we
// haven't sent a FIN.
if ((Amount == (int) ACKTcb->tcb_unacked) && ACKTcb->tcb_state == TCB_ESTAB) {
// Everything is acked.
CTEAssert(!EMPTYQ(&ACKTcb->tcb_sendq));
TempQ = ACKTcb->tcb_sendq.q_next;
INITQ(&ACKTcb->tcb_sendq);
ACKTcb->tcb_sendnext = SendUNA;
ACKTcb->tcb_senduna = SendUNA;
CTEAssert(ACKTcb->tcb_sendnext == ACKTcb->tcb_sendmax);
ACKTcb->tcb_cursend = NULL;
ACKTcb->tcb_sendbuf = NULL;
ACKTcb->tcb_sendofs = 0;
ACKTcb->tcb_sendsize = 0;
ACKTcb->tcb_unacked = 0;
// Now walk down the list of send requests. If the reference count
// has gone to 0, put it on the send complete queue.
CTEGetLock(&RequestCompleteLock, &Handle);
EndQ = &ACKTcb->tcb_sendq;
do {
CurrentTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, TempQ, tr_q),
tsr_req);
CTEStructAssert(CurrentTSR, tsr);
TempQ = CurrentTSR->tsr_req.tr_q.q_next;
CurrentTSR->tsr_req.tr_status = TDI_SUCCESS;
Result = CTEInterlockedDecrementLong(&CurrentTSR->tsr_refcnt);
CTEAssert(Result >= 0);
if (Result <= 0) {
// No more references are outstanding, the send can be
// completed.
// If we've sent directly from this send, NULL out the next
// pointer for the last buffer in the chain.
if (CurrentTSR->tsr_lastbuf != NULL) {
NDIS_BUFFER_LINKAGE(CurrentTSR->tsr_lastbuf) = NULL;
CurrentTSR->tsr_lastbuf = NULL;
}
ACKTcb->tcb_totaltime += (TCPTime - CurrentTSR->tsr_time);
Temp = ACKTcb->tcb_bcountlow;
ACKTcb->tcb_bcountlow += CurrentTSR->tsr_size;
ACKTcb->tcb_bcounthi += (Temp > ACKTcb->tcb_bcountlow ? 1 : 0);
ENQUEUE(&SendCompleteQ, &CurrentTSR->tsr_req.tr_q);
}
} while (TempQ != EndQ);
RequestCompleteFlags |= SEND_REQUEST_COMPLETE;
CTEFreeLock(&RequestCompleteLock, Handle);
CheckTCBSends(ACKTcb);
return;
}
OrigAmount = Amount;
End = QEND(&ACKTcb->tcb_sendq);
Current = QHEAD(&ACKTcb->tcb_sendq);
LastCmplt = NULL;
while (Amount > 0 && Current != End) {
CurrentTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, Current, tr_q),
tsr_req);
CTEStructAssert(CurrentTSR, tsr);
if (Amount >= (int) CurrentTSR->tsr_unasize) {
// This is completely acked. Just advance to the next one.
Amount -= CurrentTSR->tsr_unasize;
LastCmplt = Current;
Current = QNEXT(Current);
continue;
}
// This one is only partially acked. Update his offset and NDIS buffer
// pointer, and break out. We know that Amount is < the unacked size
// in this buffer, we we can walk the NDIS buffer chain without fear
// of falling off the end.
CurrentBuffer = CurrentTSR->tsr_buffer;
CTEAssert(CurrentBuffer != NULL);
CTEAssert(Amount < (int) CurrentTSR->tsr_unasize);
CurrentTSR->tsr_unasize -= Amount;
BufLength = NdisBufferLength(CurrentBuffer) - CurrentTSR->tsr_offset;
if (Amount >= (int) BufLength) {
do {
Amount -= BufLength;
CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
CTEAssert(CurrentBuffer != NULL);
BufLength = NdisBufferLength(CurrentBuffer);
} while (Amount >= (int) BufLength);
CurrentTSR->tsr_offset = Amount;
CurrentTSR->tsr_buffer = CurrentBuffer;
} else
CurrentTSR->tsr_offset += Amount;
Amount = 0;
break;
}
#ifdef DEBUG
// We should always be able to remove at least Amount bytes, except in
// the case where a FIN has been sent. In that case we should be off
// by exactly one. In the debug builds we'll check this.
if (Amount != 0 && (!(ACKTcb->tcb_flags & FIN_SENT) || Amount != 1))
DEBUGCHK;
#endif
if (SEQ_GT(SendUNA, ACKTcb->tcb_sendnext)) {
if (Current != End) {
// Need to reevaluate CurrentTSR, in case we bailed out of the
// above loop after updating Current but before updating
// CurrentTSR.
CurrentTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, Current, tr_q),
tsr_req);
CTEStructAssert(CurrentTSR, tsr);
ACKTcb->tcb_cursend = CurrentTSR;
ACKTcb->tcb_sendbuf = CurrentTSR->tsr_buffer;
ACKTcb->tcb_sendofs = CurrentTSR->tsr_offset;
ACKTcb->tcb_sendsize = CurrentTSR->tsr_unasize;
} else {
ACKTcb->tcb_cursend = NULL;
ACKTcb->tcb_sendbuf = NULL;
ACKTcb->tcb_sendofs = 0;
ACKTcb->tcb_sendsize = 0;
}
ACKTcb->tcb_sendnext = SendUNA;
}
// Now update tcb_unacked with the amount we tried to ack minus the
// amount we didn't ack (Amount should be 0 or 1 here).
CTEAssert(Amount == 0 || Amount == 1);
ACKTcb->tcb_unacked -= OrigAmount - Amount;
CTEAssert(*(int *)&ACKTcb->tcb_unacked >= 0);
ACKTcb->tcb_senduna = SendUNA;
// If we've acked any here, LastCmplt will be non-null, and Current will
// point to the send that should be at the start of the queue. Splice
// out the completed ones and put them on the end of the send completed
// queue, and update the TCB send q.
if (LastCmplt != NULL) {
Queue *FirstCmplt;
TCPSendReq *FirstTSR, *EndTSR;
CTEAssert(!EMPTYQ(&ACKTcb->tcb_sendq));
FirstCmplt = QHEAD(&ACKTcb->tcb_sendq);
// If we've acked everything, just reinit the queue.
if (Current == End) {
INITQ(&ACKTcb->tcb_sendq);
} else {
// There's still something on the queue. Just update it.
ACKTcb->tcb_sendq.q_next = Current;
Current->q_prev = &ACKTcb->tcb_sendq;
}
CheckTCBSends(ACKTcb);
// Now walk down the lists of things acked. If the refcnt on the send
// is 0, go ahead and put him on the send complete Q. Otherwise set
// the ACKed bit in the send, and he'll be completed when the count
// goes to 0 in the transmit confirm.
//
// Note that we haven't done any locking here. This will probably
// need to change in the port to NT.
// Set FirstTSR to the first TSR we'll complete, and EndTSR to be
// the first TSR that isn't completed.
FirstTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, FirstCmplt, tr_q),
tsr_req);
EndTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, Current, tr_q),
tsr_req);
CTEStructAssert(FirstTSR, tsr);
CTEAssert(FirstTSR != EndTSR);
// Now walk the list of ACKed TSRs. If we can complete one, put him
// on the complete queue.
CTEGetLockAtDPC(&RequestCompleteLock, &Handle);
while (FirstTSR != EndTSR) {
TempQ = QNEXT(&FirstTSR->tsr_req.tr_q);
CTEStructAssert(FirstTSR, tsr);
FirstTSR->tsr_req.tr_status = TDI_SUCCESS;
// The tsr_lastbuf->Next field is zapped to 0 when the tsr_refcnt
// goes to 0, so we don't need to do it here.
// Decrement the reference put on the send buffer when it was
// initialized indicating the send has been acknowledged.
Result = CTEInterlockedDecrementLong(&(FirstTSR->tsr_refcnt));
CTEAssert(Result >= 0);
if (Result <= 0) {
// No more references are outstanding, the send can be
// completed.
// If we've sent directly from this send, NULL out the next
// pointer for the last buffer in the chain.
if (FirstTSR->tsr_lastbuf != NULL) {
NDIS_BUFFER_LINKAGE(FirstTSR->tsr_lastbuf) = NULL;
FirstTSR->tsr_lastbuf = NULL;
}
ACKTcb->tcb_totaltime += (TCPTime - CurrentTSR->tsr_time);
Temp = ACKTcb->tcb_bcountlow;
ACKTcb->tcb_bcountlow += CurrentTSR->tsr_size;
ACKTcb->tcb_bcounthi += (Temp > ACKTcb->tcb_bcountlow ? 1 : 0);
ENQUEUE(&SendCompleteQ, &FirstTSR->tsr_req.tr_q);
}
FirstTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, TempQ, tr_q),
tsr_req);
}
RequestCompleteFlags |= SEND_REQUEST_COMPLETE;
CTEFreeLockFromDPC(&RequestCompleteLock, Handle);
}
}
//* TrimRcvBuf - Trim the front edge of a receive buffer.
//
//  Utility routine that removes Count bytes (Count may be 0) from the front
//  of a receive buffer chain. Each buffer gives up as many bytes as it holds
//  by advancing its data pointer and shrinking its size; once a buffer is
//  drained we step to the next one in the chain. If the chain holds fewer
//  than Count bytes we stop at the last buffer, which is left with a size
//  of 0. Catching that case is the caller's responsibility.
//
//  Input:  RcvBuf      - Buffer chain to be trimmed. Must not be NULL.
//          Count       - Number of bytes to trim.
//
//  Returns: Pointer to the buffer at which we stopped trimming. If the
//          chain was exhausted this is the last buffer in the chain, with
//          a size of 0.
//
IPRcvBuf *
TrimRcvBuf(IPRcvBuf *RcvBuf, uint Count)
{
    uint        Chunk;          // Bytes taken from the current buffer.

    CTEAssert(RcvBuf != NULL);

    while (Count != 0) {
        CTEAssert(RcvBuf != NULL);

        // Take whatever this buffer can give, up to what is still owed.
        Chunk = MIN(Count, RcvBuf->ipr_size);
        RcvBuf->ipr_buffer += Chunk;
        RcvBuf->ipr_size -= Chunk;
        Count -= Chunk;

        // Bytes left in this buffer means Count has been satisfied, and
        // the loop test will end the walk.
        if (RcvBuf->ipr_size != 0)
            continue;

        // This buffer is drained. Stop here if it is the last one.
        if (RcvBuf->ipr_next == NULL)
            break;

        RcvBuf = RcvBuf->ipr_next;
    }

    return RcvBuf;
}
//* FreeRBChain - Free an RB chain.
//
//  Walks a chain of IPRcvBufs and frees every element that is marked as
//  owned by TCP (IPR_OWNER_TCP). Elements with any other owner are stepped
//  over and left untouched.
//
//  Input:  RBChain     - First buffer of the chain to be freed. May be NULL.
//
//  Returns: Nothing.
//
void
FreeRBChain(IPRcvBuf *RBChain)
{
    IPRcvBuf    *Next;

    for (; RBChain != NULL; RBChain = Next) {
        // Pick up the link first; the element may be gone after the free.
        Next = RBChain->ipr_next;

        if (RBChain->ipr_owner == IPR_OWNER_TCP)
            CTEFreeMem(RBChain);
    }
}
// Placeholder buffer. PullFromRAQ returns its address, together with a size
// of 0, when the reassembly header it pulls has no data left and only a FIN
// remains, so that this case is not confused with its NULL return.
IPRcvBuf DummyBuf;
//* PullFromRAQ - Pull segments from the reassembly queue.
//
//  Called when we've received frames out of order, and have some segments
//  on the reassembly queue. Starting at the head of the queue, we free every
//  reassembly header that lies completely below the TCB's current receive
//  next value. The first header that reaches past receive next is taken off
//  the queue, trimmed at the front so that it starts at receive next, and
//  handed back to the caller. If the header at the head of the queue starts
//  beyond receive next there is nothing we can pull yet.
//
//  Input:  RcvTCB      - TCB to pull from.
//          RcvInfo     - Pointer to TCPRcvInfo structure. On a successful
//                          pull its sequence number, flags and urgent
//                          offset are filled in from the header pulled.
//          Size        - Pointer to size for current segment. Set to the
//                          number of data bytes in the segment pulled.
//
//  Returns: Pointer to the buffer chain of the segment pulled, or NULL if
//          nothing could be pulled. If only a FIN is left in the header
//          pulled, the address of DummyBuf is returned and *Size is 0.
//          On a NULL return *Size is written (as 0) in DEBUG builds only.
//
IPRcvBuf *
PullFromRAQ(TCB *RcvTCB, TCPRcvInfo *RcvInfo, uint *Size)
{
    TCPRAHdr    *Hdr;           // Reassembly header being examined.
    TCPRAHdr    *Following;     // Header after the one being freed.
    SeqNum      Wanted;         // Next sequence number we want.
    SeqNum      HdrEnd;         // Seq. number immediately after Hdr.
    IPRcvBuf    *Pulled;        // Buffer chain handed back to caller.
    int         Covered;        // Bytes at front of Hdr below Wanted.

    CTEStructAssert(RcvTCB, tcb);

    Wanted = RcvTCB->tcb_rcvnext;
    Hdr = RcvTCB->tcb_raq;

    while (Hdr != NULL) {
        CTEStructAssert(Hdr, trh);
        CTEAssert(!(Hdr->trh_flags & TCP_FLAG_SYN));

        // A header carrying a FIN has to be the last one on the queue.
        CTEAssert(!(Hdr->trh_flags & TCP_FLAG_FIN) ||
            Hdr->trh_next == NULL);

        // There is a hole between what we want and this header, so we
        // are done looking.
        if (SEQ_LT(Wanted, Hdr->trh_start))
            break;

        HdrEnd = Hdr->trh_start + Hdr->trh_size +
            ((Hdr->trh_flags & TCP_FLAG_FIN) ? 1 : 0);

        if (SEQ_GTE(Wanted, HdrEnd)) {
            // Everything in this header has already been received. Drop
            // it and look at the one behind it.
            FreeRBChain(Hdr->trh_buffer);
            Following = Hdr->trh_next;
            CTEFreeMem(Hdr);
            RcvTCB->tcb_raq = Following;

            // If that emptied the queue, one reason for being on the
            // slow path is gone.
            if (Following == NULL && --(RcvTCB->tcb_slowcount) == 0) {
                RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
                CheckTCBRcv(RcvTCB);
            }

            Hdr = Following;
            continue;
        }

        // This header reaches past what we want. Describe it to the
        // caller, starting at the sequence number we want.
        Covered = Wanted - Hdr->trh_start;
        RcvInfo->tri_seq = Wanted;
        RcvInfo->tri_flags = Hdr->trh_flags;
        RcvInfo->tri_urgent = Hdr->trh_urg;

        if (Covered == (int) Hdr->trh_size) {
            // All of the data is already covered, yet the header still
            // extends past Wanted, so what is left must be a FIN. Trimming
            // the whole chain would give us NULL, which means failure to
            // our caller, so hand back a placeholder with a size of 0.
            FreeRBChain(Hdr->trh_buffer);
            CTEAssert(Hdr->trh_flags & TCP_FLAG_FIN);
            Pulled = &DummyBuf;
            *Size = 0;
        } else {
            Pulled = FreePartialRB(Hdr->trh_buffer, Covered);
            *Size = Hdr->trh_size - Covered;
        }

        // Unlink the header. If the queue is now empty we may be able to
        // go back on the fast path.
        RcvTCB->tcb_raq = Hdr->trh_next;
        if (RcvTCB->tcb_raq == NULL && --(RcvTCB->tcb_slowcount) == 0) {
            RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
            CheckTCBRcv(RcvTCB);
        }

        CTEFreeMem(Hdr);
        return Pulled;
    }

    // Nothing on the queue could be pulled.
#ifdef DEBUG
    *Size = 0;
#endif
    return NULL;
}
//* CreateTRH - Create a TCP reassembly header.
//
//  Tries to build a new TCP reassembly header (TRH) for a segment and link
//  it into a reassembly chain. We allocate the TRH and a single IPRcvBuf
//  large enough for Size bytes, copy the first Size bytes of the incoming
//  chain into it, fill in the TRH from RcvInfo, and insert the TRH right
//  after PrevTRH. The caller must hold the lock on the TCB when this is
//  called.
//
//  Input:  PrevTRH     - Pointer to TRH to insert after.
//          RcvBuf      - Pointer to IP RcvBuf chain holding the data.
//          RcvInfo     - Pointer to RcvInfo for this TRH.
//          Size        - Size in bytes of data. May be 0 only if the
//                          segment carries a FIN.
//
//  Returns: TRUE if we created it, FALSE if either allocation failed.
//
uint
CreateTRH(TCPRAHdr *PrevTRH, IPRcvBuf *RcvBuf, TCPRcvInfo *RcvInfo, int Size)
{
    TCPRAHdr    *Hdr;           // The new reassembly header.
    IPRcvBuf    *Copy;          // Our private copy of the data.

    CTEAssert((Size > 0) || (RcvInfo->tri_flags & TCP_FLAG_FIN));

    Hdr = CTEAllocMem(sizeof(TCPRAHdr));
    if (Hdr != NULL) {
        Copy = CTEAllocMem(sizeof(IPRcvBuf) + Size);
        if (Copy != NULL) {
            // The data area sits directly behind the IPRcvBuf itself.
            Copy->ipr_buffer = (uchar *)(Copy + 1);
            Copy->ipr_size = (uint)Size;
            Copy->ipr_owner = IPR_OWNER_TCP;
            Copy->ipr_next = NULL;
            if (Size != 0)
                CopyRcvToBuffer(Copy->ipr_buffer, RcvBuf, Size, 0);

#ifdef DEBUG
            Hdr->trh_sig = trh_signature;
#endif
            // Describe the segment. The copy is both the first and the
            // last buffer on the new header.
            Hdr->trh_start = RcvInfo->tri_seq;
            Hdr->trh_size = Size;
            Hdr->trh_flags = RcvInfo->tri_flags;
            Hdr->trh_urg = RcvInfo->tri_urgent;
            Hdr->trh_buffer = Copy;
            Hdr->trh_end = Copy;

            // Link it in behind PrevTRH.
            Hdr->trh_next = PrevTRH->trh_next;
            PrevTRH->trh_next = Hdr;
            return TRUE;
        }

        // No memory for the data. Give the header back.
        CTEFreeMem(Hdr);
    }

    return FALSE;
}
//* PutOnRAQ - Put a segment on the reassembly queue.
//
//  Called during segment reception to put a segment on the reassembly
//  queue. We try to use as few reassembly headers as possible, so if this
//  segment has some overlap with an existing entry in the queue we'll just
//  update the existing entry. If there is no overlap we'll create a new
//  reassembly header. Combining URGENT data with non-URGENT data is tricky.
//  If we get a segment that has urgent data that overlaps the front of a
//  reassembly header we'll always mark the whole chunk as urgent - the value
//  of the urgent pointer will mark the end of urgent data, so this is OK. If
//  it only overlaps at the end, however, we won't combine, since we would
//  have to mark previously non-urgent data as urgent. We'll trim the
//  front of the incoming segment and create a new reassembly header. Also,
//  if we have non-urgent data that overlaps at the front of a reassembly
//  header containing urgent data we can't combine these two, since again we
//  would mark non-urgent data as urgent.
//
//  Our search will stop if we find an entry with a FIN.
//
//  We assume that the TCB lock is held by the caller.
//
//  Note that when the front of the incoming segment is trimmed, the fields
//  of *RcvInfo (tri_seq, tri_urgent) are modified and RcvBuf is passed
//  through FreePartialRB.
//
//  Entry:  RcvTCB      - TCB on which to reassemble.
//          RcvInfo     - Pointer to RcvInfo for new segment.
//          RcvBuf      - IP RcvBuf chain for this segment.
//          Size        - Size in bytes of data in this segment.
//
//  Returns: Nothing. Failures to allocate memory are silent; the segment
//          (or the part of it that couldn't be saved) is simply not queued.
//
void
PutOnRAQ(TCB *RcvTCB, TCPRcvInfo *RcvInfo, IPRcvBuf *RcvBuf, uint Size)
{
    TCPRAHdr    *PrevTRH, *CurrentTRH;  // Prev. and current TRH
                                        // pointers.
    SeqNum      NextSeq;                // Seq. number of first byte
                                        // after segment being
                                        // reassembled.
    SeqNum      NextTRHSeq;             // Seq. number of first byte
                                        // after current TRH.
    uint        Created;

    CTEStructAssert(RcvTCB, tcb);
    CTEAssert(RcvTCB->tcb_rcvnext != RcvInfo->tri_seq);
    CTEAssert(!(RcvInfo->tri_flags & TCP_FLAG_SYN));

    // NextSeq is computed once. Trimming the front of the segment below
    // advances tri_seq and shrinks Size by the same amount, so the end of
    // the segment does not move.
    NextSeq = RcvInfo->tri_seq + Size +
        ((RcvInfo->tri_flags & TCP_FLAG_FIN) ? 1 : 0);

    // PrevTRH starts out as a pseudo-header built around the TCB's queue
    // head pointer, so that PrevTRH->trh_next reads (and, in CreateTRH,
    // writes) tcb_raq itself. Only its trh_next field is ever touched.
    PrevTRH = STRUCT_OF(TCPRAHdr, &RcvTCB->tcb_raq, trh_next);
    CurrentTRH = PrevTRH->trh_next;

    // Walk down the reassembly queue, looking for the correct place to
    // insert this, until we hit the end.
    while (CurrentTRH != NULL) {
        CTEStructAssert(CurrentTRH, trh);
        CTEAssert(!(CurrentTRH->trh_flags & TCP_FLAG_SYN));

        NextTRHSeq = CurrentTRH->trh_start + CurrentTRH->trh_size +
            ((CurrentTRH->trh_flags & TCP_FLAG_FIN) ? 1 : 0);

        // First, see if it starts beyond the end of the current TRH.
        if (SEQ_LTE(RcvInfo->tri_seq, NextTRHSeq)) {
            // We know the incoming segment doesn't start beyond the end
            // of this TRH, so we'll either create a new TRH in front of
            // this one or we'll merge the new segment onto this TRH.
            // If the end of the current segment is in front of the start
            // of the current TRH, we'll need to create a new TRH. Otherwise
            // we'll merge these two.
            if (SEQ_LT(NextSeq, CurrentTRH->trh_start))
                break;
            else {
                // There's some overlap. If there's actually data in the
                // incoming segment we'll merge it.
                if (Size != 0) {
                    int         FrontOverlap, BackOverlap;
                    IPRcvBuf    *NewRB;

                    // We need to merge. If there's a FIN on the incoming
                    // segment that would fall inside this current TRH, we
                    // have a protocol violation from the remote peer. In
                    // this case just return, discarding the incoming
                    // segment.
                    if ((RcvInfo->tri_flags & TCP_FLAG_FIN) &&
                        SEQ_LTE(NextSeq, NextTRHSeq))
                        return;

                    // We have some overlap. Figure out how much.
                    FrontOverlap = CurrentTRH->trh_start - RcvInfo->tri_seq;
                    if (FrontOverlap > 0) {
                        // Have overlap in front. Allocate an IPRcvBuf to
                        // hold it, and copy it, unless we would have to
                        // combine non-urgent with urgent.
                        if (!(RcvInfo->tri_flags & TCP_FLAG_URG) &&
                            (CurrentTRH->trh_flags & TCP_FLAG_URG)) {
                            // Can't combine. Put the part in front of the
                            // current TRH on a TRH of its own.
                            // NOTE(review): CreateTRH copies
                            // RcvInfo->tri_flags as is, so a FIN on the
                            // incoming segment would also be recorded on
                            // this front piece - confirm that is intended.
                            if (CreateTRH(PrevTRH, RcvBuf, RcvInfo,
                                CurrentTRH->trh_start - RcvInfo->tri_seq)) {
                                // PrevTRH becomes the new TRH. CurrentTRH
                                // is unchanged, since the new TRH was
                                // linked in directly in front of it.
                                PrevTRH = PrevTRH->trh_next;
                                CurrentTRH = PrevTRH->trh_next;
                            }
                            FrontOverlap = 0;
                        } else {
                            NewRB = CTEAllocMem(sizeof(IPRcvBuf) + FrontOverlap);
                            if (NewRB == NULL)
                                return;     // Couldn't get the buffer.

                            NewRB->ipr_owner = IPR_OWNER_TCP;
                            NewRB->ipr_size = FrontOverlap;
                            NewRB->ipr_buffer = (uchar *)(NewRB + 1);
                            CopyRcvToBuffer(NewRB->ipr_buffer, RcvBuf,
                                FrontOverlap, 0);

                            // Put the new buffer on the front of the TRH's
                            // chain, and move the start of the TRH back.
                            CurrentTRH->trh_size += FrontOverlap;
                            NewRB->ipr_next = CurrentTRH->trh_buffer;
                            CurrentTRH->trh_buffer = NewRB;
                            CurrentTRH->trh_start = RcvInfo->tri_seq;
                        }
                    }

                    // We've updated the starting sequence number of this TRH
                    // if we needed to. Now look for back overlap. There can't
                    // be any back overlap if the current TRH has a FIN. Also
                    // we'll need to check for urgent data if there is back
                    // overlap.
                    if (!(CurrentTRH->trh_flags & TCP_FLAG_FIN)) {
                        BackOverlap = RcvInfo->tri_seq + Size - NextTRHSeq;
                        if ((BackOverlap > 0) &&
                            (RcvInfo->tri_flags & TCP_FLAG_URG) &&
                            !(CurrentTRH->trh_flags & TCP_FLAG_URG) &&
                            (FrontOverlap <= 0)) {
                            int     AmountToTrim;

                            // The incoming segment has urgent data and
                            // overlaps on the back but not the front, and
                            // the current TRH has no urgent data. We can't
                            // combine into this TRH, so trim the front of
                            // the incoming segment to NextTRHSeq and move
                            // to the next TRH.
                            AmountToTrim = NextTRHSeq - RcvInfo->tri_seq;
                            CTEAssert(AmountToTrim >= 0);
                            CTEAssert(AmountToTrim < (int) Size);
                            RcvBuf = FreePartialRB(RcvBuf, (uint)AmountToTrim);
                            RcvInfo->tri_seq += AmountToTrim;
                            // NOTE(review): tri_urgent would wrap here if
                            // the urgent offset is smaller than
                            // AmountToTrim - confirm callers rule that out.
                            RcvInfo->tri_urgent -= AmountToTrim;

                            // The segment now holds AmountToTrim fewer
                            // bytes. Size has to follow, or the overlap
                            // calculations and the CreateTRH copy further
                            // on would use more bytes than are left in
                            // RcvBuf. BackOverlap > 0 guarantees that
                            // AmountToTrim < Size, so this can't wrap.
                            Size -= (uint)AmountToTrim;

                            PrevTRH = CurrentTRH;
                            CurrentTRH = PrevTRH->trh_next;
                            continue;
                        }
                    } else
                        BackOverlap = 0;

                    // Now if we have back overlap, copy it.
                    if (BackOverlap > 0) {
                        // We have back overlap. Get a buffer to copy it into.
                        // If we can't get one, we won't just return, because
                        // we may have updated the front and may need to
                        // update the urgent info.
                        NewRB = CTEAllocMem(sizeof(IPRcvBuf) + BackOverlap);
                        if (NewRB != NULL) {
                            // Got the buffer. Copy the part of the segment
                            // that lies beyond the end of the TRH, and put
                            // it on the end of the TRH's chain.
                            NewRB->ipr_owner = IPR_OWNER_TCP;
                            NewRB->ipr_size = BackOverlap;
                            NewRB->ipr_buffer = (uchar *)(NewRB + 1);
                            CopyRcvToBuffer(NewRB->ipr_buffer, RcvBuf,
                                BackOverlap, NextTRHSeq - RcvInfo->tri_seq);
                            CurrentTRH->trh_size += BackOverlap;
                            NewRB->ipr_next = CurrentTRH->trh_end->ipr_next;
                            CurrentTRH->trh_end->ipr_next = NewRB;
                            CurrentTRH->trh_end = NewRB;
                        }
                    }

                    // Everything should be consistent now. If there's an
                    // urgent data pointer in the incoming segment, update the
                    // one in the TRH now.
                    if (RcvInfo->tri_flags & TCP_FLAG_URG) {
                        SeqNum      UrgSeq;

                        // Have an urgent pointer. If the current TRH already
                        // has an urgent pointer, see which is bigger.
                        // Otherwise just use this one.
                        UrgSeq = RcvInfo->tri_seq + RcvInfo->tri_urgent;
                        if (CurrentTRH->trh_flags & TCP_FLAG_URG) {
                            SeqNum      TRHUrgSeq;

                            TRHUrgSeq = CurrentTRH->trh_start +
                                CurrentTRH->trh_urg;
                            if (SEQ_LT(UrgSeq, TRHUrgSeq))
                                UrgSeq = TRHUrgSeq;
                        } else
                            CurrentTRH->trh_flags |= TCP_FLAG_URG;

                        // trh_urg is kept as an offset from trh_start.
                        CurrentTRH->trh_urg = UrgSeq - CurrentTRH->trh_start;
                    }
                } else {
                    // We have a 0 length segment. The only interesting thing
                    // here is if there's a FIN on the segment. If there is,
                    // and the seq. # of the incoming segment is exactly after
                    // the current TRH, OR matches the FIN in the current TRH,
                    // we note it.
                    if (RcvInfo->tri_flags & TCP_FLAG_FIN) {
                        if (!(CurrentTRH->trh_flags & TCP_FLAG_FIN)) {
                            if (SEQ_EQ(NextTRHSeq, RcvInfo->tri_seq))
                                CurrentTRH->trh_flags |= TCP_FLAG_FIN;
                            else
                                DEBUGCHK;
                        }
                        else {
                            if ( !(SEQ_EQ((NextTRHSeq-1), RcvInfo->tri_seq)) ) {
                                DEBUGCHK;
                            }
                        }
                    }
                }
                return;
            }
        } else {
            // Look at the next TRH, unless the current TRH has a FIN. If he
            // has a FIN, we won't save any data beyond that anyway.
            if (CurrentTRH->trh_flags & TCP_FLAG_FIN)
                return;

            PrevTRH = CurrentTRH;
            CurrentTRH = PrevTRH->trh_next;
        }
    }

    // When we get here, we need to create a new TRH. If we create one and
    // there was previously nothing on the reassembly queue, we'll have to
    // move off the fast receive path. Look at the queue head before the
    // create, since the create may change it.
    CurrentTRH = RcvTCB->tcb_raq;
    Created = CreateTRH(PrevTRH, RcvBuf, RcvInfo, (int)Size);

    if (Created && CurrentTRH == NULL) {
        RcvTCB->tcb_slowcount++;
        RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
        CheckTCBRcv(RcvTCB);
    }
}
//* TCPRcv - Receive a TCP segment.
//
// This is the routine called by IP when we need to receive a TCP segment.
// In general, we follow the RFC 793 event processing section pretty closely,
// but there is a 'fast path' where we make some quick checks on the incoming
// segment, and if it matches we deliver it immediately.
//
// Entry: IPContext - IPContext identifying physical i/f that
// received the data.
// Dest - IPAddr of destination.
// Src - IPAddr of source.
// LocalAddr - Local address of network which caused this to be
// received.
// SrcAddr - Address of local interface which received the packet
// IPH - IP Header.
// IPHLength - Bytes in IPH.
// RcvBuf - Pointer to receive buffer chain containing data.
// Size - Size in bytes of data received.
// IsBCast - Boolean indicator of whether or not this came in as
// a bcast.
// Protocol - Protocol this came in on - should be TCP.
// OptInfo - Pointer to info structure for received options.
//
// Returns: Status of reception. Anything other than IP_SUCCESS will cause
// IP to send a 'port unreachable' message.
//
IP_STATUS
TCPRcv(void *IPContext, IPAddr Dest, IPAddr Src, IPAddr LocalAddr,
IPAddr SrcAddr, IPHeader UNALIGNED *IPH, uint IPHLength, IPRcvBuf *RcvBuf,
uint Size, uchar IsBCast, uchar Protocol, IPOptInfo *OptInfo)
{
TCPHeader UNALIGNED *TCPH; // The TCP header.
TCB *RcvTCB; // TCB on which to receive the packet.
CTELockHandle TableHandle, TCBHandle;
TCPRcvInfo RcvInfo; // Local swapped copy of rcv info.
uint DataOffset; // Offset from start of header to data.
uint Actions;
uint BytesTaken;
uint NewSize;
CheckRBList(RcvBuf, Size);
TStats.ts_insegs++;
// Checksum it, to make sure it's valid.
TCPH = (TCPHeader *)RcvBuf->ipr_buffer;
if (!IsBCast) {
if (Size >= sizeof(TCPHeader) && XsumRcvBuf(PHXSUM(Src, Dest, PROTOCOL_TCP,
Size), RcvBuf) == 0xffff) {
// The packet is valid. Get the info we need and byte swap it,
// and then try to find a matching TCB.
RcvInfo.tri_seq = net_long(TCPH->tcp_seq);
RcvInfo.tri_ack = net_long(TCPH->tcp_ack);
RcvInfo.tri_window = (uint)net_short(TCPH->tcp_window);
RcvInfo.tri_urgent = (uint)net_short(TCPH->tcp_urgent);
RcvInfo.tri_flags = (uint)TCPH->tcp_flags;
DataOffset = TCP_HDR_SIZE(TCPH);
if (DataOffset <= Size) {
Size -= DataOffset;
CTEAssert(DataOffset <= RcvBuf->ipr_size);
RcvBuf->ipr_size -= DataOffset;
RcvBuf->ipr_buffer += DataOffset;
CTEGetLockAtDPC(&TCBTableLock, &TableHandle);
RcvTCB = FindTCB(Dest, Src, TCPH->tcp_src, TCPH->tcp_dest);
if (RcvTCB != NULL) {
// Found one. Get the lock on it, and continue.
CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TCBHandle);
CTEFreeLockFromDPC(&TCBTableLock, TCBHandle);
} else {
uchar DType;
// Didn't find a matching TCB. If this segment carries a SYN,
// find a matching address object and see it it has a listen
// indication. If it does, call it. Otherwise send a RST
// back to the sender.
CTEFreeLockFromDPC(&TCBTableLock, TableHandle);
// Make sure that the source address isn't a broadcast
// before proceeding.
if ((*LocalNetInfo.ipi_invalidsrc)(Src))
return IP_SUCCESS;
// If it doesn't have a SYN (and only a SYN), we'll send a
// reset.
if ((RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_ACK | TCP_FLAG_RST)) ==
TCP_FLAG_SYN) {
AddrObj *AO;
//
// This segment had a SYN.
//
//
#ifdef NT
CTEGetLockAtDPC(&AddrObjTableLock, &TableHandle);
#endif
#ifdef SECFLTR
// See if we are filtering the
// destination interface/port.
//
if ( (!SecurityFilteringEnabled ||
IsPermittedSecurityFilter(
LocalAddr,
IPContext,
PROTOCOL_TCP,
(ulong) net_short(TCPH->tcp_dest)
))
)
{
#else // SECFLTR
if ( 1 ) {
#endif // SECFLTR
//
// Find a matching address object, and then try and find a
// listening connection on that AO.
//
AO = GetBestAddrObj(Dest, TCPH->tcp_dest, PROTOCOL_TCP);
if (AO != NULL) {
// Found an AO. Try and find a listening connection.
// FindListenConn will free the lock on the AddrObjTable.
RcvTCB = FindListenConn(AO, Src, TCPH->tcp_src, OptInfo);
if (RcvTCB != NULL) {
uint Inserted;
CTEStructAssert(RcvTCB, tcb);
CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
// We found a listening connection. Initialize it
// now, and if it is actually to be accepted we'll
// send a SYN-ACK also.
CTEAssert(RcvTCB->tcb_state == TCB_SYN_RCVD);
RcvTCB->tcb_daddr = Src;
RcvTCB->tcb_saddr = Dest;
RcvTCB->tcb_dport = TCPH->tcp_src;
RcvTCB->tcb_sport = TCPH->tcp_dest;
RcvTCB->tcb_rcvnext = ++RcvInfo.tri_seq;
RcvTCB->tcb_sendwin = RcvInfo.tri_window;
RcvTCB->tcb_remmss = FindMSS(TCPH);
TStats.ts_passiveopens++;
RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
Inserted = InsertTCB(RcvTCB);
// Get the lock on it, and see if it's been
// accepted.
CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
if (!Inserted) {
// Couldn't insert it!.
CompleteConnReq(RcvTCB, OptInfo,
TDI_CONNECTION_ABORTED);
RcvTCB->tcb_refcnt--;
#ifdef NT
TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, DISPATCH_LEVEL);
#else
TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, TableHandle);
#endif
return IP_SUCCESS;
}
RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
if (RcvTCB->tcb_flags & SEND_AFTER_RCV) {
RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
DelayAction(RcvTCB, NEED_OUTPUT);
}
// We'll need to update the options, in any case.
if (OptInfo->ioi_options != NULL) {
if (!(RcvTCB->tcb_flags & CLIENT_OPTIONS)) {
(*LocalNetInfo.ipi_updateopts)(OptInfo,
&RcvTCB->tcb_opt, Src, NULL_IP_ADDR);
}
}
if (RcvTCB->tcb_flags & CONN_ACCEPTED) {
// The connection was accepted. Finish the
// initialization, and send the SYN ack.
#ifdef NT
AcceptConn(RcvTCB, DISPATCH_LEVEL);
#else
AcceptConn(RcvTCB, TableHandle);
#endif
return IP_SUCCESS;
} else {
// We don't know what to do about the
// connection yet. Return the pending listen,
// dereference the connection, and return.
CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
#ifdef NT
DerefTCB(RcvTCB, DISPATCH_LEVEL);
#else
DerefTCB(RcvTCB, TableHandle);
#endif
return IP_SUCCESS;
}
}
// No listening connection. AddrObjTableLock was
// released by FindListenConn. Fall through to send
// RST code.
} else {
// No address object. Free the lock, and fall through
// to the send RST code.
CTEFreeLockFromDPC(&AddrObjTableLock, TableHandle);
}
}
else {
// Operation not permitted. Free the lock, and fall through
// to the send RST code.
CTEFreeLockFromDPC(&AddrObjTableLock, TableHandle);
}
}
// Toss out any segments containing RST.
if (RcvInfo.tri_flags & TCP_FLAG_RST)
return IP_SUCCESS;
// Not a SYN, no AddrObj available, or port filtered.
// Send a RST back.
SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
return IP_SUCCESS;
}
// Do the fast path check. We can hit the fast path if the incoming
// sequence number matches our receive next and the masked flags
// match our 'predicted' flags.
CheckTCBRcv(RcvTCB);
RcvTCB->tcb_alive = TCPTime;
if (RcvTCB->tcb_rcvnext == RcvInfo.tri_seq &&
(RcvInfo.tri_flags & TCP_FLAGS_ALL) == RcvTCB->tcb_fastchk){
Actions = 0;
RcvTCB->tcb_refcnt++;
// The fast path. We know all we have to do here is ack sends and
// deliver data. First try and ack data.
if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
uint CWin;
uint MSS;
// The ack acknowledes something. Pull the
// appropriate amount off the send q.
ACKData(RcvTCB, RcvInfo.tri_ack);
// If this acknowledges something we were running a RTT on,
// update that stuff now.
if (RcvTCB->tcb_rtt != 0 && SEQ_GT(RcvInfo.tri_ack,
RcvTCB->tcb_rttseq)) {
short RTT;
RTT = (short)(TCPTime - RcvTCB->tcb_rtt);
RcvTCB->tcb_rtt = 0;
RTT -= (RcvTCB->tcb_smrtt >> 3);
RcvTCB->tcb_smrtt += RTT;
RTT = (RTT >= 0 ? RTT : -RTT);
RTT -= (RcvTCB->tcb_delta >> 3);
RcvTCB->tcb_delta += RTT;
RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
MIN_RETRAN_TICKS), MAX_REXMIT_TO);
}
// Update the congestion window now.
CWin = RcvTCB->tcb_cwin;
MSS = RcvTCB->tcb_mss;
if (CWin < RcvTCB->tcb_maxwin) {
if (CWin < RcvTCB->tcb_ssthresh)
CWin += MSS;
else
CWin += (MSS * MSS)/CWin;
RcvTCB->tcb_cwin = CWin;
}
CTEAssert(*(int *)&RcvTCB->tcb_cwin > 0);
// We've acknowledged something, so reset the rexmit count.
// If there's still stuff outstanding, restart the rexmit
// timer.
RcvTCB->tcb_rexmitcnt = 0;
if (SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax))
STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
else
START_TCB_TIMER(RcvTCB->tcb_rexmittimer, RcvTCB->tcb_rexmit);
// Since we've acknowledged data, we need to update the window.
RcvTCB->tcb_sendwin = RcvInfo.tri_window;
RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo.tri_window);
RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
// We've updated the window, remember to send some more.
Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
#if FAST_RETRANSMIT
{
// If the receiver has already sent dup acks, but we are not
// sending because the SendWin is less than a segment, then
// to avoid time outs on the previous send (receiver is waiting for
// retransmitted data but we are not sending the segment..) prematurely
// timeout (set rexmittimer to 1 tick)
//
int SendWin;
uint AmtOutstanding,AmtUnsent;
AmtOutstanding = (uint)(RcvTCB->tcb_sendnext -
RcvTCB->tcb_senduna);
AmtUnsent = RcvTCB->tcb_unacked - AmtOutstanding;
SendWin = (int)(MIN(RcvTCB->tcb_sendwin, RcvTCB->tcb_cwin) -
AmtOutstanding);
if ((Size == 0) &&
(SendWin < RcvTCB->tcb_mss) && (RcvTCB->tcb_dup > 0)) {
STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
START_TCB_TIMER(RcvTCB->tcb_rexmittimer, 1);
}
}
RcvTCB->tcb_dup = 0;
#endif
} else {
// It doesn't ack anything. If it's an ack for something
// larger than we've sent then ACKAndDrop it, otherwise
// ignore it.
if (SEQ_GT(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
ACKAndDrop(&RcvInfo, RcvTCB);
return IP_SUCCESS;
} else
//SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax
// If the ack matches our existing UNA, we need to see if
// we can update the window.
// Or check if fast retransmit is needed
#if FAST_RETRANSMIT
// If it is a pure duplicate ack, check if it is
// time to retransmit immediately
if ( (Size == 0) && SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
(RcvTCB->tcb_sendwin == RcvInfo.tri_window) ) {
RcvTCB->tcb_dup++;
if ((RcvTCB->tcb_dup == MaxDupAcks) ) {
//Okay. Time to retransmit the segment the receiver is asking for
STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
RcvTCB->tcb_rtt = 0;
if (!(RcvTCB->tcb_flags & FLOW_CNTLD)) {
// Don't let the slow start threshold go below 2
// segments
RcvTCB->tcb_ssthresh =
MAX(
MIN(RcvTCB->tcb_cwin,RcvTCB->tcb_sendwin) / 2,
(uint) RcvTCB->tcb_mss * 2 );
RcvTCB->tcb_cwin = RcvTCB->tcb_mss;
}
// Recall the segment in question and send it out
// Note that tcb_lock will be dereferenced by the caller
ResetAndFastSend (RcvTCB, RcvTCB->tcb_senduna);
return IP_SUCCESS;
} else if ((RcvTCB->tcb_dup > MaxDupAcks) ) {
int SendWin;
uint AmtOutstanding,AmtUnsent;
if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
(SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
(SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
RcvTCB->tcb_sendwin = RcvInfo.tri_window;
RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
RcvInfo.tri_window);
RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
// Since we've updated the window, remember to send
// some more.
Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
}
// Update the cwin to reflect the fact that the dup ack
// indicates the previous frame was received by the
// receiver
RcvTCB->tcb_cwin += RcvTCB->tcb_mss;
if ((RcvTCB->tcb_cwin+RcvTCB->tcb_mss) < RcvTCB->tcb_sendwin ) {
AmtOutstanding = (uint)(RcvTCB->tcb_sendnext -
RcvTCB->tcb_senduna);
AmtUnsent = RcvTCB->tcb_unacked - AmtOutstanding;
SendWin = (int)(MIN(RcvTCB->tcb_sendwin, RcvTCB->tcb_cwin) -
AmtOutstanding);
if (SendWin < RcvTCB->tcb_mss) {
RcvTCB->tcb_force=1;
}
}
Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
} else if ((RcvTCB->tcb_dup < MaxDupAcks)) {
int SendWin;
uint AmtOutstanding,AmtUnsent;
if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
(SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
(SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
RcvTCB->tcb_sendwin = RcvInfo.tri_window;
RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
RcvInfo.tri_window);
RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
// Since we've updated the window, remember to send
// some more.
}
// Check if we need to set tcb_force.
if ((RcvTCB->tcb_cwin+RcvTCB->tcb_mss) < RcvTCB->tcb_sendwin ) {
AmtOutstanding = (uint)(RcvTCB->tcb_sendnext -
RcvTCB->tcb_senduna);
AmtUnsent = RcvTCB->tcb_unacked - AmtOutstanding;
SendWin = (int)(MIN(RcvTCB->tcb_sendwin, RcvTCB->tcb_cwin) -
AmtOutstanding);
if (SendWin < RcvTCB->tcb_mss){
RcvTCB->tcb_force=1;
}
}
Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
} // End of all MaxDupAck cases
} else { // not a pure duplicate ack (size == 0 )
// Size !=0 or recvr is advertizing new window.
// update the window and check if
// anything needs to be sent
RcvTCB->tcb_dup = 0;
if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
(SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
(SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
RcvTCB->tcb_sendwin = RcvInfo.tri_window;
RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
RcvInfo.tri_window);
RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
// Since we've updated the window, remember to send
// some more.
Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
}
} // for SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax) case
#else //FAST_RETRANSMIT
if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
(SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
(SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
RcvTCB->tcb_sendwin = RcvInfo.tri_window;
RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
RcvInfo.tri_window);
RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
// Since we've updated the window, remember to send
// some more.
Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
}
#endif //FAST_RETRANSMIT
}
NewSize = MIN((int) Size, RcvTCB->tcb_rcvwin);
if (NewSize != 0) {
RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
#ifdef VXD
CTEFreeLock(&RcvTCB->tcb_lock, TableHandle);
BytesTaken = (*RcvTCB->tcb_rcvhndlr)(RcvTCB, RcvInfo.tri_flags,
RcvBuf, NewSize);
CTEGetLock(&RcvTCB->tcb_lock, &TableHandle);
#else
BytesTaken = (*RcvTCB->tcb_rcvhndlr)(RcvTCB, RcvInfo.tri_flags,
RcvBuf, NewSize);
#endif
RcvTCB->tcb_rcvnext += BytesTaken;
RcvTCB->tcb_rcvwin -= BytesTaken;
CheckTCBRcv(RcvTCB);
RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
Actions |= (RcvTCB->tcb_flags & SEND_AFTER_RCV ?
NEED_OUTPUT : 0);
RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
if ((RcvTCB->tcb_flags & ACK_DELAYED) || (BytesTaken != NewSize))
Actions |= NEED_ACK;
else {
RcvTCB->tcb_flags |= ACK_DELAYED;
START_TCB_TIMER(RcvTCB->tcb_delacktimer, DEL_ACK_TICKS);
}
} else {
// The new size is 0. If the original size was not 0, we must
// have a 0 rcv. win and hence need to send an ACK to this
// probe.
Actions |= (Size ? NEED_ACK : 0);
}
if (Actions)
DelayAction(RcvTCB, Actions);
#ifndef VXD
TableHandle = DISPATCH_LEVEL;
#endif
DerefTCB(RcvTCB, TableHandle);
return IP_SUCCESS;
}
#ifndef VXD
TableHandle = DISPATCH_LEVEL;
#endif
// Make sure we can handle this frame. We can't handle it if we're
// in SYN_RCVD and the accept is still pending, or we're in a
// non-established state and already in the receive handler.
if ((RcvTCB->tcb_state == TCB_SYN_RCVD &&
!(RcvTCB->tcb_flags & CONN_ACCEPTED)) ||
(RcvTCB->tcb_state != TCB_ESTAB && (RcvTCB->tcb_fastchk &
TCP_FLAG_IN_RCV))) {
CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
return IP_SUCCESS;
}
// If it's closed, it's a temporary zombie TCB. Reset the sender.
if (RcvTCB->tcb_state == TCB_CLOSED || CLOSING(RcvTCB) ||
((RcvTCB->tcb_flags & (GC_PENDING | TW_PENDING)) == GC_PENDING)) {
CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
return IP_SUCCESS;
}
// At this point, we have a connection, and it's locked. Following
// the 'Segment Arrives' section of 793, the next thing to check is
// if this connection is in SynSent state.
if (RcvTCB->tcb_state == TCB_SYN_SENT) {
CTEAssert(RcvTCB->tcb_flags & ACTIVE_OPEN);
// Check the ACK bit. Since we don't send data with our SYNs, the
// check we make is for the ack to exactly match our SND.NXT.
if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
// ACK is set.
if (!SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendnext)) {
// Bad ACK value.
CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
// Send a RST back at him.
SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
return IP_SUCCESS;
}
}
if (RcvInfo.tri_flags & TCP_FLAG_RST) {
// There's an acceptable RST. We'll persist here, sending
// another SYN in PERSIST_TIMEOUT ms, until we fail from too
// many retrys.
if (RcvTCB->tcb_rexmitcnt == MaxConnectRexmitCount) {
// We've had a positive refusal, and one more rexmit
// would time us out, so close the connection now.
CompleteConnReq(RcvTCB, OptInfo, TDI_CONN_REFUSED);
TryToCloseTCB(RcvTCB, TCB_CLOSE_REFUSED, TableHandle);
} else {
START_TCB_TIMER(RcvTCB->tcb_rexmittimer, PERSIST_TIMEOUT);
CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
}
return IP_SUCCESS;
}
// See if we have a SYN. If we do, we're going to change state
// somehow (either to ESTABLISHED or SYN_RCVD).
if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
RcvTCB->tcb_refcnt++;
// We have a SYN. Go ahead and record the sequence number and
// window info.
RcvTCB->tcb_rcvnext = ++RcvInfo.tri_seq;
if (RcvInfo.tri_flags & TCP_FLAG_URG) {
// Urgent data. Update the pointer.
if (RcvInfo.tri_urgent != 0)
RcvInfo.tri_urgent--;
else
RcvInfo.tri_flags &= ~TCP_FLAG_URG;
}
RcvTCB->tcb_remmss = FindMSS(TCPH);
// If there are options, update them now. We already have an
// RCE open, so if we have new options we'll have to close
// it and open a new one.
if (OptInfo->ioi_options != NULL) {
if (!(RcvTCB->tcb_flags & CLIENT_OPTIONS)) {
(*LocalNetInfo.ipi_updateopts)(OptInfo,
&RcvTCB->tcb_opt, Src, NULL_IP_ADDR);
(*LocalNetInfo.ipi_closerce)(RcvTCB->tcb_rce);
InitRCE(RcvTCB);
}
} else{
RcvTCB->tcb_mss = MIN(RcvTCB->tcb_mss, RcvTCB->tcb_remmss);
CTEAssert(RcvTCB->tcb_mss > 0);
}
RcvTCB->tcb_rexmitcnt = 0;
STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
AdjustRcvWin(RcvTCB);
if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
// Our SYN has been acked. Update SND.UNA and stop the
// retrans timer.
RcvTCB->tcb_senduna = RcvInfo.tri_ack;
RcvTCB->tcb_sendwin = RcvInfo.tri_window;
RcvTCB->tcb_maxwin = RcvInfo.tri_window;
RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
GoToEstab(RcvTCB);
#ifdef RASAUTODIAL
//
// Set a bit that informs TCBTimeout to notify
// the automatic connection driver of this new
// connection. Only set this flag if we
// have binded succesfully with the automatic
// connection driver.
//
if (fAcdLoadedG)
RcvTCB->tcb_flags |= ACD_CONN_NOTIF;
#endif // RASAUTODIAL
// Remove whatever command exists on this connection.
CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
SendACK(RcvTCB);
// Now handle other data and controls. To do this we need
// to reaquire the lock, and make sure we haven't started
// closing it.
CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
if (!CLOSING(RcvTCB)) {
// We haven't started closing it. Turn off the
// SYN flag and continue processing.
RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
if ((RcvInfo.tri_flags & TCP_FLAGS_ALL) != TCP_FLAG_ACK ||
Size != 0)
goto NotSYNSent;
}
DerefTCB(RcvTCB, TableHandle);
return IP_SUCCESS;
} else {
// A SYN, but not an ACK. Go to SYN_RCVD.
RcvTCB->tcb_state = TCB_SYN_RCVD;
RcvTCB->tcb_sendnext = RcvTCB->tcb_senduna;
SendSYN(RcvTCB, TableHandle);
CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
DerefTCB(RcvTCB, TableHandle);
return IP_SUCCESS;
}
} else {
// No SYN, just toss the frame.
CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
return IP_SUCCESS;
}
}
RcvTCB->tcb_refcnt++;
NotSYNSent:
// Not in the SYN-SENT state. Check the sequence number. If my window
// is 0, I'll truncate all incoming frames but look at some of the
// control fields. Otherwise I'll try and make this segment fit into
// the window.
if (RcvTCB->tcb_rcvwin != 0) {
int StateSize; // Size, including state info.
SeqNum LastValidSeq; // Sequence number of last valid
// byte at RWE.
// We are offering a window. If this segment starts in front of my
// receive window, clip off the front part.
#if 1 // Bug #63900
//Check for the sanity of received sequence.
//This is to fix the 1 bit error(MSB) case in the rcv seq.
// Also, check the incoming size.
if ((SEQ_LT(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) &&
((int)Size >= 0) &&
(RcvTCB->tcb_rcvnext - RcvInfo.tri_seq ) > 0) {
#else
if (SEQ_LT(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) {
#endif
int AmountToClip, FinByte;
if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
// Had a SYN. Clip it off and update the sequence number.
RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
RcvInfo.tri_seq++;
RcvInfo.tri_urgent--;
}
// Advance the receive buffer to point at the new data.
AmountToClip = RcvTCB->tcb_rcvnext - RcvInfo.tri_seq;
CTEAssert(AmountToClip >= 0);
// If there's a FIN on this segment, we'll need to account for
// it.
FinByte = ((RcvInfo.tri_flags & TCP_FLAG_FIN) ? 1: 0);
if (AmountToClip >= (((int) Size) + FinByte)) {
// Falls entirely before the window. We have more special
// case code here - if the ack. number acks something,
// we'll go ahead and take it, faking the sequence number
// to be rcvnext. This prevents problems on full duplex
// connections, where data has been received but not acked,
// and retransmission timers reset the seq. number to
// below our rcvnext.
if ((RcvInfo.tri_flags & TCP_FLAG_ACK) &&
SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
// This contains valid ACK info. Fudge the information
// to get through the rest of this.
Size = 0;
AmountToClip = 0;
RcvInfo.tri_seq = RcvTCB->tcb_rcvnext;
RcvInfo.tri_flags &= ~(TCP_FLAG_SYN | TCP_FLAG_FIN |
TCP_FLAG_RST | TCP_FLAG_URG);
#ifdef DEBUG
FinByte = 1; // Fake out assert below.
#endif
} else {
ACKAndDrop(&RcvInfo, RcvTCB);
return IP_SUCCESS;
}
}
// Trim what we have to. If we can't trim enough, the frame
// is too short. This shouldn't happen, but it it does we'll
// drop the frame.
Size -= AmountToClip;
RcvInfo.tri_seq += AmountToClip;
RcvInfo.tri_urgent -= AmountToClip;
RcvBuf = TrimRcvBuf(RcvBuf, AmountToClip);
CTEAssert(RcvBuf != NULL);
CTEAssert(RcvBuf->ipr_size != 0 ||
(Size == 0 && FinByte));
if (*(int *)&RcvInfo.tri_urgent < 0) {
RcvInfo.tri_urgent = 0;
RcvInfo.tri_flags &= ~TCP_FLAG_URG;
}
}
// We've made sure the front is OK. Now make sure part of it doesn't
// fall outside of the right edge of the window. If it does,
// we'll truncate the frame (removing the FIN, if any). If we
// truncate the whole frame we'll ACKAndDrop it.
StateSize = Size + ((RcvInfo.tri_flags & TCP_FLAG_SYN) ? 1: 0) +
((RcvInfo.tri_flags & TCP_FLAG_FIN) ? 1: 0);
if (StateSize)
StateSize--;
// Now the incoming sequence number (RcvInfo.tri_seq) + StateSize
// it the last sequence number in the segment. If this is greater
// than the last valid byte in the window, we have some overlap
// to chop off.
CTEAssert(StateSize >= 0);
LastValidSeq = RcvTCB->tcb_rcvnext + RcvTCB->tcb_rcvwin - 1;
if (SEQ_GT(RcvInfo.tri_seq + StateSize, LastValidSeq)) {
int AmountToChop;
// At least some part of the frame is outside of our window.
// See if it starts outside our window.
if (SEQ_GT(RcvInfo.tri_seq, LastValidSeq)) {
// Falls entirely outside the window. We have special
// case code to deal with a pure ack that falls exactly at
// our right window edge. Otherwise we ack and drop it.
if (!SEQ_EQ(RcvInfo.tri_seq, LastValidSeq+1) || Size != 0
|| (RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN))) {
ACKAndDrop(&RcvInfo, RcvTCB);
return IP_SUCCESS;
}
} else {
// At least some part of it is in the window. If there's a
// FIN, chop that off and see if that moves us inside.
if (RcvInfo.tri_flags & TCP_FLAG_FIN) {
RcvInfo.tri_flags &= ~TCP_FLAG_FIN;
StateSize--;
}
// Now figure out how much to chop off.
AmountToChop = (RcvInfo.tri_seq + StateSize) - LastValidSeq;
CTEAssert(AmountToChop >= 0);
Size -= AmountToChop;
}
}
} else {
if (!SEQ_EQ(RcvTCB->tcb_rcvnext, RcvInfo.tri_seq)) {
// If there's a RST on this segment, and he's only off by 1,
// take it anyway. This can happen if the remote peer is
// probing and sends with the seq. # after the probe.
if (!(RcvInfo.tri_flags & TCP_FLAG_RST) ||
!(SEQ_EQ(RcvTCB->tcb_rcvnext, (RcvInfo.tri_seq - 1)))) {
ACKAndDrop(&RcvInfo, RcvTCB);
return IP_SUCCESS;
} else
RcvInfo.tri_seq = RcvTCB->tcb_rcvnext;
}
// He's in sequence, but we have a window of 0. Truncate the
// size, and clear any sequence consuming bits.
if (Size != 0 ||
(RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN))) {
RcvInfo.tri_flags &= ~(TCP_FLAG_SYN | TCP_FLAG_FIN);
Size = 0;
if (!(RcvInfo.tri_flags & TCP_FLAG_RST))
DelayAction(RcvTCB, NEED_ACK);
}
}
// At this point, the segment is in our window and does not overlap
// on either end. If it's the next sequence number we expect, we can
// handle the data now. Otherwise we'll queue it for later. In either
// case we'll handle RST and ACK information right now.
CTEAssert((*(int *)&Size) >= 0);
// Now, following 793, we check the RST bit.
if (RcvInfo.tri_flags & TCP_FLAG_RST) {
uchar Reason;
// We can't go back into the LISTEN state from SYN-RCVD here,
// because we may have notified the client via a listen completing
// or a connect indication. So, if came from an active open we'll
// give back a 'connection refused' notice. For all other cases
// we'll just destroy the connection.
if (RcvTCB->tcb_state == TCB_SYN_RCVD) {
if (RcvTCB->tcb_flags & ACTIVE_OPEN)
Reason = TCB_CLOSE_REFUSED;
else
Reason = TCB_CLOSE_RST;
} else
Reason = TCB_CLOSE_RST;
TryToCloseTCB(RcvTCB, Reason, TableHandle);
CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
if (RcvTCB->tcb_state != TCB_TIME_WAIT) {
CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
RemoveTCBFromConn(RcvTCB);
NotifyOfDisc(RcvTCB, OptInfo, TDI_CONNECTION_RESET);
CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
}
DerefTCB(RcvTCB, TableHandle);
return IP_SUCCESS;
}
// Next check the SYN bit.
if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
// Again, we can't quietly go back into the LISTEN state here, even
// if we came from a passive open.
TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, TableHandle);
SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
if (RcvTCB->tcb_state != TCB_TIME_WAIT) {
CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
RemoveTCBFromConn(RcvTCB);
NotifyOfDisc(RcvTCB, OptInfo, TDI_CONNECTION_RESET);
CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
}
DerefTCB(RcvTCB, TableHandle);
return IP_SUCCESS;
}
// Check the ACK field. If it's not on drop the segment.
if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
uint UpdateWindow;
// If we're in SYN-RCVD, go to ESTABLISHED.
if (RcvTCB->tcb_state == TCB_SYN_RCVD) {
if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
// The ack is valid.
#ifdef SYN_ATTACK
if (SynAttackProtect) {
CTELockHandle Handle;
//
// We will be reiniting the tcprexmitcnt to 0. If we are
// configured for syn-attack protection and the rexmit cnt
// is >1, decrement the count of connections that are
// in the half-open-retried state. Check whether we are
// below a low-watermark. If we are, increase the rexmit
// count back to configured values
//
CTEGetLockAtDPC(&SynAttLock, &Handle);
if (RcvTCB->tcb_rexmitcnt >= ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT) {
BOOLEAN Trigger;
Trigger = (TCPHalfOpen < TCPMaxHalfOpen) ||
(--TCPHalfOpenRetried <= TCPMaxHalfOpenRetriedLW);
if (Trigger && (MaxConnectResponseRexmitCountTmp == ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT))
{
MaxConnectResponseRexmitCountTmp = MAX_CONNECT_RESPONSE_REXMIT_CNT;
}
}
//
// Decrement the # of conn. in half open state
//
TCPHalfOpen--;
CTEFreeLockFromDPC(&SynAttLock, Handle);
}
#endif
RcvTCB->tcb_rexmitcnt = 0;
STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
RcvTCB->tcb_senduna++;
RcvTCB->tcb_sendwin = RcvInfo.tri_window;
RcvTCB->tcb_maxwin = RcvInfo.tri_window;
RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
GoToEstab(RcvTCB);
// Now complete whatever we can here.
CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
} else {
DerefTCB(RcvTCB, TableHandle);
SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
return IP_SUCCESS;
}
} else {
// We're not in SYN-RCVD. See if this acknowledges anything.
if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
uint CWin;
// The ack acknowledes something. Pull the
// appropriate amount off the send q.
ACKData(RcvTCB, RcvInfo.tri_ack);
// If this acknowledges something we were running a RTT on,
// update that stuff now.
if (RcvTCB->tcb_rtt != 0 && SEQ_GT(RcvInfo.tri_ack,
RcvTCB->tcb_rttseq)) {
short RTT;
RTT = (short)(TCPTime - RcvTCB->tcb_rtt);
RcvTCB->tcb_rtt = 0;
RTT -= (RcvTCB->tcb_smrtt >> 3);
RcvTCB->tcb_smrtt += RTT;
RTT = (RTT >= 0 ? RTT : -RTT);
RTT -= (RcvTCB->tcb_delta >> 3);
RcvTCB->tcb_delta += RTT;
RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
MIN_RETRAN_TICKS), MAX_REXMIT_TO);
}
// If we're probing for a PMTU black hole we've found one, so turn off
// the detection. The size is already down, so leave it there.
if (RcvTCB->tcb_flags & PMTU_BH_PROBE) {
RcvTCB->tcb_flags &= ~PMTU_BH_PROBE;
RcvTCB->tcb_bhprobecnt = 0;
if (--(RcvTCB->tcb_slowcount) == 0) {
RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
CheckTCBRcv(RcvTCB);
}
}
// Update the congestion window now.
CWin = RcvTCB->tcb_cwin;
if (CWin < RcvTCB->tcb_maxwin) {
if (CWin < RcvTCB->tcb_ssthresh)
CWin += RcvTCB->tcb_mss;
else
CWin += (RcvTCB->tcb_mss * RcvTCB->tcb_mss)/CWin;
RcvTCB->tcb_cwin = MIN(CWin, RcvTCB->tcb_maxwin);
}
CTEAssert(*(int *)&RcvTCB->tcb_cwin > 0);
// We've acknowledged something, so reset the rexmit count.
// If there's still stuff outstanding, restart the rexmit
// timer.
RcvTCB->tcb_rexmitcnt = 0;
if (!SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax))
START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
RcvTCB->tcb_rexmit);
else
STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
// If we've sent a FIN, and this acknowledges it, we
// need to complete the client's close request and
// possibly transition our state.
if (RcvTCB->tcb_flags & FIN_SENT) {
// We have sent a FIN. See if it's been acknowledged.
// Once we've sent a FIN, tcb_sendmax
// can't advance, so our FIN must have seq. number
// tcb_sendmax - 1. Thus our FIN is acknowledged
// if the incoming ack is equal to tcb_sendmax.
if (SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
// He's acked our FIN. Turn off the flags,
// and complete the request. We'll leave the
// FIN_OUTSTANDING flag alone, to force early
// outs in the send code.
RcvTCB->tcb_flags &= ~(FIN_NEEDED | FIN_SENT);
CTEAssert(RcvTCB->tcb_unacked == 0);
CTEAssert(RcvTCB->tcb_sendnext ==
RcvTCB->tcb_sendmax);
// Now figure out what we need to do. In FIN_WAIT1
// or FIN_WAIT, just complete the disconnect req.
// and continue. Otherwise, it's a bit trickier,
// since we can't complete the connreq until we
// remove the TCB from it's connection.
switch (RcvTCB->tcb_state) {
case TCB_FIN_WAIT1:
RcvTCB->tcb_state = TCB_FIN_WAIT2;
CompleteConnReq(RcvTCB, OptInfo,
TDI_SUCCESS);
// Start a timer in case we never get
// out of FIN_WAIT2. Set the retransmit
// count high to force a timeout the
// first time the timer fires.
RcvTCB->tcb_rexmitcnt = MaxDataRexmitCount;
START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
FinWait2TO);
// Fall through to FIN-WAIT-2 processing.
case TCB_FIN_WAIT2:
break;
case TCB_CLOSING:
GracefulClose(RcvTCB, TRUE, FALSE,
TableHandle);
return IP_SUCCESS;
break;
case TCB_LAST_ACK:
GracefulClose(RcvTCB, FALSE, FALSE,
TableHandle);
return IP_SUCCESS;
break;
default:
DEBUGCHK;
break;
}
}
}
UpdateWindow = TRUE;
} else {
// It doesn't ack anything. If it's an ack for something
// larger than we've sent then ACKAndDrop it, otherwise
// ignore it. If we're in FIN_WAIT2, we'll restart the timer.
// We don't make this check above because we know no
// data can be acked when we're in FIN_WAIT2.
if (RcvTCB->tcb_state == TCB_FIN_WAIT2)
START_TCB_TIMER(RcvTCB->tcb_rexmittimer, FinWait2TO);
if (SEQ_GT(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
ACKAndDrop(&RcvInfo, RcvTCB);
return IP_SUCCESS;
} else {
// Now update the window if we can.
if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
(SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
(SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
UpdateWindow = TRUE;
} else
UpdateWindow = FALSE;
}
}
if (UpdateWindow) {
RcvTCB->tcb_sendwin = RcvInfo.tri_window;
RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
RcvInfo.tri_window);
RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
if (RcvInfo.tri_window == 0) {
// We've got a zero window.
if (!EMPTYQ(&RcvTCB->tcb_sendq)) {
RcvTCB->tcb_flags &= ~NEED_OUTPUT;
RcvTCB->tcb_rexmitcnt = 0;
START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
RcvTCB->tcb_rexmit);
if (!(RcvTCB->tcb_flags & FLOW_CNTLD)) {
RcvTCB->tcb_flags |= FLOW_CNTLD;
RcvTCB->tcb_slowcount++;
RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
CheckTCBRcv(RcvTCB);
}
}
} else {
if (RcvTCB->tcb_flags & FLOW_CNTLD) {
RcvTCB->tcb_rexmitcnt = 0;
RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
MIN_RETRAN_TICKS), MAX_REXMIT_TO);
if (TCB_TIMER_RUNNING(RcvTCB->tcb_rexmittimer)) {
START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
RcvTCB->tcb_rexmit);
}
RcvTCB->tcb_flags &= ~(FLOW_CNTLD | FORCE_OUTPUT);
// Reset send next to the left edge of the window,
// because it might be at senduna+1 if we've been
// probing.
ResetSendNext(RcvTCB, RcvTCB->tcb_senduna);
if (--(RcvTCB->tcb_slowcount) == 0) {
RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
CheckTCBRcv(RcvTCB);
}
}
// Since we've updated the window, see if we can send
// some more.
if (RcvTCB->tcb_unacked != 0 ||
(RcvTCB->tcb_flags & FIN_NEEDED))
DelayAction(RcvTCB, NEED_OUTPUT);
}
}
}
// We've handled all the acknowledgment stuff. If the size
// is greater than 0 or important bits are set process it further,
// otherwise it's a pure ack and we're done with it.
if (Size > 0 || (RcvInfo.tri_flags & TCP_FLAG_FIN)) {
// If we're not in a state where we can process incoming data
// or FINs, there's no point in going further. Just send an
// ack and drop this segment.
if (!DATA_RCV_STATE(RcvTCB->tcb_state) ||
(RcvTCB->tcb_flags & GC_PENDING)) {
ACKAndDrop(&RcvInfo, RcvTCB);
return IP_SUCCESS;
}
// If it's in sequence process it now, otherwise reassemble it.
if (SEQ_EQ(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) {
// If we're already in the recv. handler, this is a
// duplicate. We'll just toss it.
if (RcvTCB->tcb_fastchk & TCP_FLAG_IN_RCV) {
DerefTCB(RcvTCB, TableHandle);
return IP_SUCCESS;
}
RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
// Now loop, pulling things from the reassembly queue, until
// the queue is empty, or we can't take all of the data,
// or we hit a FIN.
do {
// Handle urgent data, if any.
if (RcvInfo.tri_flags & TCP_FLAG_URG) {
HandleUrgent(RcvTCB, &RcvInfo, RcvBuf, &Size);
// Since we may have freed the lock, we need to recheck
// and see if we're closing here.
if (CLOSING(RcvTCB))
break;
}
// OK, the data is in sequence, we've updated the
// reassembly queue and handled any urgent data. If we
// have any data go ahead and process it now.
if (Size > 0) {
#ifdef VXD
CTEFreeLock(&RcvTCB->tcb_lock, TableHandle);
BytesTaken = (*RcvTCB->tcb_rcvhndlr)(RcvTCB,
RcvInfo.tri_flags, RcvBuf, Size);
CTEGetLock(&RcvTCB->tcb_lock, &TableHandle);
#else
BytesTaken = (*RcvTCB->tcb_rcvhndlr)(RcvTCB,
RcvInfo.tri_flags, RcvBuf, Size);
#endif
RcvTCB->tcb_rcvnext += BytesTaken;
RcvTCB->tcb_rcvwin -= BytesTaken;
CheckTCBRcv(RcvTCB);
if (RcvTCB->tcb_flags & ACK_DELAYED)
DelayAction(RcvTCB, NEED_ACK);
else {
RcvTCB->tcb_flags |= ACK_DELAYED;
START_TCB_TIMER(RcvTCB->tcb_delacktimer,
DEL_ACK_TICKS);
}
if (BytesTaken != Size) {
// We didn't take everything we could. No
// use in further processing, just bail
// out.
DelayAction(RcvTCB, NEED_ACK);
break;
}
// If we're closing now, we're done, so get out.
if (CLOSING(RcvTCB))
break;
}
// See if we need to advance over some urgent data.
if (RcvTCB->tcb_flags & URG_VALID) {
uint AdvanceNeeded;
// We only need to advance if we're not doing
// urgent inline. Urgent inline also has some
// implications for when we can clear the URG_VALID
// flag. If we're not doing urgent inline, we can
// clear it when rcvnext advances beyond urgent end.
// If we are doing inline, we clear it when rcvnext
// advances one receive window beyond urgend.
if (!(RcvTCB->tcb_flags & URG_INLINE)) {
if (RcvTCB->tcb_rcvnext == RcvTCB->tcb_urgstart)
RcvTCB->tcb_rcvnext = RcvTCB->tcb_urgend +
1;
else
CTEAssert(SEQ_LT(RcvTCB->tcb_rcvnext,
RcvTCB->tcb_urgstart) ||
SEQ_GT(RcvTCB->tcb_rcvnext,
RcvTCB->tcb_urgend));
AdvanceNeeded = 0;
} else
AdvanceNeeded = RcvTCB->tcb_defaultwin;
// See if we can clear the URG_VALID flag.
if (SEQ_GT(RcvTCB->tcb_rcvnext - AdvanceNeeded,
RcvTCB->tcb_urgend)) {
RcvTCB->tcb_flags &= ~URG_VALID;
if (--(RcvTCB->tcb_slowcount) == 0) {
RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
CheckTCBRcv(RcvTCB);
}
}
}
// We've handled the data. If the FIN bit is set, we
// have more processing.
if (RcvInfo.tri_flags & TCP_FLAG_FIN) {
uint Notify = FALSE;
RcvTCB->tcb_rcvnext++;
DelayAction(RcvTCB, NEED_ACK);
PushData(RcvTCB);
switch (RcvTCB->tcb_state) {
case TCB_SYN_RCVD:
// I don't think we can get here - we
// should have discarded the frame if it
// had no ACK, or gone to established if
// it did.
DEBUGCHK;
case TCB_ESTAB:
RcvTCB->tcb_state = TCB_CLOSE_WAIT;
// We left established, we're off the
// fast path.
RcvTCB->tcb_slowcount++;
RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
CheckTCBRcv(RcvTCB);
Notify = TRUE;
break;
case TCB_FIN_WAIT1:
RcvTCB->tcb_state = TCB_CLOSING;
Notify = TRUE;
break;
case TCB_FIN_WAIT2:
// Stop the FIN_WAIT2 timer.
STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
RcvTCB->tcb_refcnt++;
GracefulClose(RcvTCB, TRUE, TRUE,
TableHandle);
CTEGetLockAtDPC(&RcvTCB->tcb_lock,
&TableHandle);
break;
default:
DEBUGCHK;
break;
}
if (Notify) {
CTEFreeLockFromDPC(&RcvTCB->tcb_lock,
TableHandle);
NotifyOfDisc(RcvTCB, OptInfo, TDI_GRACEFUL_DISC);
CTEGetLockAtDPC(&RcvTCB->tcb_lock,
&TableHandle);
}
break; // Exit out of WHILE loop.
}
// If the reassembly queue isn't empty, get what we
// can now.
RcvBuf = PullFromRAQ(RcvTCB, &RcvInfo, &Size);
CheckRBList(RcvBuf, Size);
} while (RcvBuf != NULL);
RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
if (RcvTCB->tcb_flags & SEND_AFTER_RCV) {
RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
DelayAction(RcvTCB, NEED_OUTPUT);
}
DerefTCB(RcvTCB, TableHandle);
return IP_SUCCESS;
} else {
// It's not in sequence. Since it needs further processing,
// put in on the reassembly queue.
if (DATA_RCV_STATE(RcvTCB->tcb_state) &&
!(RcvTCB->tcb_flags & GC_PENDING)) {
PutOnRAQ(RcvTCB, &RcvInfo, RcvBuf, Size);
CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
SendACK(RcvTCB);
CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
DerefTCB(RcvTCB, TableHandle);
} else
ACKAndDrop(&RcvInfo, RcvTCB);
return IP_SUCCESS;
}
}
} else {
// No ACK. Just drop the segment and return.
DerefTCB(RcvTCB, TableHandle);
return IP_SUCCESS;
}
DerefTCB(RcvTCB, TableHandle);
} else // DataOffset <= Size
TStats.ts_inerrs++;
} else {
// Bump bad xsum counter.
TStats.ts_inerrs++;
}
} else // IsBCast
TStats.ts_inerrs++;
return IP_SUCCESS;
}
#pragma BEGIN_INIT
//* InitTCPRcv - One-time setup of the TCP receive path's global state.
//
//  Invoked at init time. Prepares the locks, queues, counters and the
//  DummyBuf receive buffer used by this file, and selects
//  TCBDelayRtnLimit for the platform being built: 1 for VXD; for NT,
//  the value read through KeNumberProcessors, capped at
//  TCB_DELAY_RTN_LIMIT.
//
//  Input:      Nothing.
//
//  Returns:    TRUE, always.
//
int
InitTCPRcv(void)
{
    // Request-completion state: RequestCompleteLock, ConnRequestCompleteQ,
    // SendCompleteQ and RequestCompleteFlags.
    CTEInitLock(&RequestCompleteLock);
    INITQ(&ConnRequestCompleteQ);
    INITQ(&SendCompleteQ);
    RequestCompleteFlags = 0;

    // TCB delay state: TCBDelayLock, TCBDelayQ and the running count.
    CTEInitLock(&TCBDelayLock);
    INITQ(&TCBDelayQ);
    TCBDelayRtnCount = 0;

#ifdef VXD
    TCBDelayRtnLimit = 1;
#endif

#ifdef NT
    {
        // Read the processor count once, then clamp it in a single
        // assignment so the limit never exceeds TCB_DELAY_RTN_LIMIT.
        uint ProcCount = (uint) (** (PCHAR *) &KeNumberProcessors);

        TCBDelayRtnLimit = (ProcCount > TCB_DELAY_RTN_LIMIT) ?
                                TCB_DELAY_RTN_LIMIT : ProcCount;
    }
#endif

    // TCPRcvReqFree S-list head (NT builds only) and TCPRcvReqFreeLock.
#ifdef NT
    ExInitializeSListHead(&TCPRcvReqFree);
#endif
    CTEInitLock(&TCPRcvReqFreeLock);

    // DummyBuf: zero-length, no data pointer, no successor, owned by IP.
    DummyBuf.ipr_next = 0;
    DummyBuf.ipr_buffer = NULL;
    DummyBuf.ipr_size = 0;
    DummyBuf.ipr_owner = IPR_OWNER_IP;

    return TRUE;
}
//* UnInitTCPRcv - Uninitialize our receive side.
//
// Called if initialization fails to uninitialize our receive side.
// The body is currently empty, so calling this routine has no effect.
//
// Input: Nothing.
//
// Returns: Nothing.
//
void
UnInitTCPRcv(void)
{
// No teardown work is performed here.
}
#pragma END_INIT