xbox-kernel/private/ntos/xnet/ip/iprecv.c
2020-09-30 17:17:25 +02:00

825 lines
21 KiB
C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*++
Copyright (c) 2000 Microsoft Corporation
Module Name:
iprecv.c
Abstract:
Handles IP datagram reception
Revision History:
05/17/2000 davidx
Created it.
--*/
#include "precomp.h"
//
// A singly-linked list of datagrams that're currently
// being reassembled. We're using a list rather than a
// hash table here for simplicity. We assume that fragmentation
// occurs very infrequently.
//
PRIVATE Packet* IpReassemblyPktList;
PRIVATE UINT IpReassemblyPktCount;
// Maximum number of actively reassembled datagrams
UINT cfgMaxReassemblyDgrams = 4;
// Maximum size for a reassembled datagram
UINT cfgMaxReassemblySize = 2048;
// Reassembly timeout period = 60sec
UINT cfgReassemblyTimeout = 60;
//
// Packet containing a datagram that's being reassembled:
// packet header
// reassembly information
// space to hold maximum IP header
// actual datagram data
//
typedef struct _ReassemblyHeader {
IPADDR srcaddr; // source IP address
IPADDR dstaddr; // destination IP address
UINT proto_id; // datagram ID and protocol
UINT timer; // reassembly timer
UINT origdatalen; // total length of the original IP datagram
DWORD bitFlags[1]; // which fragments have arrived?
} ReassemblyHeader;
VOID
IpCleanupReassemblyPkts(
IfInfo* ifp
)
/*++
Routine Description:
Clean up all partially reassembly datagrams
Arguments:
ifp - Optional parameter: when present we only clean up
the partial datagrams from the specified interface;
otherwise, we clean up all partial datagrams.
Return Value:
NONE
--*/
{
Packet** link = &IpReassemblyPktList;
Packet* pkt;
while ((pkt = *link) != NULL) {
if (ifp == pkt->recvifp || ifp == NULL) {
*link = pkt->nextpkt;
IpReassemblyPktCount--;
XnetCompletePacket(pkt, NETERR_CANCELLED);
} else {
link = &pkt->nextpkt;
}
}
}
VOID
IpReassemblyTimerProc()
/*++
Routine Description:
IP datagram reassembly timer routine
Arguments:
NONE
Return Value:
NONE
--*/
{
Packet** link = &IpReassemblyPktList;
Packet* pkt;
ReassemblyHeader* rahdr;
while ((pkt = *link) != NULL) {
rahdr = GETPKTBUF(pkt, ReassemblyHeader);
if (rahdr->timer-- == 0) {
TRACE_("Reassembly timeout: src = %s", IPADDRSTR(rahdr->srcaddr));
*link = pkt->nextpkt;
IpReassemblyPktCount--;
// Send out an ICMP error message if fragment 0 has been received
if (pkt->iphdrOffset) {
pkt->data = (BYTE*) GETPKTIPHDR(pkt);
IcmpSendError(
pkt,
ICMPTYPE_TIME_EXECEEDED,
ICMPCODE_REASSEMBLY_TIMEOUT);
}
XnetCompletePacket(pkt, NETERR_REASSEMBLY);
} else
link = &pkt->nextpkt;
}
}
PRIVATE Packet**
IpFindReassemblyPkt(
IPADDR srcaddr,
IPADDR dstaddr,
UINT proto_id
)
/*++
Routine Description:
Check if we already have a partially assembled datagram
matching the specified identification information.
Arguments:
srcaddr - Specifies the source IP address
dstaddr - Specifies the destination IP address
proto_id - IP protocol number and datagram ID
Return Value:
Address of the link pointer to the specified datagram
--*/
{
Packet** link = &IpReassemblyPktList;
Packet* pkt;
ReassemblyHeader* rahdr;
while ((pkt = *link) != NULL) {
rahdr = GETPKTBUF(pkt, ReassemblyHeader);
if (rahdr->srcaddr == srcaddr &&
rahdr->dstaddr == dstaddr &&
rahdr->proto_id == proto_id)
break;
link = &pkt->nextpkt;
}
return link;
}
PRIVATE Packet*
IpAllocReassemblyPkt(
IPADDR srcaddr,
IPADDR dstaddr,
UINT proto_id,
IfInfo* ifp
)
/*++
Routine Description:
Allocate buffer for assembling a datagram
Arguments:
srcaddr - Specifies the source IP address
dstaddr - Specifies the destination IP address
proto_id - IP protocol number and datagram ID
ifp - Points to the receiving interface
Return Value:
Pointer to the allocated packet
NULL if there is an error
--*/
{
Packet* pkt;
UINT hdrsize;
ReassemblyHeader* rahdr;
//
// We reserve some space in the head of the packet buffer
// for storing reassembly information.
//
hdrsize = offsetof(ReassemblyHeader, bitFlags) +
(((cfgMaxReassemblySize / 8) + 31) / 32) * 4 +
MAXIPHDRLEN;
pkt = XnetAllocPacket(hdrsize + cfgMaxReassemblySize, PKTFLAG_NETPOOL);
if (!pkt) return NULL;
pkt->iphdrOffset = 0;
rahdr = GETPKTBUF(pkt, ReassemblyHeader);
ZeroMem(rahdr, hdrsize);
rahdr->timer = cfgReassemblyTimeout;
rahdr->srcaddr = srcaddr;
rahdr->dstaddr = dstaddr;
rahdr->proto_id = proto_id;
pkt->data += hdrsize;
CACHE_IFP_REFERENCE(pkt->recvifp,ifp);
return pkt;
}
PRIVATE BOOL
IpUpdateFragmentFlags(
Packet* pkt,
UINT fragOffset,
UINT fragCount
)
/*++
Routine Description:
Update the bit flags to indicate which fragments have already arrived
Arguments:
pkt - Points to the packet contain partially assembled datagram
fragOffset - Starting chunk offset for the current fragment
fragCount - Number of 8 byte chunks in the current fragment
Return Value:
TRUE if the entire datagram has been completed
FALSE otherwise
--*/
{
UINT chunks, i;
ReassemblyHeader* rahdr = GETPKTBUF(pkt, ReassemblyHeader);
// Update flag bits
chunks = fragOffset % 32;
i = fragOffset / 32;
if (chunks) {
UINT bits = min(fragCount, 32-chunks);
rahdr->bitFlags[i++] |= (((1 << bits) - 1) << chunks);
fragCount -= bits;
}
while (fragCount >= 32) {
rahdr->bitFlags[i++] = 0xffffffff;
fragCount -= 32;
}
if (fragCount)
rahdr->bitFlags[i] |= (1 << fragCount) - 1;
// We don't know the total datagram length yet,
// which must mean we haven't finished reassembly.
if (rahdr->origdatalen == 0) return FALSE;
// Check if all bit flags are set
i = 0;
chunks = (rahdr->origdatalen + 7) / 8;
while (chunks >= 32) {
if (rahdr->bitFlags[i++] != 0xffffffff) return FALSE;
chunks -= 32;
}
return (chunks == 0 ||
rahdr->bitFlags[i] == (1u << chunks) - 1);
}
PRIVATE VOID
IpReassemblePkt(
Packet* pkt
)
/*++
Routine Description:
Handle IP datagram reassembly:
called when a datagram fragment is received
Arguments:
pkt - Points to the received packet (datagram fragment)
Return Value:
NONE
NOTE:
We assume that fragmented datagrams are rare. Consequently,
our priority here is implementation simplicity (over efficiency).
--*/
{
IpHeader* iphdr = GETPKTIPHDR(pkt);
UINT iphdrlen = GETIPHDRLEN(iphdr);
UINT iplen = GETIPLEN(iphdr);
UINT proto_id, fragOffset, lastbyte;
BOOL moreFlag;
Packet** link;
Packet* rapkt;
ReassemblyHeader* rahdr;
// Extract datagram header information
fragOffset = NTOHS(iphdr->fragoffset);
moreFlag = (fragOffset & MORE_FRAGMENTS) != 0;
fragOffset &= FRAGOFFSET_MASK;
proto_id = NTOHS(iphdr->id);
proto_id = (proto_id << 16) | iphdr->protocol;
link = IpFindReassemblyPkt(iphdr->srcaddr, iphdr->dstaddr, proto_id);
// Check if we already have fragments from the same datagram.
// If no existing fragments are found, then we need to
// allocate a new reassembly buffer
if ((rapkt = *link) == NULL) {
if (IpReassemblyPktCount >= cfgMaxReassemblyDgrams) {
WARNING_("Too many fragmented IP datagrams");
goto discard_fragment;
}
rapkt = IpAllocReassemblyPkt(iphdr->srcaddr, iphdr->dstaddr, proto_id, pkt->recvifp);
if (!rapkt) goto discard_fragment;
*link = rapkt;
IpReassemblyPktCount++;
}
if ((iplen -= iphdrlen) == 0) goto reassembly_failed;
lastbyte = fragOffset*8 + iplen;
if (lastbyte > cfgMaxReassemblySize) {
WARNING_("Exceeded max reassembly size");
goto reassembly_failed;
}
rahdr = GETPKTBUF(rapkt, ReassemblyHeader);
if (moreFlag) {
// Only the last fragment can have size that's not a multiple of 8.
if (iplen % 8 != 0) goto reassembly_failed;
} else {
// Update the total length field for the entire datagram
if (rahdr->origdatalen && rahdr->origdatalen != lastbyte)
goto reassembly_failed;
rahdr->origdatalen = lastbyte;
}
// Check if we got the first fragment.
// Remember the original IP datagram header if we did.
if (fragOffset == 0 && rapkt->iphdrOffset == 0) {
BYTE* hdr = rapkt->data - iphdrlen;
SETPKTIPHDR(rapkt, hdr);
CopyMem(hdr, pkt->data, iphdrlen);
rapkt->pktflags |= (pkt->pktflags & PKTFLAG_MCAST);
}
// Copy the fragment data into the reassembly buffer
CopyMem(rapkt->data + fragOffset*8, pkt->data + iphdrlen, iplen);
XnetCompletePacket(pkt, NETERR_OK);
// Update the flags to indicate which fragments have already arrived.
// If the datagram is complete, pass it upstream for processing.
if (IpUpdateFragmentFlags(rapkt, fragOffset, (iplen+7)/8)) {
*link = rapkt->nextpkt;
IpReassemblyPktCount--;
rapkt->nextpkt = NULL;
iphdr = GETPKTIPHDR(rapkt);
iphdrlen = GETIPHDRLEN(iphdr);
rapkt->data = (BYTE*) iphdr;
rapkt->datalen = iphdrlen + rahdr->origdatalen;
iphdr->length = (WORD) HTONS(rapkt->datalen);
iphdr->fragoffset &= ~HTONS(MORE_FRAGMENTS|FRAGOFFSET_MASK);
COMPUTE_CHECKSUM(iphdr->hdrxsum, iphdr, iphdrlen);
IpReceivePacket(rapkt);
}
return;
reassembly_failed:
// We detected an error during the reassembly process.
// Flush all associated fragments.
*link = rapkt->nextpkt;
IpReassemblyPktCount--;
TRACE_("Datagram reassembly failed");
XnetCompletePacket(rapkt, NETERR_REASSEMBLY);
discard_fragment:
// Discard this fragment and return
XnetCompletePacket(pkt, NETERR_DISCARDED);
}
PRIVATE BOOL
IpProcessRecvOptions(
BYTE* buf,
UINT buflen,
IfInfo* ifp
)
/*++
Routine Description:
Process options in a received IP datagram
Arguments:
buf - Points to the option data buffer
buflen - Size of the buffer, in bytes
ifp - The interface that received the datagram
Return Value:
TRUE if the option data is valid
FALSE if there is an error and the datagram should be discarded
--*/
{
BYTE opt, optlen, ptr;
IPADDR* paddr;
while (buflen) {
// reached the end of option list?
if ((opt = *buf) == IPOPT_EOL) break;
// skip over the NOP option
if (opt == IPOPT_NOP) {
buf++; buflen--;
continue;
}
// check the option length field
if (buflen < 2 || (optlen = buf[1]) < 2 || optlen > buflen)
return FALSE;
switch (opt) {
case IPOPT_LOOSE_SRCROUTE:
case IPOPT_STRICT_SRCROUTE:
// loose or strict source and record route options
if (optlen % 4 != 3 || (ptr = buf[2]) < 4 || ptr % 4 != 0)
return FALSE;
// reached the end of the IP address list?
if (ptr > optlen) break;
// only one address left and it's ours
// this shouldn't happen but we handle it anyway
paddr = (IPADDR*) (buf+ptr-1);
if (optlen - ptr == 3 && ifp->ipaddr && ifp->ipaddr == *paddr) {
*paddr = 0;
break;
}
// otherwise, discard the datagram since we don't
// participate in source routing.
return FALSE;
case IPOPT_RECORD_ROUTE:
// record route
if (optlen % 4 != 3 || (ptr = buf[2]) < 4 || ptr % 4 != 0)
return FALSE;
break;
case IPOPT_TIMESTAMP:
// internet timestamp
if (optlen < 4 || optlen % 4 != 0 ||
(ptr = buf[2]) < 5 || ptr % 4 != 1)
return FALSE;
break;
}
// move on to the next option
buf += optlen;
buflen -= optlen;
}
return TRUE;
}
IPADDR
IpReflectIpOptions(
IfInfo* ifp,
IpHeader* iphdr,
IPADDR dstaddr
)
/*++
Routine Description:
Reflect any IP options in a received datagram
Arguments:
ifp - Specifies the outgoing interface
iphdr - Points to the received datagram header
dstaddr - Specifies the final destination address
Return Value:
New destination address
Note:
We assume the option data in the received datagram
has already been validated.
--*/
{
BYTE* buf;
BYTE opt, optlen, ptr;
UINT buflen;
IPADDR* paddr;
buf = (BYTE*) iphdr + IPHDRLEN;
buflen = GETIPHDRLEN(iphdr) - IPHDRLEN;
while (buflen) {
if ((opt = *buf) == IPOPT_EOL) break;
if (opt == IPOPT_NOP) {
buf++; buflen--;
continue;
}
optlen = buf[1];
switch (opt) {
case IPOPT_LOOSE_SRCROUTE:
case IPOPT_STRICT_SRCROUTE:
//
// We assume the record route option contains the following:
// G1 G2 ... Gn
// and we'll modify it to:
// Gn-1 ... G2 G1 D
// and return Gn as the new destination address
//
if ((ptr = buf[2]) > 4) {
IPADDR addr;
IPADDR* pend;
ASSERT(optlen % 4 == 3 && ptr % 4 == 0);
if (ptr > optlen) ptr = (BYTE) (optlen+1);
paddr = (IPADDR*) (buf+3);
pend = (IPADDR*) (buf+ptr-5);
// swap Gn and D
addr = *pend;
// simple sanity check
// if the gateway address is bad, we'll just
// leave the source route option alone
if (!XnetIsValidUnicastAddr(addr)) break;
*pend = dstaddr;
dstaddr = addr;
pend--;
// flip G1 ... Gn-1
while (pend > paddr) {
addr = *pend;
*pend = *paddr;
*paddr = addr;
paddr++;
pend--;
}
// reset ptr field
buf[2] = 4;
}
break;
case IPOPT_RECORD_ROUTE:
//
// If there is room left, record our own IP address
//
if ((ptr = buf[2]) > optlen) break;
ASSERT(optlen % 4 == 3 && ptr >= 4 && ptr % 4 == 0);
paddr = (IPADDR*) (buf+ptr-1);
*paddr = ifp->ipaddr;
buf[2] += 4;
break;
case IPOPT_TIMESTAMP:
if ((ptr = buf[2]) > optlen) {
// increment the overflow count if we don't have room
if ((buf[3] & 0xf0) < 0xf0) buf[3] += 0x10;
} else {
UINT timestamp;
DWORD* p;
ASSERT(optlen % 4 == 0 && optlen > 4 &&
ptr >= 5 && ptr % 4 == 1);
p = (DWORD*) (buf+ptr-1);
timestamp = IpGetMsecsSinceMidnightGMT();
switch (buf[3] & 0xf) {
case 3:
if (*p != ifp->ipaddr) break;
// fall through
case 1:
if (optlen-ptr < 7) break;
*p++ = ifp->ipaddr;
buf[2] += 4;
ptr += 4;
// fall through
case 0:
if (optlen-ptr < 3) break;
*p = HTONL(timestamp);
buf[2] += 4;
break;
}
}
break;
}
buf += optlen;
buflen -= optlen;
}
return dstaddr;
}
VOID
IpReceivePacket(
Packet* pkt
)
/*++
Routine Description:
Receive an IP datagram
(called by the NIC interface driver)
Arguments:
pkt - Points to the received packet
Return Value:
NONE
--*/
{
IpHeader* iphdr;
IPADDR dstaddr, srcaddr;
UINT iphdrlen, iplen;
IfInfo* ifp;
ASSERT_DISPATCH_LEVEL();
//
// NOTE:
// 1. We assume the received packet is at least large enough
// to hold the default IP header.
// 2. It's more efficient if the interface driver can manage
// to place the IP header at DWORD-aligned boundary.
// This is NOT currently the case for EPRO100 interface driver:
// the Ethernet frame header starts at DWORD-aligned address;
// which causes the IP header to start at odd-WORD boundary.
//
ASSERT(pkt->datalen >= IPHDRLEN);
// verify header and packet length
iphdr = (IpHeader*) pkt->data;
SETPKTIPHDR(pkt, iphdr);
iphdrlen = VERIFY_IPVER_HDRLEN(iphdr->ver_hdrlen);
iplen = GETIPLEN(iphdr);
if (iphdrlen < IPHDRLEN || iphdrlen > iplen || iplen > pkt->datalen)
goto discard;
// if the received packet length is larger than the IP length,
// ignore the data at the end
pkt->datalen = iplen;
// verify header checksum
if (tcpipxsum(0, pkt->data, iphdrlen) != 0xffff)
goto discard;
//
// Determine if the packet is addressed to us.
//
// NOTE:
//
// 1. We reject a packet if the destination address doesn't
// match the address of the interface it's received on
// (i.e. we implement the strong ES model).
//
// 2. We don't check if a packet with an IP broadcast/multicast
// destination address was received as unicast on the link-layer
// interface. Converse, we don't check if a packet with a unicast
// IP destination address was received as broadcast/multicast
// on the link-layer interface.
//
// 3. We'll receive multicast packets even before the interface
// has a valid IP address.
//
// 4. We only support the all-1's form of broadcast address.
// Also, we only support limited broadcasts and subnet
// directed broadcasts (i.e. no special case for directed
// broadcasts and all-subnets directed broadcasts).
//
// 5. We discard all datagrams whose source address is
// a broadcast or multicast address.
//
ifp = pkt->recvifp;
dstaddr = iphdr->dstaddr;
if (dstaddr == ifp->ipaddr && ifp->ipaddr != 0 ||
IS_MCAST_IPADDR(dstaddr) && IfFindMcastGroup(ifp, dstaddr) ||
IfBcastAddr(ifp, dstaddr) ||
IfLoopback(ifp))
{
srcaddr = iphdr->srcaddr;
if (IfBcastAddr(ifp, srcaddr) ||
IS_MCAST_IPADDR(srcaddr) ||
IS_LOOPBACK_IPADDR(srcaddr) && !IfLoopback(ifp))
goto discard;
// Check if there is fragmentation involved
if (iphdr->fragoffset & HTONS(MORE_FRAGMENTS|FRAGOFFSET_MASK)) {
IpReassemblePkt(pkt);
return;
}
// Process options if needed
if (iphdrlen > IPHDRLEN &&
!IpProcessRecvOptions(pkt->data+IPHDRLEN, iphdrlen-IPHDRLEN, ifp)) {
// NOTE: we could send a parameter problem ICMP packet here.
TRACE_("IP datagram option error");
goto discard;
}
pkt->data += iphdrlen;
pkt->datalen -= iphdrlen;
switch (iphdr->protocol) {
case IPPROTOCOL_TCP:
TcpReceivePacket(pkt);
break;
case IPPROTOCOL_UDP:
UdpReceivePacket(pkt);
break;
case IPPROTOCOL_ICMP:
IcmpReceivePacket(pkt);
break;
case IPPROTOCOL_IGMP:
IgmpReceivePacket(pkt);
break;
default:
RawReceivePacket(pkt);
break;
}
return;
}
discard:
// Discard the packet and return
XnetCompletePacket(pkt, NETERR_DISCARDED);
}