2020-09-30 17:17:25 +02:00

264 lines
9.1 KiB
NASM

title "Compute TCP/IP Checksum"
;/*++
;
; Copyright (c) 1992 Microsoft Corporation
;
; Module Name:
;
; tcpipxsum.asm
;
; Abstract:
;
; This module implements a function to compute the checksum of a buffer.
;
; Author:
;
; David N. Cutler (davec) 27-Jan-1992
;
; Revision History:
;
; Who When What
; -------- -------- ----------------------------------------------
; mikeab 01-22-94 Pentium optimization
;
;--*/
;
; Unrolling parameters for the dword summing loop in tcpipxsum.
; LOOP_UNROLLING is the number of "adc/mov" pairs generated per pass of the
; loop (1 SHL 4 = 16 dwords, i.e. 64 bytes per pass). It is kept as a power
; of two because the code derives the pass count with a shift by
; LOOP_UNROLLING_BITS and the size of the partial first pass with a mask of
; LOOP_UNROLLING-1.
;
LOOP_UNROLLING_BITS equ 4
LOOP_UNROLLING equ (1 SHL LOOP_UNROLLING_BITS)
;
; Assembler setup: 386 instruction set, small model with C naming, and a
; flat 32-bit view of CS/DS/ES/SS; nothing is assumed about FS and GS.
;
.386
.model small,c
assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
assume fs:nothing,gs:nothing
;
; The include files are bracketed by .xlist/.list so that their contents are
; left out of the listing. callconv.inc presumably supplies the cPublicProc /
; stdRET / stdENDP macros used by the procedure (the include files are not
; part of this file -- confirm there).
;
.xlist
include callconv.inc
include ks386.inc
.list
;
; Code segment that holds the checksum routine (dword aligned, public,
; class 'CODE').
;
XNET SEGMENT DWORD PUBLIC 'CODE'
;++
;
; ULONG
; tcpipxsum(
; IN ULONG xsum,
; IN const VOID* buf,
; IN UINT len
; )
;
; Routine Description:
;
; This function computes the checksum of the specified buffer.
; It uses the processor's prefetch instruction.
;
; Arguments:
;
; xsum - Supplies the initial checksum value, in 16-bit form,
; with the high word set to 0.
;
; buf - Supplies a pointer to the buffer to checksum.
;
; len - Supplies the length of the buffer in bytes.
;
; Return Value:
;
; The checksum of the buffer, folded to 16 bits and then combined (with
; end-around carry) with the low word of the initial checksum, is returned
; as the function value. The high word of the returned value is zero.
;
;--
;
; Argument offsets from ESP. They are valid once the prologue has pushed EBX
; and ESI (8 bytes) on top of the 4-byte return address, and only while
; nothing else is on the stack: the word count pushed at
; checksum_dword_aligned is popped again before any argument is re-read.
;
xsum equ 12 ; stack offset to initial checksum
buf equ 16 ; stack offset to source address
len equ 20 ; stack offset to length in bytes
;
; Trampolines. The early-exit branches in the procedure are all coded as
; "jz short"; they land here and are forwarded with an unrestricted jmp to
; the real targets, which sit after the unrolled loop (presumably out of
; short-jump range).
;
to_checksum_last_word:
jmp checksum_last_word
to_checksum_done:
jmp checksum_done
to_checksum_dword_loop_done:
jmp checksum_dword_loop_done
;
; Register usage inside tcpipxsum:
;
; EAX - checksum accumulator (returned value)
; ECX - bytes remaining, then word count, then dword count, then number of
; passes through the unrolled loop
; EDX - the dword most recently loaded; it is added to EAX by the *next*
; adc, so the load always runs one step ahead of the add
; ESI - current source address
; EBX - number of dwords in the partial first pass; reused at the end to
; re-read the original buffer address
;
; EBX and ESI are saved and restored; EAX, ECX, EDX and the flags are
; modified.
;
cPublicProc tcpipxsum,3
push ebx ; save nonvolatile register
push esi ; save nonvolatile register
mov ecx,[esp + len] ; get length in bytes
sub eax,eax ; clear computed checksum
test ecx,ecx ; any bytes to checksum at all?
jz short to_checksum_done ; no bytes to checksum
;
; If the checksum buffer is not word aligned, load its first byte into the
; high byte of the accumulator (AH). From then on every byte is summed in
; the opposite half of its word; the result is byte-swapped back just before
; the initial checksum is combined.
;
mov esi,[esp + buf] ; get source address
sub edx,edx ; set up to load word into EDX below
test esi,1 ; check if buffer word aligned
jz short checksum_word_aligned ; if zf, buffer word aligned
mov ah,[esi] ; get first byte (we know we'll have
; to swap at the end)
inc esi ; increment buffer address
dec ecx ; decrement number of bytes
jz short to_checksum_done ; if zf set, no more bytes
;
; If the buffer is not an even number of bytes, then initialize
; the computed checksum with the last byte of the buffer.
;
; The shr below leaves CF = "byte count was odd" and ZF = "word count is
; zero". The mov into AL does not alter flags, so the jz that follows it
; still tests the ZF produced by the shr.
;
checksum_word_aligned: ;
shr ecx,1 ; convert to word count
jnc short checksum_start ; if nc, even number of bytes
mov al,[esi+ecx*2] ; initialize the computed checksum
jz short to_checksum_done ; if zf set, no more bytes
;
; Compute checksum in large blocks of dwords, with one partial word up front if
; necessary to get dword alignment, and another partial word at the end if
; needed.
;
;
; Compute checksum on the leading word, if that's necessary to get dword
; alignment. ECX (word count) is at least 1 on entry to checksum_start.
;
checksum_start: ;
test esi,02h ; check if source dword aligned
jz short checksum_dword_aligned ; source is already dword aligned
mov dx,[esi] ; get first word to checksum
add esi,2 ; update source address
add eax,edx ; update partial checksum
; (no carry is possible, because EAX
; and EDX are both 16-bit values)
dec ecx ; count off this word (zero case gets
; picked up below)
;
; Checksum as many words as possible by processing a dword at a time.
; The word count is saved on the stack; its low bit is examined at
; checksum_last_word to decide whether a trailing word remains.
;
checksum_dword_aligned:
push ecx ; so we can tell if there's a trailing
; word later
shr ecx,1 ; # of dwords to checksum
jz short to_checksum_last_word ; no dwords to checksum
mov edx,[esi] ; preload the first dword
add esi,4 ; point to the next dword
dec ecx ; count off the dword we just loaded
jz short to_checksum_dword_loop_done
; skip the loop if that was the only dword
mov ebx,ecx ; EBX = # of dwords left to checksum
add ecx,LOOP_UNROLLING-1 ; round up loop count
shr ecx,LOOP_UNROLLING_BITS ; convert from dword count to unrolled loop count
and ebx,LOOP_UNROLLING-1 ; # of partial dwords to do in first loop
jz short checksum_dword_loop ; special-case when no partial loop,
; because fixup below doesn't work
; in that case (carry flag is
; cleared at this point, as required
; at loop entry)
lea esi,[esi+ebx*4-(LOOP_UNROLLING*4)]
; adjust buffer pointer back to
; compensate for hardwired displacement
; at loop entry point
; ***doesn't change carry flag***
jmp loop_entry[ebx*4] ; enter the loop to do the first,
; partial iteration, after which we can
; just do 32-word blocks
; (LOOP_UNROLLING dwords each)
; ***doesn't change carry flag***
;
; Top of the unrolled loop. The two prefetches are emitted as raw bytes:
; 0F 18 with ModRM 46h and an 8-bit displacement encodes
; PREFETCHNTA [esi+disp8]. They are only executed when a pass starts from
; the top; the partial first pass enters below them through the jump table.
; Prefetching does not alter the flags, so the carry chain is unaffected.
;
checksum_dword_loop:
; prefetch the 32-byte cache line from [esi+0]
db 0fH
db 18H
db 46H
db 00H
; prefetch the 32-byte cache line from [esi+20h]
db 0fH
db 18H
db 46H
db 20H
;
; DEFLAB pastes its two arguments together to define a label. The REPT block
; uses it to emit LOOP_UNROLLING copies of
;
; loop_entry_<disp>: adc eax,edx / mov edx,[esi + <disp>]
;
; with <disp> = 0, 4, 8, ... Each adc adds the previously loaded dword plus
; the carry left by the previous adc; mov does not alter the flags.
;
DEFLAB macro pre,suf
pre&suf:
endm
TEMP=0
REPT LOOP_UNROLLING
deflab loop_entry_,%TEMP
adc eax,edx
mov edx,[esi + TEMP]
TEMP=TEMP+4
ENDM
checksum_dword_loop_end:
lea esi,[esi + LOOP_UNROLLING * 4] ; update source address
; ***doesn't change carry flag***
dec ecx ; count off unrolled loop iteration
; ***doesn't change carry flag***
jnz checksum_dword_loop ; do more blocks
;
; Add the last loaded dword, then the carry out of that addition. EDX is
; zeroed with MOV because MOV leaves the carry flag intact, and the second
; adc still needs the carry produced by the first.
;
checksum_dword_loop_done label proc
adc eax,edx ; finish dword checksum
mov edx,0 ; zero EDX without touching the carry flag
adc eax,edx ; add in the final carry
;
; Compute checksum on the trailing word, if there is one.
; The word count saved at checksum_dword_aligned is popped here; bit 0 set
; means one word remains after the dwords. This pop also rebalances the
; stack so that the argument offsets are valid again.
;
checksum_last_word label proc ; "proc" so not scoped to function
pop ecx ; get back word count
test ecx,1 ; is there a trailing word?
jz short checksum_done ; no trailing word
add ax,[esi] ; add in the trailing word
adc eax,0 ; end-around carry from the 16-bit add
;
; Fold the 32-bit sum to 16 bits. Adding EAX to a copy of itself rotated by
; 16 leaves, in the high word, high + low plus the carry out of the low-word
; addition (the end-around carry); the shift then moves that word down and
; clears the high word.
;
checksum_done label proc ; "proc" so not scoped to function
mov ecx,eax ; fold the checksum to 16 bits
ror ecx,16 ; ECX = EAX with its halves exchanged
add eax,ecx ; high word = high + low + end-around carry
mov ebx,[esp + buf] ; re-read the original buffer address
shr eax,16 ; EAX = folded 16-bit sum, high word zero
test ebx,1 ; check if buffer word aligned
jz short checksum_combine ; if zf set, buffer word aligned
ror ax,8 ; byte aligned--swap bytes back
;
; Combine with the low word of the caller's initial checksum. The pop between
; the add and the adc does not alter the flags, so the adc still sees the
; carry out of the 16-bit add.
;
checksum_combine label proc ; "proc" so not scoped to function
add ax, word ptr [esp + xsum] ; combine checksums
pop esi ; restore nonvolatile register
adc eax,0 ; end-around carry from the 16-bit add
pop ebx ; restore nonvolatile register
stdRET tcpipxsum
;
; Jump table for entering the unrolled loop part-way through. REFLAB emits
; the address of the label formed by pasting its two arguments together.
; Entry i (i = 1 .. LOOP_UNROLLING-1) holds the address of
; loop_entry_<(LOOP_UNROLLING - i) * 4>, so that entering there executes
; exactly i adc/mov pairs before the end of the pass. Entry 0 is a
; placeholder: the i = 0 case branches directly to checksum_dword_loop and
; never indexes the table.
;
REFLAB macro pre,suf
dd pre&suf
endm
align 4
loop_entry label dword
dd 0
TEMP=LOOP_UNROLLING*4
REPT LOOP_UNROLLING-1
TEMP=TEMP-4
reflab loop_entry_,%TEMP
ENDM
stdENDP tcpipxsum
;
; Close the XNET code segment opened before the procedure. Without a matching
; ENDS the segment block is still open when END is reached.
;
XNET ENDS
end