357 lines
8.5 KiB
NASM
357 lines
8.5 KiB
NASM
title "Zero memory pages using fastest means available"
|
||
|
||
|
||
; Copyright (c) 1998 Microsoft Corporation
;
; Module Name:
;
;     zero.asm
;
; Abstract:
;
;     Zero memory pages using the fastest means available.
;
; Author:
;
;     Peter Johnston (peterj) 20-Jun-1998.
;         Critical sections of Katmai code adapted from in-line
;         assembly version by Shiv Kaushik of Intel Corp.
;
; Environment:
;
;     x86
;
; Revision History:
|
||
|
||
|
||
|
||
.386p
|
||
.xlist
|
||
include ks386.inc
|
||
include callconv.inc
|
||
include mac386.inc
|
||
.list
|
||
|
||
|
||
; Register Definitions (for instruction macros).
|
||
|
||
|
||
;
; Numeric register selectors handed to the instruction macros in this
; file.  The macros add these values into the ModRM / SIB bytes they
; emit, so each value is the 3-bit hardware encoding of the register.
;

rEAX            equ     0               ; 000b
rECX            equ     1               ; 001b
rEDX            equ     2               ; 010b
rEBX            equ     3               ; 011b
rESP            equ     4               ; 100b
rEBP            equ     5               ; 101b
rESI            equ     6               ; 110b
rEDI            equ     7               ; 111b
|
||
|
||
|
||
; Define SIMD instructions used in this module.
|
||
|
||
|
||
if 0

;
; Reference only -- this conditional is never assembled.  These are the
; original fixed-operand encodings; the parameterized macros that follow
; the endif are expected to produce the same bytes for the same operands.
;

xorps_xmm0_xmm0 macro
        db      0FH, 057H, 0C0H         ; xorps   xmm0, xmm0
        endm

movntps_edx     macro   Offset
        db      0FH, 02BH, 042H, Offset ; movntps [edx+Offset], xmm0
        endm

movaps_esp_xmm0 macro
        db      0FH, 029H, 004H, 024H   ; movaps  [esp], xmm0
        endm

movaps_xmm0_esp macro
        db      0FH, 028H, 004H, 024H   ; movaps  xmm0, [esp]
        endm

endif
|
||
|
||
;
; xorps XMMReg1, XMMReg2
;
;   Hand-assembled register-to-register XORPS (opcode 0F 57 /r).
;   The ModRM byte is built as mod=11b, reg=XMMReg1, r/m=XMMReg2,
;   so both arguments must be XMM register numbers in the range 0-7.
;   Emits 3 bytes.
;

xorps   macro   XMMReg1, XMMReg2
        db      0FH, 057H, 0C0H + (XMMReg1 shl 3) + XMMReg2
        endm
|
||
|
||
;
; movntps GeneralReg, Offset, XMMReg
;
;   Hand-assembled MOVNTPS [GeneralReg + Offset], XMMReg
;   (opcode 0F 2B /r).  The ModRM byte is built as mod=01b (8-bit
;   displacement), reg=XMMReg, r/m=GeneralReg.  Emits 4 bytes.
;
;   GeneralReg  Base register number (one of the rExx equates).  rESP
;               cannot be used: r/m=100b selects a SIB byte, which this
;               macro does not emit.
;   Offset      Displacement; emitted as a single byte, so it must fit
;               in a signed 8-bit value.
;   XMMReg      Source XMM register number, 0-7.
;
;   The body now spells the third parameter exactly as it is declared
;   (it previously used "XmmReg"), so the macro also assembles when
;   symbol names are case sensitive.
;

movntps macro   GeneralReg, Offset, XMMReg
        db      0FH, 02BH, 040H + (XMMReg * 8) + GeneralReg, Offset
        endm
|
||
|
||
;
; sfence
;
;   Hand-assembled SFENCE (0F AE /7 with mod=11b), i.e. the three
;   bytes 0F AE F8.  Takes no arguments.
;

sfence  macro
        db      0FH, 0AEH, 0C0H + (7 shl 3)
        endm
|
||
|
||
;
; movaps_load XMMReg, GeneralReg
;
;   Hand-assembled MOVAPS XMMReg, [GeneralReg] (opcode 0F 28 /r) using
;   the ModRM + SIB form: ModRM is mod=00b, reg=XMMReg, r/m=100b (SIB
;   follows); SIB is scale=0, index=100b (no index), base=GeneralReg.
;   Emits 4 bytes.  With arguments (0, rESP) this is 0F 28 04 24.
;
;   NOTE(review): with mod=00b a SIB base of 101b (rEBP) means "disp32,
;   no base", so rEBP is not a usable GeneralReg here.  MOVAPS also
;   requires a 16-byte aligned memory operand.
;

movaps_load     macro   XMMReg, GeneralReg
        db      0FH, 028H, (XMMReg shl 3) + 4, (4 shl 3) + GeneralReg
        endm
|
||
|
||
;
; movaps_store GeneralReg, XMMReg
;
;   Hand-assembled MOVAPS [GeneralReg], XMMReg (opcode 0F 29 /r) using
;   the ModRM + SIB form: ModRM is mod=00b, reg=XMMReg, r/m=100b (SIB
;   follows); SIB is scale=0, index=100b (no index), base=GeneralReg.
;   Emits 4 bytes.  With arguments (rESP, 0) this is 0F 29 04 24.
;
;   NOTE(review): with mod=00b a SIB base of 101b (rEBP) means "disp32,
;   no base", so rEBP is not a usable GeneralReg here.  MOVAPS also
;   requires a 16-byte aligned memory operand.
;

movaps_store    macro   GeneralReg, XMMReg
        db      0FH, 029H, (XMMReg shl 3) + 4, (4 shl 3) + GeneralReg
        endm
|
||
|
||
|
||
|
||
; NPX Save and Restore
|
||
|
||
|
||
;
; fxsave Register
;
;   Hand-assembled FXSAVE [Register] (opcode 0F AE /0).  The ModRM byte
;   is mod=00b, reg=000b, r/m=Register, with no SIB or displacement, so
;   Register must be a base register number that needs neither (not
;   rESP or rEBP).  Emits 3 bytes.
;

fxsave  macro   Register
        db      0FH, 0AEH, (0 shl 3) + Register
        endm
|
||
|
||
;
; fxrstor Register
;
;   Hand-assembled FXRSTOR [Register] (opcode 0F AE /1).  The ModRM byte
;   is mod=00b, reg=001b, r/m=Register, with no SIB or displacement, so
;   Register must be a base register number that needs neither (not
;   rESP or rEBP).  Emits 3 bytes.
;

fxrstor macro   Register
        db      0FH, 0AEH, (1 shl 3) + Register
        endm
|
||
|
||
|
||
_TEXT SEGMENT DWORD PUBLIC 'CODE'
|
||
ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
|
||
|
||
|
||
|
||
; VOID
; KeZeroPage(
;     PageBase
;     )
;
; Routine Description:
;
;     KeZeroPage is really just a function pointer that points at
;     either KiZeroPage or KiXMMIZeroPage depending on whether or
;     not XMMI instructions are available.
;
; Arguments:
;
;     (ecx) PageBase    Base address of page to be zeroed.
;
; Return Value:
|
||
|
||
|
||
|
||
|
||
page ,132
|
||
subttl "KiXMMIZeroPageNoSave - Use XMMI to zero memory (XMMI owned)"
|
||
|
||
|
||
|
||
; VOID
; KiXMMIZeroPageNoSave (
;     IN PVOID PageBase
;     )
;
; Routine Description:
;
;     Use XMMI to zero a page of memory 16 bytes at a time while
;     at the same time minimizing cache pollution.
;
;     Note: The XMMI register set belongs to this thread.  It is neither
;     saved nor restored by this procedure.
;
; Arguments:
;
;     (ecx) PageBase    Virtual address of the base of the page to be zeroed.
;
; Return Value:
;
;     None.
|
||
|
||
|
||
|
||
;
; Bytes zeroed by one pass of the store loop below.  The loop body is
; four 16-byte stores, so this value and the body must agree, and the
; page size must be a whole number of passes.  Both are checked when
; the file is assembled.
;

INNER_LOOP_BYTES equ 64

        .errnz  (INNER_LOOP_BYTES - (4 * 16))
        .errnz  (PAGE_SIZE mod INNER_LOOP_BYTES)

;
; KiXMMIZeroPageNoSave
;
;   In:       ecx = base address of the page to zero.
;   Out:      nothing.
;   Modifies: eax, ecx, xmm0, flags.  xmm0 is neither saved nor
;             restored here.
;
;   Stores PAGE_SIZE bytes of zero starting at ecx using MOVNTPS,
;   INNER_LOOP_BYTES per pass, then fences the stores.
;

cPublicFastCall KiXMMIZeroPageNoSave,1
cPublicFpo 0, 1

        xorps   0, 0                    ; xmm0 = 0 (all 128 bits)
        mov     eax, PAGE_SIZE/INNER_LOOP_BYTES ; eax = passes remaining

inner:

        movntps rECX, 0, 0              ; zero bytes  0 - 15
        movntps rECX, 16, 0             ;            16 - 31
        movntps rECX, 32, 0             ;            32 - 47
        movntps rECX, 48, 0             ;            48 - 63

        add     ecx, INNER_LOOP_BYTES   ; advance to the next chunk
        dec     eax                     ; one pass done
        jnz     short inner             ; loop until the count hits zero

        ;
        ; Force all stores to complete before any other stores from
        ; this processor.
        ;
        ; BUGBUG Does this mean we need an sfence on context switch?
        ; (I suspect yes if the processor owns the XMMI context - peterj).
        ;

        sfence

ifndef SFENCE_IS_NOT_BUSTED

        ;
        ; BUGBUG the next uncached write to this processor's apic may
        ; fail unless the store pipes have drained.  sfence by itself
        ; is not enough.  Force drainage now with an interlocked
        ; exchange (XCHG with a memory operand is implicitly locked).
        ;
        ; eax is zero here (the loop above just counted it down), so
        ; this writes zero to the dword just below the stack pointer
        ; and leaves that dword's previous contents in eax.
        ;

        xchg    [esp-4], eax

endif

        fstRET  KiXMMIZeroPageNoSave

fstENDP KiXMMIZeroPageNoSave
|
||
|
||
|
||
page ,132
|
||
subttl "KiXMMIZeroPage - Use XMMI to zero memory"
|
||
|
||
|
||
|
||
; VOID
; KiXMMIZeroPage (
;     IN PVOID PageBase
;     )
;
; Routine Description:
;
;     Use XMMI to zero a page of memory 16 bytes at a time.  This
;     routine is a wrapper around KiXMMIZeroPageNoSave.  In this
;     case we don't have the luxury of not saving/restoring context.
;
; Arguments:
;
;     (ecx) PageBase    Virtual address of the base of the page to be zeroed.
;
; Return Value:
;
;     None.
|
||
|
||
|
||
|
||
;
; KiXMMIZeroPage
;
;   In:       ecx = base address of the page to zero.  ecx is not
;             touched here; it is handed on unchanged to whichever
;             routine does the zeroing.
;   Out:      nothing.
;   Uses:     eax, edx as scratch; ebx and ebp are pushed on entry and
;             popped before leaving; xmm0 is saved in an aligned stack
;             slot around the call and reloaded afterwards.
;
;   Makes sure the NPX/XMMI state is loaded for the current thread,
;   then calls KiXMMIZeroPageNoSave.  If the CR0_EM bit is set in the
;   FpCr0NpxState field reached through PCR[PcInitialStack], control is
;   passed to KiZeroPage instead.
;

cPublicFastCall KiXMMIZeroPage,1
cPublicFpo 0, 2

        mov     eax, PCR[PcInitialStack]        ; eax -> base used for FpCr0NpxState
                                                ; and as the fxrstor source
        mov     edx, PCR[PcPrcbData+PbCurrentThread] ; edx -> current thread
        push    ebp
        push    ebx
        mov     ebp, esp                        ; remember esp before aligning
        sub     esp, 16                         ; room for one 128-bit register
        and     esp, 0FFFFFFF0H                 ; round down to a 16-byte boundary
        cli                                     ; no context switch from here on
        test    [eax].FpCr0NpxState, CR0_EM     ; FP explicitly disabled?
        jnz     short kxzp90                    ; yes, take the non-XMMI path
        cmp     byte ptr [edx].ThNpxState, NPX_STATE_LOADED
        je      short kxzp80                    ; state already loaded, go use it

        ;
        ; The NPX state is not loaded for this thread.  Mark it loaded
        ; now; it will be by the time context switching is re-enabled.
        ;

        mov     byte ptr [edx].ThNpxState, NPX_STATE_LOADED

        ;
        ; Clear MP, TS and EM in CR0 so FP instructions can be used.
        ;

        mov     ebx, cr0
        and     ebx, NOT (CR0_MP+CR0_TS+CR0_EM)
        mov     cr0, ebx                        ; NPX enabled

ifdef NT_UP

        ;
        ; Uniprocessor build: the NPX may currently hold another
        ; thread's state, which must be written to that thread's save
        ; area before it is replaced.
        ;

        mov     ebx, PCR[PcPrcbData+PbNpxThread] ; ebx = thread owning the NPX
        or      ebx, ebx                        ; no owner?
        jz      short @f                        ; then nothing to save

        mov     byte ptr [ebx].ThNpxState, NPX_STATE_NOT_LOADED
        mov     ebx, [ebx].ThInitialStack       ; owner's save area is
        sub     ebx, NPX_FRAME_LENGTH           ; InitialStack - NPX_FRAME_LENGTH
        fxsave  rEBX                            ; write the owner's state out
@@:

endif

        ;
        ; Load this thread's NPX context.  If we are switched away
        ; from, the state will be saved back into this same save area,
        ; so its current contents must be in the registers first or
        ; they would be destroyed.
        ;

        fxrstor rEAX

        mov     PCR[PcPrcbData+PbNpxThread], edx ; this thread now owns the NPX

kxzp80:
        sti                                     ; context switching allowed again
        movaps_store rESP, 0                    ; [esp] = xmm0 (aligned slot)
        fstCall KiXMMIZeroPageNoSave            ; zero the page
        movaps_load 0, rESP                     ; xmm0 = [esp]

        ;
        ; Undo the stack alignment, restore ebx/ebp and return.
        ;

        mov     esp, ebp
        pop     ebx
        pop     ebp
        fstRET  KiXMMIZeroPage

        ;
        ; FP is explicitly disabled for this thread (probably a VDM
        ; thread).  Put the stack and saved registers back the way
        ; they were on entry and jump to KiZeroPage, which does the
        ; work without XMMI and returns to our caller.
        ;

kxzp90:
        sti
        mov     esp, ebp
        pop     ebx
        pop     ebp
        jmp     short @KiZeroPage@4

fstENDP KiXMMIZeroPage
|
||
|
||
|
||
page ,132
|
||
subttl "KiZeroPage - Available to all X86 processors"
|
||
|
||
|
||
|
||
; KiZeroPage(
;     PVOID PageBase
;     )
;
; Routine Description:
;
;     Generic Zero Page routine, used on processors that don't have
;     a more efficient way to zero large blocks of memory.
;     (Same as RtlZeroMemory).
;
; Arguments:
;
;     (ecx) PageBase    Base address of page to be zeroed.
;
; Return Value:
;
;     None.
|
||
|
||
|
||
|
||
;
; KiZeroPage
;
;   In:       ecx = base address of the page to zero.
;   Out:      nothing.
;   Modifies: eax, ecx, flags.  edi is used as the store pointer and
;             is preserved via push/pop.
;
;   Writes PAGE_SIZE/4 zero dwords starting at ecx with REP STOSD.
;   NOTE(review): relies on the direction flag being clear on entry so
;   that STOSD advances edi upward -- confirm against the kernel's
;   calling conventions.
;

cPublicFastCall KiZeroPage,1
cPublicFpo 0, 0

        push    edi                     ; edi is non-volatile, keep caller's
        mov     edi, ecx                ; edi -> first dword of the page
        xor     eax, eax                ; eax = 0, the value to store
        mov     ecx, PAGE_SIZE/4        ; ecx = number of dwords in a page

        rep stosd                       ; [edi] = eax, ecx times

        pop     edi                     ; give the caller's edi back
        fstRET  KiZeroPage

fstENDP KiZeroPage
|
||
|
||
|
||
_TEXT ends
|
||
end
|