News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Let's Optimize 32-bit graphics code.

Started by OceanJeff32, February 21, 2005, 04:11:28 AM

Previous topic - Next topic

OceanJeff32

here:
    mov esi, bitmap1        ; load pointer to beginning of bitmap
    mov ecx, maxx           ; load x length of screen into ecx
    shl ecx, 2                  ; multiply by 4 for 32-bit color.
    mov eax, 00FF00FFh   ; load purple 32-bit ARGB (Alpha, Red, Green, Blue)
    mov edi, 0                 ; set pointer to index into bitmap
    cld

here2:

; ?? WHY DOESN'T REP STOSD WORK RIGHT HERE INSTEAD OF THE NEXT THREE LINES OF CODE
; AND GET RID OF THE LABEL...JUST CURIOUS
; THIS IS THE CODE I WANT TO OPTIMIZE, THE PIXEL DRAWING.
;       This code draws a horiz line at top of screen, no matter what screen size.
    mov [esi+ecx], eax      ; this copies EAX (purple?) to esi+ecx (ecx is offset into bitmap)
                                    ; yes, this line is drawn from right to left.
    sub ecx, 3                  ; this subtracts 3 from ecx register
    loop here2                 ; here ecx is decremented by one, total subtraction 4
   
    invoke SetDIBitsToDevice,wnddc,0,0,maxx,maxy,\
           0,0,0,maxy,bitmap1,ADDR bminf,DIB_RGB_COLORS

    mov eax,maxx
    imul maxy
    lea eax,[eax*4]
    invoke RtlZeroMemory,bitmap1,eax

    inc fcount                     ; count the frames (in other thread)
   
    mov eax,stop                ; check for exit
    test eax,eax
    jz here

Wondering how to optimize this code, but I just think this line drawing algorithm for a horizontal line is very quick.

I get high frame rates even with several lines in a row.

Later guys,

Jeff C
:U
I just had to post something in Mark's forum.

P.S. Any clues on the REP STOSD ?? I can't get it to work in the program. :dazzled:
Any good programmer knows, every large and/or small job, is equally large, to the programmer!

OceanJeff32

here:
    mov eax, maxx
    imul maxy
    lea eax, [eax*4]
    push eax
    mov esi, bitmap1        ; load pointer to beginning of bitmap
    mov ecx, maxx           ; load x length of screen into ecx
    mov eax, 00FF00FFh      ; load purple 32-bit ARGB (Alpha, Red, Green, Blue)
    mov edi, bitmap1        ; set pointer to index into bitmap
    cld

here2:

    rep stosd

    invoke SetDIBitsToDevice,wnddc,0,0,maxx,maxy,\
           0,0,0,maxy,bitmap1,ADDR bminf,DIB_RGB_COLORS

    pop eax
    push eax
    invoke RtlZeroMemory,bitmap1,eax
   
    inc fcount                 ; count the frames
   
    mov eax,stop                ; check for exit
    test eax,eax
    jz here

Thanks guys, I got it! And I optimized the bitmap clear multiplying and etc outside the loop! (Awesome stuff)

Let me know what you think now.

Jeff C
:U :U
Any good programmer knows, every large and/or small job, is equally large, to the programmer!

Farabi


Whole Math FUnction


PosCount proto :dword,:dword,:dword,:dword

GetPosLine proto :dword,:dword,:dword,:dword
GetDeltaXY proto :dword,:dword,:dword,:dword
GetHypotenusa proto :dword,:dword
GetPosRound proto :dword,:dword
GetPosRound2 proto :dword,:dword
UMGetPosRound proto :dword,:dword
GetDegree proto :dword,:dword
GetScale proto :dword,:dword
GetPosX proto :dword,:dword
GetSin proto :dword
GetCos proto :dword
GetTan proto :dword
GetFG proto :dword,:dword


fGrad proto :dword,:dword,:dword
IsInsideRect proto :dword,:dword,:dword,:dword,:dword,:dword

aTan2 proto :dword,:dword
Deg2Rad proto :dword
Rad2Deg proto


CenterScreen proto :dword,:dword
To3D proto :dword,:dword,:dword,:dword,:dword,:dword
CreateLine proto :dword,:Dword,:dword,:dword
GetLine3D proto :dword,:Dword
GetCountLine3D proto
ResetLine proto

GetLine3DAll proto :dword,:dword
SetLine3DAll proto :dword,:Dword

MirrorPosA proto :dword,:dword,:dword
Phase1 proto

line3D struct
x dword 0
y dword 0
z dword 0
color dword 0
line3D ends

.data?
SCTbl dword ?
Line_Table dword ?
nLPtr dword ?
nLLimit dword ?

.code

SetLine3DAll proc uses esi edi lplines:dword,nNum:dword

mov esi,lplines
mov edi,Line_Table

mov ecx,nNum

cmp ecx,10000
jl @f
xor eax,eax
dec eax
ret
@@:

mov nLPtr,ecx

@@:
mov edx,[esi]
mov eax,[esi+4]
mov [edi],edx
mov [edi+4],eax
mov edx,[esi+8]
mov eax,[esi+12]
mov [edi+8],edx
mov [edi+12],eax
add esi,16
add edi,16
dec ecx
jnz @b


ret
SetLine3DAll endp


CreatePoint3D proc uses esi edi x:dword,y:dword,z:dword,color:dword

mov esi,Line_Table
xor edx,edx
mov ecx,nLPtr
mov eax,16
mul ecx
mov ecx,eax

cmp ecx,nLLimit
jl @f
xor eax,eax
dec eax
ret
@@:

mov eax,z
test eax,080000000h
jz no_zc
mov z,1
no_zc:

mov eax,x
mov [esi+ecx*1],eax
mov eax,y
mov [esi+ecx*1+4],eax
mov eax,z
mov [esi+ecx*1+8],eax
mov eax,color
mov [esi+ecx*1+12],eax

inc nLPtr


ret
CreatePoint3D endp

GetLine proc uses esi edi lpline:dword,npos:dword
LOCAL cnt:dword

mov ecx,npos
mov eax,16
mul ecx
mov esi,Line_Table
add esi,eax

mov edi,lpline


push [esi]
pop [edi]

push [esi+4]
pop [edi+4]

push [esi+8]
pop [edi+8]

push [esi+12]
pop [edi+12]

ret
GetLine endp

GetCountLine3D proc

mov eax,nLPtr

ret
GetCountLine3D endp

ResetLines proc uses esi edi

mov nLPtr,0

ret
ResetLines endp



Phase1 proc uses esi ; 28.8 Kbyte needed
LOCAL deg,sin,cos,tan:dword

invoke LocalAlloc,LMEM_DISCARDABLE,28800+14400 + 16*10000
invoke LocalLock,eax
mov SCTbl,eax

mov esi,eax ; 1 Clock cycle
xor ecx,ecx ; 1 Clock cycle
mov deg,ecx ; 1 Clock cycle

mov edx,3600 ; 1 Clock cycle
shl edx,2 ; 2 Clock cycle


@@:
finit ; 17 Clock cycle
pushad
invoke Deg2Rad,deg ; 104 Clock cycle
popad
fsincos ; 365 Clock cycle

fstp sin ; 8 Clock cycle
fstp cos ; 8 Clock cycle

mov eax,sin ; 1 Clock cycle
mov dword ptr[esi],eax ; 1 Clock cycle
mov eax,cos ; 1 Clock cycle
mov dword ptr[esi+edx],eax ; 1 Clock cycle

add deg,1 ; 3 Clock cycle
add esi,4 ; 1 Clock cycle
add ecx,4 ; 1 Clock cycle
cmp deg,3600 ; 2 Clock cycle
jl @b ; 3 Clock cycle
; 513 Clock cycle Each loop
; 1846800 Clock cycle total loop
; 1846806 Clock cycle
mov esi,SCTbl
add esi,28800
;Tan
mov edx,3600 ; 1 Clock cycle
shl edx,2 ; 2 Clock cycle

xor ecx,ecx
mov deg,ecx

@@:
finit ; 17 Clock cycle
pushad
invoke Deg2Rad,deg ; 104 Clock cycle
popad
fptan ; 273 Clock cycle
fstp tan

push tan
pop [esi]
add deg,1 ; 3 Clock cycle
add esi,4 ; 1 Clock cycle
add ecx,4 ; 1 Clock cycle
cmp deg,3600 ; 2 Clock cycle
jl @b ; 3 Clock cycle

mov esi,SCTbl
add esi,28800+14400
mov Line_Table,esi
mov eax,16*10000
mov nLLimit,eax
mov nLPtr,0

ret
Phase1 endp

MirrorPos proc x:dword,y:dword,reg:dword

mov edx,x
mov eax,y

cmp reg,0
jz brs
; cmp reg,1
; jz rev_plus_plus
cmp reg,1
jz min_plus
; cmp reg,3
; jz rev_min_plus
cmp reg,2
jz min_min
; cmp reg,5
; jz rev_min_min
cmp reg,3
jz plus_min
; cmp reg,7
; jz rev_plus_min
ret
min_plus:
neg edx
ret
min_min:
neg edx
neg eax
ret
plus_min:
neg eax
ret
rev_min_plus:
neg edx
xchg eax,edx
ret
rev_min_min:
neg edx
neg eax
xchg eax,edx
ret
rev_plus_min:
neg eax
xchg eax,edx
ret
rev_plus_plus:
xchg eax,edx
ret
brs:
xor eax,eax
ret
MirrorPos endp

PosCount proc x:dword,y:dword,maxX:dword,maxY:dword

mov eax,maxY ; 1 clock cycle
cmp y,eax ; 2 clock cycle
jae brs ; 1 clock cycle
mov eax,maxX ; 1 clock cycle
cmp x,eax ; 2 clock cycle
jae brs ; 1 clock cycle
;mov ecx,y
mov eax,1024 ; 1 clock cycle eax:=maxX
mul y ; 42 clock cyclee ax:=maxX*y
add eax,x ; 2 clock cycle eax:=(MaxX * Y)+x
ret ; 1 clock cycle
; 54 clock cycle
brs:
xor eax,eax
ret
PosCount endp

GetPosLine proc nDX:dword,nDY:dword,hyp:dword,nPos:dword
LOCAL x,y:dword

fild nDX ; 12
fidiv hyp ; 73
fimul nPos ; 14
fistp x ; 34

; (nPos*nDX)/hyp
; xor edx,edx ; 1
; xor eax,eax ; 1

; mov eax,nDX ; 1
; mul nPos ; 42
; div hyp ; 42
; mov x,eax ; 1

fild nDY ; 12
fidiv hyp ; 73
fimul nPos ; 14
fistp y ; 34

; (nPos*nDY)/hyp

; xor edx,edx ; 1
; xor eax,eax ; 1

; mov eax,nDY ; 1
; mul nPos ; 42
; div hyp ; 42
; mov y,eax ; 1

mov edx,x ; 1
mov eax,y ; 1

; FPU = 169 Clock cycle
; No FPU = 177 Clock cycle
ret
GetPosLine endp ; Result on edx:eax

GetDeltaXY proc x:dword,y:dword,x2:dword,y2:dword

mov edx,x2
sub edx,x
mov eax,y2
sub eax,y

ret
GetDeltaXY endp ; Result on edx:eax

GetHypotenusa proc nDX:dword,nDY:dword
LOCAL res:dword

mov eax,nDY
mul eax
mov res,eax
mov eax,nDX
mul eax

add res,eax
fild res
fsqrt
fistp res
mov eax,res

ret
GetHypotenusa endp ;

GetDegree proc nDX:dword,nDY:dword
LOCAL multiplier:dword

invoke aTan2,nDX,nDY
invoke Rad2Deg

ret
GetDegree endp ; eax=(atan(dy,dx)) * 20

GetScale proc delta:dword,scale:dword
LOCAL persen:dword

mov persen,200

fild scale
fidiv persen

fimul delta
fistp delta

mov eax, delta

ret
GetScale endp

GetPosX proc p:dword,d:dword


ret
GetPosX endp

fGrad proc x:dword,nMaxx:dword,nNum:dword

xor edx,edx ; 1
mov eax,nNum ; 2
mul x ; 42

div nMaxx ; 40
; 85 clock cycle

ret
fGrad endp  ; eax result ; edx the rest;

IsInsideRect proc mX:dword,mY:dword,x:dword,y:dword,l:dword,t:dword

mov eax,x
mov edx,y

cmp mX,eax
jl brs
cmp mY,edx
jl brs
add eax,l
add edx,t
cmp mX,eax
ja brs
cmp mY,edx
ja brs
xor eax,eax
inc eax
ret
brs:
xor eax,eax
dec eax
ret
IsInsideRect endp ; Result eax=1 if on the rect

aTan2 proc nDX:dword, nDY:dword

fild nDX ;
fild nDY ;
fpatan

ret
aTan2 endp ; result at st(0)

Deg2Rad proc deg:dword ; 104 Clock cycle

fldpi ; Load Phi 8  Clock cycle
push 180 ; Push 180 degree 1  Clock cycle
fidiv dword ptr[esp] ; Div it with #180 86 Clock cycle
pop eax ; eax are Junk 4  Clock cycle

fild deg ; Rot
push 10 ; 1  Clock cycle
fidiv dword ptr[esp] ; Rot/#100, We got float here
pop eax ; eax are Junk 4  Clock cycle

fmul st,st(1) ; Mul, a = (rot/#100) * (Phi/180)

ret
Deg2Rad endp ; Result at FPU

Rad2Deg proc ; Source must be at st(0)

push 3600
fldpi
fidivr dword ptr[esp]
fmul st,st(1)
fistp dword ptr[esp]
pop eax

ret
Rad2Deg endp

To3D proc x:dword,y:dword,z:dword,nMaxX:dword,nMaxY:dword,deg:dword
LOCAL f,g,sx,sy:dword
LOCAL xc,yc:dword
; sx=x*f/z + 1/2 nMaxX
; sy=y*g/z + 1/2 nMaxY
; f=w/tan(a)
; g=h/tan(b)

invoke GetFG,nMaxX,deg
mov f,eax
invoke GetFG,nMaxY,deg
mov g,eax

invoke CenterScreen,nMaxX,nMaxY
mov xc,edx
mov yc,eax

xor edx,edx
mov eax,x

fild f
fimul x
fidiv z
fistp f
mov eax,f

add eax,xc
mov sx,eax

xor edx,edx
mov eax,y

fild g
fimul y
fidiv z
fistp g
mov eax,g

add eax,yc
mov edx,sx


ret
To3D endp

CenterScreen proc nMaxX:dword,nMaxY:dword


mov edx,nMaxX
mov eax,nMaxY

shr edx,1
shr eax,1

ret
CenterScreen endp

GetPosRound proc delta:dword,nRot:dword
LOCAL r_sin:dword,r_cos:dword
LOCAL divider:dword

; sin(nRot/20)=sin(radian)

mov divider,20

fild nRot
fidiv divider
fsincos

fimul delta
fistp r_sin

fimul delta
fistp r_cos

mov edx,r_cos
mov eax,r_sin


ret
GetPosRound endp ; edx= cos(nRot)*delta, eax= sin(nRot)*Delta

GetPosRound2 proc delta:dword,deg:dword ; 604 Clock cycle
LOCAL r_sin:dword,r_cos:dword

finit ; 17 Clock cycle
invoke Deg2Rad,deg ; 104 Clock cycle
fsincos ; 365 Clock cycle

fimul delta ; 24 Clock cycle
fistp r_sin ; 34 Clock cycle

fimul delta ; 24 Clock cycle
fistp r_cos ; 34 Clock cycle

mov edx,r_cos ; 1 Clock cycle
mov eax,r_sin ; 1 Clock cycle

ret
GetPosRound2 endp

UMGetPosRound proc uses esi delta:dword,deg:dword ; 138 Clock cycle
LOCAL r_sin,r_cos:dword

; mov esi,SCTbl ; 1 Clock cycle
; mov edx,3600 ; 1 Clock cycle
; shl edx,2 ; 2 Clock cycle
; shl deg,2 ; 4 Clock cycle

; add esi,edx ; 1 Clock cycle
; add esi,deg ; 2 Clock cycle
; fld dword ptr[esi] ; 3 Clock cycle
; mov esi,lpmem ; 1 Clock cycle
; add esi,deg ; 2 Clock cycle
; fld dword ptr[esi] ; 3 Clock cycle

; Until here 20 Clock cycle

invoke GetSin,deg ; 3 Clock cycle
push eax ; 1 Clock cycle
fld dword ptr[esp] ; 3 Clock cycle
pop eax ; 1 Clock cycle
fimul delta ; 24 Clock cycle
fistp r_sin ; 34 Clock cycle

invoke GetCos,deg ; 4 Clock cycle
push eax ; 1 Clock cycle
fld dword ptr[esp] ; 3 Clock cycle
pop eax ; 1 Clock cycle
fimul delta ; 24 Clock cycle
fistp r_cos ; 34 Clock cycle

mov edx,r_cos ; 1 Clock cycle
mov eax,r_sin ; 1 Clock cycle
; 135 Clock cycle

ret
UMGetPosRound endp

GetSin proc uses esi deg:dword

mov esi,SCTbl ; 1 Clock cycle
mov ecx,deg ; 1 Clock cycle
mov eax,dword ptr[esi+ecx*4] ; 1 Clock cycle

ret
GetSin endp

GetCos proc uses esi deg:dword

mov esi,SCTbl ; 1 Clock cycle
add esi,14400 ; 1 Clock cycle
mov ecx,deg ; 1 Clock cycle
mov eax,dword ptr[esi+ecx*4] ; 1 Clock cycle

ret
GetCos endp

GetTan proc uses esi deg:dword

mov esi,SCTbl ; 1 Clock cycle
add esi,28800 ; 1 Clock cycle
mov ecx,deg ; 1 Clock cycle
mov eax,dword ptr[esi+ecx*4] ; 1 Clock cycle

ret
GetTan endp

GetFG proc uses esi w:dword,deg:dword

invoke GetTan,deg
push eax
fld dword ptr[esp]
fidivr w
fistp dword ptr[esp]
pop eax

; eax= result
ret
GetFG endp

Those who had universe knowledges can control the world by a micro processor.
http://www.wix.com/farabio/firstpage

"Etos siperi elegi"